1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2015 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "stringpool.h"
31 #include "stor-layout.h"
32 #include "calls.h"
33 #include "varasm.h"
34 #include "obstack.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "insn-config.h"
38 #include "conditions.h"
39 #include "output.h"
40 #include "insn-attr.h"
41 #include "flags.h"
42 #include "reload.h"
43 #include "hashtab.h"
44 #include "hash-set.h"
45 #include "vec.h"
46 #include "machmode.h"
47 #include "input.h"
48 #include "function.h"
49 #include "expr.h"
50 #include "insn-codes.h"
51 #include "optabs.h"
52 #include "diagnostic-core.h"
53 #include "recog.h"
54 #include "predict.h"
55 #include "dominance.h"
56 #include "cfg.h"
57 #include "cfgrtl.h"
58 #include "cfganal.h"
59 #include "lcm.h"
60 #include "cfgbuild.h"
61 #include "cfgcleanup.h"
62 #include "basic-block.h"
63 #include "hash-map.h"
64 #include "is-a.h"
65 #include "plugin-api.h"
66 #include "ipa-ref.h"
67 #include "cgraph.h"
68 #include "ggc.h"
69 #include "except.h"
70 #include "tm_p.h"
71 #include "target.h"
72 #include "sched-int.h"
73 #include "target-def.h"
74 #include "debug.h"
75 #include "langhooks.h"
76 #include "df.h"
77 #include "intl.h"
78 #include "libfuncs.h"
79 #include "params.h"
80 #include "opts.h"
81 #include "dumpfile.h"
82 #include "gimple-expr.h"
83 #include "builtins.h"
84 #include "tm-constrs.h"
85 #include "rtl-iter.h"
86
87 /* Forward definitions of types. */
88 typedef struct minipool_node Mnode;
89 typedef struct minipool_fixup Mfix;
90
91 void (*arm_lang_output_object_attributes_hook)(void);
92
93 struct four_ints
94 {
95 int i[4];
96 };
97
98 /* Forward function declarations. */
99 static bool arm_const_not_ok_for_debug_p (rtx);
100 static bool arm_lra_p (void);
101 static bool arm_needs_doubleword_align (machine_mode, const_tree);
102 static int arm_compute_static_chain_stack_bytes (void);
103 static arm_stack_offsets *arm_get_frame_offsets (void);
104 static void arm_add_gc_roots (void);
105 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
106 HOST_WIDE_INT, rtx, rtx, int, int);
107 static unsigned bit_count (unsigned long);
108 static int arm_address_register_rtx_p (rtx, int);
109 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
110 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
111 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
112 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
113 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
114 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
115 inline static int thumb1_index_register_rtx_p (rtx, int);
116 static int thumb_far_jump_used_p (void);
117 static bool thumb_force_lr_save (void);
118 static unsigned arm_size_return_regs (void);
119 static bool arm_assemble_integer (rtx, unsigned int, int);
120 static void arm_print_operand (FILE *, rtx, int);
121 static void arm_print_operand_address (FILE *, rtx);
122 static bool arm_print_operand_punct_valid_p (unsigned char code);
123 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
124 static arm_cc get_arm_condition_code (rtx);
125 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
126 static const char *output_multi_immediate (rtx *, const char *, const char *,
127 int, HOST_WIDE_INT);
128 static const char *shift_op (rtx, HOST_WIDE_INT *);
129 static struct machine_function *arm_init_machine_status (void);
130 static void thumb_exit (FILE *, int);
131 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
132 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
133 static Mnode *add_minipool_forward_ref (Mfix *);
134 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
135 static Mnode *add_minipool_backward_ref (Mfix *);
136 static void assign_minipool_offsets (Mfix *);
137 static void arm_print_value (FILE *, rtx);
138 static void dump_minipool (rtx_insn *);
139 static int arm_barrier_cost (rtx);
140 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
141 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
142 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
143 machine_mode, rtx);
144 static void arm_reorg (void);
145 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
146 static unsigned long arm_compute_save_reg0_reg12_mask (void);
147 static unsigned long arm_compute_save_reg_mask (void);
148 static unsigned long arm_isr_value (tree);
149 static unsigned long arm_compute_func_type (void);
150 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
151 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
152 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
153 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
154 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
155 #endif
156 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
157 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
158 static int arm_comp_type_attributes (const_tree, const_tree);
159 static void arm_set_default_type_attributes (tree);
160 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
161 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
162 static int optimal_immediate_sequence (enum rtx_code code,
163 unsigned HOST_WIDE_INT val,
164 struct four_ints *return_sequence);
165 static int optimal_immediate_sequence_1 (enum rtx_code code,
166 unsigned HOST_WIDE_INT val,
167 struct four_ints *return_sequence,
168 int i);
169 static int arm_get_strip_length (int);
170 static bool arm_function_ok_for_sibcall (tree, tree);
171 static machine_mode arm_promote_function_mode (const_tree,
172 machine_mode, int *,
173 const_tree, int);
174 static bool arm_return_in_memory (const_tree, const_tree);
175 static rtx arm_function_value (const_tree, const_tree, bool);
176 static rtx arm_libcall_value_1 (machine_mode);
177 static rtx arm_libcall_value (machine_mode, const_rtx);
178 static bool arm_function_value_regno_p (const unsigned int);
179 static void arm_internal_label (FILE *, const char *, unsigned long);
180 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
181 tree);
182 static bool arm_have_conditional_execution (void);
183 static bool arm_cannot_force_const_mem (machine_mode, rtx);
184 static bool arm_legitimate_constant_p (machine_mode, rtx);
185 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
186 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
187 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
188 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
189 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
190 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
191 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
192 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
193 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
194 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
195 static void emit_constant_insn (rtx cond, rtx pattern);
196 static rtx_insn *emit_set_insn (rtx, rtx);
197 static rtx emit_multi_reg_push (unsigned long, unsigned long);
198 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
199 tree, bool);
200 static rtx arm_function_arg (cumulative_args_t, machine_mode,
201 const_tree, bool);
202 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
203 const_tree, bool);
204 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
205 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
206 const_tree);
207 static rtx aapcs_libcall_value (machine_mode);
208 static int aapcs_select_return_coproc (const_tree, const_tree);
209
210 #ifdef OBJECT_FORMAT_ELF
211 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
212 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
213 #endif
214 #ifndef ARM_PE
215 static void arm_encode_section_info (tree, rtx, int);
216 #endif
217
218 static void arm_file_end (void);
219 static void arm_file_start (void);
220
221 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
222 tree, int *, int);
223 static bool arm_pass_by_reference (cumulative_args_t,
224 machine_mode, const_tree, bool);
225 static bool arm_promote_prototypes (const_tree);
226 static bool arm_default_short_enums (void);
227 static bool arm_align_anon_bitfield (void);
228 static bool arm_return_in_msb (const_tree);
229 static bool arm_must_pass_in_stack (machine_mode, const_tree);
230 static bool arm_return_in_memory (const_tree, const_tree);
231 #if ARM_UNWIND_INFO
232 static void arm_unwind_emit (FILE *, rtx_insn *);
233 static bool arm_output_ttype (rtx);
234 static void arm_asm_emit_except_personality (rtx);
235 static void arm_asm_init_sections (void);
236 #endif
237 static rtx arm_dwarf_register_span (rtx);
238
239 static tree arm_cxx_guard_type (void);
240 static bool arm_cxx_guard_mask_bit (void);
241 static tree arm_get_cookie_size (tree);
242 static bool arm_cookie_has_size (void);
243 static bool arm_cxx_cdtor_returns_this (void);
244 static bool arm_cxx_key_method_may_be_inline (void);
245 static void arm_cxx_determine_class_data_visibility (tree);
246 static bool arm_cxx_class_data_always_comdat (void);
247 static bool arm_cxx_use_aeabi_atexit (void);
248 static void arm_init_libfuncs (void);
249 static tree arm_build_builtin_va_list (void);
250 static void arm_expand_builtin_va_start (tree, rtx);
251 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
252 static void arm_option_override (void);
253 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
254 static bool arm_cannot_copy_insn_p (rtx_insn *);
255 static int arm_issue_rate (void);
256 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
257 static bool arm_output_addr_const_extra (FILE *, rtx);
258 static bool arm_allocate_stack_slots_for_args (void);
259 static bool arm_warn_func_return (tree);
260 static const char *arm_invalid_parameter_type (const_tree t);
261 static const char *arm_invalid_return_type (const_tree t);
262 static tree arm_promoted_type (const_tree t);
263 static tree arm_convert_to_type (tree type, tree expr);
264 static bool arm_scalar_mode_supported_p (machine_mode);
265 static bool arm_frame_pointer_required (void);
266 static bool arm_can_eliminate (const int, const int);
267 static void arm_asm_trampoline_template (FILE *);
268 static void arm_trampoline_init (rtx, tree, rtx);
269 static rtx arm_trampoline_adjust_address (rtx);
270 static rtx arm_pic_static_addr (rtx orig, rtx reg);
271 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
272 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
273 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
274 static bool arm_array_mode_supported_p (machine_mode,
275 unsigned HOST_WIDE_INT);
276 static machine_mode arm_preferred_simd_mode (machine_mode);
277 static bool arm_class_likely_spilled_p (reg_class_t);
278 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
279 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
280 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
281 const_tree type,
282 int misalignment,
283 bool is_packed);
284 static void arm_conditional_register_usage (void);
285 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
286 static unsigned int arm_autovectorize_vector_sizes (void);
287 static int arm_default_branch_cost (bool, bool);
288 static int arm_cortex_a5_branch_cost (bool, bool);
289 static int arm_cortex_m_branch_cost (bool, bool);
290
291 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
292 const unsigned char *sel);
293
294 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
295 tree vectype,
296 int misalign ATTRIBUTE_UNUSED);
297 static unsigned arm_add_stmt_cost (void *data, int count,
298 enum vect_cost_for_stmt kind,
299 struct _stmt_vec_info *stmt_info,
300 int misalign,
301 enum vect_cost_model_location where);
302
303 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
304 bool op0_preserve_value);
305 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
306
307 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
308 \f
309 /* Table of machine attributes. */
310 static const struct attribute_spec arm_attribute_table[] =
311 {
312 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
313 affects_type_identity } */
314 /* Function calls made to this symbol must be done indirectly, because
315 it may lie outside of the 26 bit addressing range of a normal function
316 call. */
317 { "long_call", 0, 0, false, true, true, NULL, false },
318 /* Whereas these functions are always known to reside within the 26 bit
319 addressing range. */
320 { "short_call", 0, 0, false, true, true, NULL, false },
321 /* Specify the procedure call conventions for a function. */
322 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
323 false },
324 /* Interrupt Service Routines have special prologue and epilogue requirements. */
325 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
326 false },
327 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
328 false },
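/* For example (illustrative), an interrupt handler can be declared as

     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));

   The optional argument names the exception kind ("IRQ", "FIQ", "SWI",
   "ABORT" or "UNDEF"); the prologue and epilogue are then adjusted to
   save the extra state and use the matching exception-return sequence.  */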
329 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
330 false },
331 #ifdef ARM_PE
332 /* ARM/PE has three new attributes:
333 interfacearm - ?
334 dllexport - for exporting a function/variable that will live in a dll
335 dllimport - for importing a function/variable from a dll
336
337 Microsoft allows multiple declspecs in one __declspec, separating
338 them with spaces. We do NOT support this. Instead, use __declspec
339 multiple times.
340 */
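/* For example (illustrative; the second attribute name is arbitrary):

     __declspec(dllimport) __declspec(naked) int f (void);

   rather than the space-separated Microsoft form
   "__declspec(dllimport naked)".  */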
341 { "dllimport", 0, 0, true, false, false, NULL, false },
342 { "dllexport", 0, 0, true, false, false, NULL, false },
343 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
344 false },
345 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
346 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
347 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
348 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
349 false },
350 #endif
351 { NULL, 0, 0, false, false, false, NULL, false }
352 };
353 \f
354 /* Initialize the GCC target structure. */
355 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
356 #undef TARGET_MERGE_DECL_ATTRIBUTES
357 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
358 #endif
359
360 #undef TARGET_LEGITIMIZE_ADDRESS
361 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
362
363 #undef TARGET_LRA_P
364 #define TARGET_LRA_P arm_lra_p
365
366 #undef TARGET_ATTRIBUTE_TABLE
367 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
368
369 #undef TARGET_ASM_FILE_START
370 #define TARGET_ASM_FILE_START arm_file_start
371 #undef TARGET_ASM_FILE_END
372 #define TARGET_ASM_FILE_END arm_file_end
373
374 #undef TARGET_ASM_ALIGNED_SI_OP
375 #define TARGET_ASM_ALIGNED_SI_OP NULL
376 #undef TARGET_ASM_INTEGER
377 #define TARGET_ASM_INTEGER arm_assemble_integer
378
379 #undef TARGET_PRINT_OPERAND
380 #define TARGET_PRINT_OPERAND arm_print_operand
381 #undef TARGET_PRINT_OPERAND_ADDRESS
382 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
383 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
384 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
385
386 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
387 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
388
389 #undef TARGET_ASM_FUNCTION_PROLOGUE
390 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
391
392 #undef TARGET_ASM_FUNCTION_EPILOGUE
393 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
394
395 #undef TARGET_OPTION_OVERRIDE
396 #define TARGET_OPTION_OVERRIDE arm_option_override
397
398 #undef TARGET_COMP_TYPE_ATTRIBUTES
399 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
400
401 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
402 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
403
404 #undef TARGET_SCHED_ADJUST_COST
405 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
406
407 #undef TARGET_SCHED_REORDER
408 #define TARGET_SCHED_REORDER arm_sched_reorder
409
410 #undef TARGET_REGISTER_MOVE_COST
411 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
412
413 #undef TARGET_MEMORY_MOVE_COST
414 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
415
416 #undef TARGET_ENCODE_SECTION_INFO
417 #ifdef ARM_PE
418 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
419 #else
420 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
421 #endif
422
423 #undef TARGET_STRIP_NAME_ENCODING
424 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
425
426 #undef TARGET_ASM_INTERNAL_LABEL
427 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
428
429 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
430 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
431
432 #undef TARGET_FUNCTION_VALUE
433 #define TARGET_FUNCTION_VALUE arm_function_value
434
435 #undef TARGET_LIBCALL_VALUE
436 #define TARGET_LIBCALL_VALUE arm_libcall_value
437
438 #undef TARGET_FUNCTION_VALUE_REGNO_P
439 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
440
441 #undef TARGET_ASM_OUTPUT_MI_THUNK
442 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
443 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
444 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
445
446 #undef TARGET_RTX_COSTS
447 #define TARGET_RTX_COSTS arm_rtx_costs
448 #undef TARGET_ADDRESS_COST
449 #define TARGET_ADDRESS_COST arm_address_cost
450
451 #undef TARGET_SHIFT_TRUNCATION_MASK
452 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
453 #undef TARGET_VECTOR_MODE_SUPPORTED_P
454 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
455 #undef TARGET_ARRAY_MODE_SUPPORTED_P
456 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
457 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
458 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
459 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
460 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
461 arm_autovectorize_vector_sizes
462
463 #undef TARGET_MACHINE_DEPENDENT_REORG
464 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
465
466 #undef TARGET_INIT_BUILTINS
467 #define TARGET_INIT_BUILTINS arm_init_builtins
468 #undef TARGET_EXPAND_BUILTIN
469 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
470 #undef TARGET_BUILTIN_DECL
471 #define TARGET_BUILTIN_DECL arm_builtin_decl
472
473 #undef TARGET_INIT_LIBFUNCS
474 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
475
476 #undef TARGET_PROMOTE_FUNCTION_MODE
477 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
478 #undef TARGET_PROMOTE_PROTOTYPES
479 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
480 #undef TARGET_PASS_BY_REFERENCE
481 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
482 #undef TARGET_ARG_PARTIAL_BYTES
483 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
484 #undef TARGET_FUNCTION_ARG
485 #define TARGET_FUNCTION_ARG arm_function_arg
486 #undef TARGET_FUNCTION_ARG_ADVANCE
487 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
488 #undef TARGET_FUNCTION_ARG_BOUNDARY
489 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
490
491 #undef TARGET_SETUP_INCOMING_VARARGS
492 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
493
494 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
495 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
496
497 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
498 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
499 #undef TARGET_TRAMPOLINE_INIT
500 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
501 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
502 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
503
504 #undef TARGET_WARN_FUNC_RETURN
505 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
506
507 #undef TARGET_DEFAULT_SHORT_ENUMS
508 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
509
510 #undef TARGET_ALIGN_ANON_BITFIELD
511 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
512
513 #undef TARGET_NARROW_VOLATILE_BITFIELD
514 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
515
516 #undef TARGET_CXX_GUARD_TYPE
517 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
518
519 #undef TARGET_CXX_GUARD_MASK_BIT
520 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
521
522 #undef TARGET_CXX_GET_COOKIE_SIZE
523 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
524
525 #undef TARGET_CXX_COOKIE_HAS_SIZE
526 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
527
528 #undef TARGET_CXX_CDTOR_RETURNS_THIS
529 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
530
531 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
532 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
533
534 #undef TARGET_CXX_USE_AEABI_ATEXIT
535 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
536
537 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
538 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
539 arm_cxx_determine_class_data_visibility
540
541 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
542 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
543
544 #undef TARGET_RETURN_IN_MSB
545 #define TARGET_RETURN_IN_MSB arm_return_in_msb
546
547 #undef TARGET_RETURN_IN_MEMORY
548 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
549
550 #undef TARGET_MUST_PASS_IN_STACK
551 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
552
553 #if ARM_UNWIND_INFO
554 #undef TARGET_ASM_UNWIND_EMIT
555 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
556
557 /* EABI unwinding tables use a different format for the typeinfo tables. */
558 #undef TARGET_ASM_TTYPE
559 #define TARGET_ASM_TTYPE arm_output_ttype
560
561 #undef TARGET_ARM_EABI_UNWINDER
562 #define TARGET_ARM_EABI_UNWINDER true
563
564 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
565 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
566
567 #undef TARGET_ASM_INIT_SECTIONS
568 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
569 #endif /* ARM_UNWIND_INFO */
570
571 #undef TARGET_DWARF_REGISTER_SPAN
572 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
573
574 #undef TARGET_CANNOT_COPY_INSN_P
575 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
576
577 #ifdef HAVE_AS_TLS
578 #undef TARGET_HAVE_TLS
579 #define TARGET_HAVE_TLS true
580 #endif
581
582 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
583 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
584
585 #undef TARGET_LEGITIMATE_CONSTANT_P
586 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
587
588 #undef TARGET_CANNOT_FORCE_CONST_MEM
589 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
590
591 #undef TARGET_MAX_ANCHOR_OFFSET
592 #define TARGET_MAX_ANCHOR_OFFSET 4095
593
594 /* The minimum is set such that the total size of the block
595 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
596 divisible by eight, ensuring natural spacing of anchors. */
597 #undef TARGET_MIN_ANCHOR_OFFSET
598 #define TARGET_MIN_ANCHOR_OFFSET -4088
599
600 #undef TARGET_SCHED_ISSUE_RATE
601 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
602
603 #undef TARGET_MANGLE_TYPE
604 #define TARGET_MANGLE_TYPE arm_mangle_type
605
606 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
607 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
608
609 #undef TARGET_BUILD_BUILTIN_VA_LIST
610 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
611 #undef TARGET_EXPAND_BUILTIN_VA_START
612 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
613 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
614 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
615
616 #ifdef HAVE_AS_TLS
617 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
618 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
619 #endif
620
621 #undef TARGET_LEGITIMATE_ADDRESS_P
622 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
623
624 #undef TARGET_PREFERRED_RELOAD_CLASS
625 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
626
627 #undef TARGET_INVALID_PARAMETER_TYPE
628 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
629
630 #undef TARGET_INVALID_RETURN_TYPE
631 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
632
633 #undef TARGET_PROMOTED_TYPE
634 #define TARGET_PROMOTED_TYPE arm_promoted_type
635
636 #undef TARGET_CONVERT_TO_TYPE
637 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
638
639 #undef TARGET_SCALAR_MODE_SUPPORTED_P
640 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
641
642 #undef TARGET_FRAME_POINTER_REQUIRED
643 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
644
645 #undef TARGET_CAN_ELIMINATE
646 #define TARGET_CAN_ELIMINATE arm_can_eliminate
647
648 #undef TARGET_CONDITIONAL_REGISTER_USAGE
649 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
650
651 #undef TARGET_CLASS_LIKELY_SPILLED_P
652 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
653
654 #undef TARGET_VECTORIZE_BUILTINS
655 #define TARGET_VECTORIZE_BUILTINS
656
657 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
658 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
659 arm_builtin_vectorized_function
660
661 #undef TARGET_VECTOR_ALIGNMENT
662 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
663
664 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
665 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
666 arm_vector_alignment_reachable
667
668 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
669 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
670 arm_builtin_support_vector_misalignment
671
672 #undef TARGET_PREFERRED_RENAME_CLASS
673 #define TARGET_PREFERRED_RENAME_CLASS \
674 arm_preferred_rename_class
675
676 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
677 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
678 arm_vectorize_vec_perm_const_ok
679
680 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
681 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
682 arm_builtin_vectorization_cost
683 #undef TARGET_VECTORIZE_ADD_STMT_COST
684 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
685
686 #undef TARGET_CANONICALIZE_COMPARISON
687 #define TARGET_CANONICALIZE_COMPARISON \
688 arm_canonicalize_comparison
689
690 #undef TARGET_ASAN_SHADOW_OFFSET
691 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
692
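/* arm_restrict_it corresponds to -mrestrict-it: ARMv8 deprecates IT blocks
   that cover more than one instruction, so in that mode only a single
   conditionally executed instruction is allowed per IT block, while the
   classic Thumb-2 encoding permits up to four.  */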
693 #undef MAX_INSN_PER_IT_BLOCK
694 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
695
696 #undef TARGET_CAN_USE_DOLOOP_P
697 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
698
699 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
700 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
701
702 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
703 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
704
705 #undef TARGET_SCHED_FUSION_PRIORITY
706 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
707
708 struct gcc_target targetm = TARGET_INITIALIZER;
709 \f
710 /* Obstack for minipool constant handling. */
711 static struct obstack minipool_obstack;
712 static char * minipool_startobj;
713
714 /* The maximum number of insns skipped which
715 will be conditionalised if possible. */
716 static int max_insns_skipped = 5;
717
718 extern FILE * asm_out_file;
719
720 /* True if we are currently building a constant table. */
721 int making_const_table;
722
723 /* The processor for which instructions should be scheduled. */
724 enum processor_type arm_tune = arm_none;
725
726 /* The current tuning set. */
727 const struct tune_params *current_tune;
728
729 /* Which floating point hardware to schedule for. */
730 int arm_fpu_attr;
731
732 /* Which floating point hardware to use. */
733 const struct arm_fpu_desc *arm_fpu_desc;
734
735 /* Used for Thumb call_via trampolines. */
736 rtx thumb_call_via_label[14];
737 static int thumb_call_reg_needed;
738
739 /* The bits in this mask specify which
740 instructions we are allowed to generate. */
741 unsigned long insn_flags = 0;
742
743 /* The bits in this mask specify which instruction scheduling options should
744 be used. */
745 unsigned long tune_flags = 0;
746
747 /* The highest ARM architecture version supported by the
748 target. */
749 enum base_architecture arm_base_arch = BASE_ARCH_0;
750
751 /* The following are used in the arm.md file as equivalents to bits
752 in the above two flag variables. */
753
754 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
755 int arm_arch3m = 0;
756
757 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
758 int arm_arch4 = 0;
759
760 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
761 int arm_arch4t = 0;
762
763 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
764 int arm_arch5 = 0;
765
766 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
767 int arm_arch5e = 0;
768
769 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
770 int arm_arch6 = 0;
771
772 /* Nonzero if this chip supports the ARM 6K extensions. */
773 int arm_arch6k = 0;
774
775 /* Nonzero if instructions present in ARMv6-M can be used. */
776 int arm_arch6m = 0;
777
778 /* Nonzero if this chip supports the ARM 7 extensions. */
779 int arm_arch7 = 0;
780
781 /* Nonzero if instructions not present in the 'M' profile can be used. */
782 int arm_arch_notm = 0;
783
784 /* Nonzero if instructions present in ARMv7E-M can be used. */
785 int arm_arch7em = 0;
786
787 /* Nonzero if instructions present in ARMv8 can be used. */
788 int arm_arch8 = 0;
789
790 /* Nonzero if this chip can benefit from load scheduling. */
791 int arm_ld_sched = 0;
792
793 /* Nonzero if this chip is a StrongARM. */
794 int arm_tune_strongarm = 0;
795
796 /* Nonzero if this chip supports Intel Wireless MMX technology. */
797 int arm_arch_iwmmxt = 0;
798
799 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
800 int arm_arch_iwmmxt2 = 0;
801
802 /* Nonzero if this chip is an XScale. */
803 int arm_arch_xscale = 0;
804
805 /* Nonzero if tuning for XScale. */
806 int arm_tune_xscale = 0;
807
808 /* Nonzero if we want to tune for stores that access the write-buffer.
809 This typically means an ARM6 or ARM7 with MMU or MPU. */
810 int arm_tune_wbuf = 0;
811
812 /* Nonzero if tuning for Cortex-A9. */
813 int arm_tune_cortex_a9 = 0;
814
815 /* Nonzero if generating Thumb instructions. */
816 int thumb_code = 0;
817
818 /* Nonzero if generating Thumb-1 instructions. */
819 int thumb1_code = 0;
820
821 /* Nonzero if we should define __THUMB_INTERWORK__ in the
822 preprocessor.
823 XXX This is a bit of a hack; it's intended to help work around
824 problems in GLD which doesn't understand that armv5t code is
825 interworking clean. */
826 int arm_cpp_interwork = 0;
827
828 /* Nonzero if chip supports Thumb 2. */
829 int arm_arch_thumb2;
830
831 /* Nonzero if chip supports integer division instruction. */
832 int arm_arch_arm_hwdiv;
833 int arm_arch_thumb_hwdiv;
834
835 /* Nonzero if we should use Neon to handle 64-bit operations rather
836 than core registers. */
837 int prefer_neon_for_64bits = 0;
838
839 /* Nonzero if we shouldn't use literal pools. */
840 bool arm_disable_literal_pool = false;
841
842 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
843 we must report the mode of the memory reference from
844 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
845 machine_mode output_memory_reference_mode;
846
847 /* The register number to be used for the PIC offset register. */
848 unsigned arm_pic_register = INVALID_REGNUM;
849
850 enum arm_pcs arm_pcs_default;
851
852 /* For an explanation of these variables, see final_prescan_insn below. */
853 int arm_ccfsm_state;
854 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
855 enum arm_cond_code arm_current_cc;
856
857 rtx arm_target_insn;
858 int arm_target_label;
859 /* The number of conditionally executed insns, including the current insn. */
860 int arm_condexec_count = 0;
861 /* A bitmask specifying the patterns for the IT block.
862 Zero means do not output an IT block before this insn. */
863 int arm_condexec_mask = 0;
864 /* The number of bits used in arm_condexec_mask. */
865 int arm_condexec_masklen = 0;
866
867 /* Nonzero if chip supports the ARMv8 CRC instructions. */
868 int arm_arch_crc = 0;
869
870 /* Nonzero if the core has a very small, high-latency, multiply unit. */
871 int arm_m_profile_small_mul = 0;
872
873 /* The condition codes of the ARM, and the inverse function. */
874 static const char * const arm_condition_codes[] =
875 {
876 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
877 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
878 };
879
880 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
881 int arm_regs_in_sequence[] =
882 {
883 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
884 };
885
886 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
887 #define streq(string1, string2) (strcmp (string1, string2) == 0)
888
889 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
890 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
891 | (1 << PIC_OFFSET_TABLE_REGNUM)))
892 \f
893 /* Initialization code. */
894
895 struct processors
896 {
897 const char *const name;
898 enum processor_type core;
899 const char *arch;
900 enum base_architecture base_arch;
901 const unsigned long flags;
902 const struct tune_params *const tune;
903 };
904
905
906 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
907 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
908 prefetch_slots, \
909 l1_size, \
910 l1_line_size
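/* For example (illustrative values), ARM_PREFETCH_BENEFICIAL (4, 32, 32)
   simply expands to the three comma-separated initializers 4, 32, 32 that
   fill the prefetch-slot, L1-cache-size and L1-line-size fields of a
   tune_params entry.  */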
911
912 /* arm generic vectorizer costs. */
913 static const
914 struct cpu_vec_costs arm_default_vec_cost = {
915 1, /* scalar_stmt_cost. */
916 1, /* scalar_load_cost. */
917 1, /* scalar_store_cost. */
918 1, /* vec_stmt_cost. */
919 1, /* vec_to_scalar_cost. */
920 1, /* scalar_to_vec_cost. */
921 1, /* vec_align_load_cost. */
922 1, /* vec_unalign_load_cost. */
923 1, /* vec_unalign_store_cost. */
924 1, /* vec_store_cost. */
925 3, /* cond_taken_branch_cost. */
926 1, /* cond_not_taken_branch_cost. */
927 };
928
929 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
930 #include "aarch-cost-tables.h"
931
932
933
934 const struct cpu_cost_table cortexa9_extra_costs =
935 {
936 /* ALU */
937 {
938 0, /* arith. */
939 0, /* logical. */
940 0, /* shift. */
941 COSTS_N_INSNS (1), /* shift_reg. */
942 COSTS_N_INSNS (1), /* arith_shift. */
943 COSTS_N_INSNS (2), /* arith_shift_reg. */
944 0, /* log_shift. */
945 COSTS_N_INSNS (1), /* log_shift_reg. */
946 COSTS_N_INSNS (1), /* extend. */
947 COSTS_N_INSNS (2), /* extend_arith. */
948 COSTS_N_INSNS (1), /* bfi. */
949 COSTS_N_INSNS (1), /* bfx. */
950 0, /* clz. */
951 0, /* rev. */
952 0, /* non_exec. */
953 true /* non_exec_costs_exec. */
954 },
955 {
956 /* MULT SImode */
957 {
958 COSTS_N_INSNS (3), /* simple. */
959 COSTS_N_INSNS (3), /* flag_setting. */
960 COSTS_N_INSNS (2), /* extend. */
961 COSTS_N_INSNS (3), /* add. */
962 COSTS_N_INSNS (2), /* extend_add. */
963 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
964 },
965 /* MULT DImode */
966 {
967 0, /* simple (N/A). */
968 0, /* flag_setting (N/A). */
969 COSTS_N_INSNS (4), /* extend. */
970 0, /* add (N/A). */
971 COSTS_N_INSNS (4), /* extend_add. */
972 0 /* idiv (N/A). */
973 }
974 },
975 /* LD/ST */
976 {
977 COSTS_N_INSNS (2), /* load. */
978 COSTS_N_INSNS (2), /* load_sign_extend. */
979 COSTS_N_INSNS (2), /* ldrd. */
980 COSTS_N_INSNS (2), /* ldm_1st. */
981 1, /* ldm_regs_per_insn_1st. */
982 2, /* ldm_regs_per_insn_subsequent. */
983 COSTS_N_INSNS (5), /* loadf. */
984 COSTS_N_INSNS (5), /* loadd. */
985 COSTS_N_INSNS (1), /* load_unaligned. */
986 COSTS_N_INSNS (2), /* store. */
987 COSTS_N_INSNS (2), /* strd. */
988 COSTS_N_INSNS (2), /* stm_1st. */
989 1, /* stm_regs_per_insn_1st. */
990 2, /* stm_regs_per_insn_subsequent. */
991 COSTS_N_INSNS (1), /* storef. */
992 COSTS_N_INSNS (1), /* stored. */
993 COSTS_N_INSNS (1) /* store_unaligned. */
994 },
995 {
996 /* FP SFmode */
997 {
998 COSTS_N_INSNS (14), /* div. */
999 COSTS_N_INSNS (4), /* mult. */
1000 COSTS_N_INSNS (7), /* mult_addsub. */
1001 COSTS_N_INSNS (30), /* fma. */
1002 COSTS_N_INSNS (3), /* addsub. */
1003 COSTS_N_INSNS (1), /* fpconst. */
1004 COSTS_N_INSNS (1), /* neg. */
1005 COSTS_N_INSNS (3), /* compare. */
1006 COSTS_N_INSNS (3), /* widen. */
1007 COSTS_N_INSNS (3), /* narrow. */
1008 COSTS_N_INSNS (3), /* toint. */
1009 COSTS_N_INSNS (3), /* fromint. */
1010 COSTS_N_INSNS (3) /* roundint. */
1011 },
1012 /* FP DFmode */
1013 {
1014 COSTS_N_INSNS (24), /* div. */
1015 COSTS_N_INSNS (5), /* mult. */
1016 COSTS_N_INSNS (8), /* mult_addsub. */
1017 COSTS_N_INSNS (30), /* fma. */
1018 COSTS_N_INSNS (3), /* addsub. */
1019 COSTS_N_INSNS (1), /* fpconst. */
1020 COSTS_N_INSNS (1), /* neg. */
1021 COSTS_N_INSNS (3), /* compare. */
1022 COSTS_N_INSNS (3), /* widen. */
1023 COSTS_N_INSNS (3), /* narrow. */
1024 COSTS_N_INSNS (3), /* toint. */
1025 COSTS_N_INSNS (3), /* fromint. */
1026 COSTS_N_INSNS (3) /* roundint. */
1027 }
1028 },
1029 /* Vector */
1030 {
1031 COSTS_N_INSNS (1) /* alu. */
1032 }
1033 };
1034
1035 const struct cpu_cost_table cortexa8_extra_costs =
1036 {
1037 /* ALU */
1038 {
1039 0, /* arith. */
1040 0, /* logical. */
1041 COSTS_N_INSNS (1), /* shift. */
1042 0, /* shift_reg. */
1043 COSTS_N_INSNS (1), /* arith_shift. */
1044 0, /* arith_shift_reg. */
1045 COSTS_N_INSNS (1), /* log_shift. */
1046 0, /* log_shift_reg. */
1047 0, /* extend. */
1048 0, /* extend_arith. */
1049 0, /* bfi. */
1050 0, /* bfx. */
1051 0, /* clz. */
1052 0, /* rev. */
1053 0, /* non_exec. */
1054 true /* non_exec_costs_exec. */
1055 },
1056 {
1057 /* MULT SImode */
1058 {
1059 COSTS_N_INSNS (1), /* simple. */
1060 COSTS_N_INSNS (1), /* flag_setting. */
1061 COSTS_N_INSNS (1), /* extend. */
1062 COSTS_N_INSNS (1), /* add. */
1063 COSTS_N_INSNS (1), /* extend_add. */
1064 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1065 },
1066 /* MULT DImode */
1067 {
1068 0, /* simple (N/A). */
1069 0, /* flag_setting (N/A). */
1070 COSTS_N_INSNS (2), /* extend. */
1071 0, /* add (N/A). */
1072 COSTS_N_INSNS (2), /* extend_add. */
1073 0 /* idiv (N/A). */
1074 }
1075 },
1076 /* LD/ST */
1077 {
1078 COSTS_N_INSNS (1), /* load. */
1079 COSTS_N_INSNS (1), /* load_sign_extend. */
1080 COSTS_N_INSNS (1), /* ldrd. */
1081 COSTS_N_INSNS (1), /* ldm_1st. */
1082 1, /* ldm_regs_per_insn_1st. */
1083 2, /* ldm_regs_per_insn_subsequent. */
1084 COSTS_N_INSNS (1), /* loadf. */
1085 COSTS_N_INSNS (1), /* loadd. */
1086 COSTS_N_INSNS (1), /* load_unaligned. */
1087 COSTS_N_INSNS (1), /* store. */
1088 COSTS_N_INSNS (1), /* strd. */
1089 COSTS_N_INSNS (1), /* stm_1st. */
1090 1, /* stm_regs_per_insn_1st. */
1091 2, /* stm_regs_per_insn_subsequent. */
1092 COSTS_N_INSNS (1), /* storef. */
1093 COSTS_N_INSNS (1), /* stored. */
1094 COSTS_N_INSNS (1) /* store_unaligned. */
1095 },
1096 {
1097 /* FP SFmode */
1098 {
1099 COSTS_N_INSNS (36), /* div. */
1100 COSTS_N_INSNS (11), /* mult. */
1101 COSTS_N_INSNS (20), /* mult_addsub. */
1102 COSTS_N_INSNS (30), /* fma. */
1103 COSTS_N_INSNS (9), /* addsub. */
1104 COSTS_N_INSNS (3), /* fpconst. */
1105 COSTS_N_INSNS (3), /* neg. */
1106 COSTS_N_INSNS (6), /* compare. */
1107 COSTS_N_INSNS (4), /* widen. */
1108 COSTS_N_INSNS (4), /* narrow. */
1109 COSTS_N_INSNS (8), /* toint. */
1110 COSTS_N_INSNS (8), /* fromint. */
1111 COSTS_N_INSNS (8) /* roundint. */
1112 },
1113 /* FP DFmode */
1114 {
1115 COSTS_N_INSNS (64), /* div. */
1116 COSTS_N_INSNS (16), /* mult. */
1117 COSTS_N_INSNS (25), /* mult_addsub. */
1118 COSTS_N_INSNS (30), /* fma. */
1119 COSTS_N_INSNS (9), /* addsub. */
1120 COSTS_N_INSNS (3), /* fpconst. */
1121 COSTS_N_INSNS (3), /* neg. */
1122 COSTS_N_INSNS (6), /* compare. */
1123 COSTS_N_INSNS (6), /* widen. */
1124 COSTS_N_INSNS (6), /* narrow. */
1125 COSTS_N_INSNS (8), /* toint. */
1126 COSTS_N_INSNS (8), /* fromint. */
1127 COSTS_N_INSNS (8) /* roundint. */
1128 }
1129 },
1130 /* Vector */
1131 {
1132 COSTS_N_INSNS (1) /* alu. */
1133 }
1134 };
1135
1136 const struct cpu_cost_table cortexa5_extra_costs =
1137 {
1138 /* ALU */
1139 {
1140 0, /* arith. */
1141 0, /* logical. */
1142 COSTS_N_INSNS (1), /* shift. */
1143 COSTS_N_INSNS (1), /* shift_reg. */
1144 COSTS_N_INSNS (1), /* arith_shift. */
1145 COSTS_N_INSNS (1), /* arith_shift_reg. */
1146 COSTS_N_INSNS (1), /* log_shift. */
1147 COSTS_N_INSNS (1), /* log_shift_reg. */
1148 COSTS_N_INSNS (1), /* extend. */
1149 COSTS_N_INSNS (1), /* extend_arith. */
1150 COSTS_N_INSNS (1), /* bfi. */
1151 COSTS_N_INSNS (1), /* bfx. */
1152 COSTS_N_INSNS (1), /* clz. */
1153 COSTS_N_INSNS (1), /* rev. */
1154 0, /* non_exec. */
1155 true /* non_exec_costs_exec. */
1156 },
1157
1158 {
1159 /* MULT SImode */
1160 {
1161 0, /* simple. */
1162 COSTS_N_INSNS (1), /* flag_setting. */
1163 COSTS_N_INSNS (1), /* extend. */
1164 COSTS_N_INSNS (1), /* add. */
1165 COSTS_N_INSNS (1), /* extend_add. */
1166 COSTS_N_INSNS (7) /* idiv. */
1167 },
1168 /* MULT DImode */
1169 {
1170 0, /* simple (N/A). */
1171 0, /* flag_setting (N/A). */
1172 COSTS_N_INSNS (1), /* extend. */
1173 0, /* add. */
1174 COSTS_N_INSNS (2), /* extend_add. */
1175 0 /* idiv (N/A). */
1176 }
1177 },
1178 /* LD/ST */
1179 {
1180 COSTS_N_INSNS (1), /* load. */
1181 COSTS_N_INSNS (1), /* load_sign_extend. */
1182 COSTS_N_INSNS (6), /* ldrd. */
1183 COSTS_N_INSNS (1), /* ldm_1st. */
1184 1, /* ldm_regs_per_insn_1st. */
1185 2, /* ldm_regs_per_insn_subsequent. */
1186 COSTS_N_INSNS (2), /* loadf. */
1187 COSTS_N_INSNS (4), /* loadd. */
1188 COSTS_N_INSNS (1), /* load_unaligned. */
1189 COSTS_N_INSNS (1), /* store. */
1190 COSTS_N_INSNS (3), /* strd. */
1191 COSTS_N_INSNS (1), /* stm_1st. */
1192 1, /* stm_regs_per_insn_1st. */
1193 2, /* stm_regs_per_insn_subsequent. */
1194 COSTS_N_INSNS (2), /* storef. */
1195 COSTS_N_INSNS (2), /* stored. */
1196 COSTS_N_INSNS (1) /* store_unaligned. */
1197 },
1198 {
1199 /* FP SFmode */
1200 {
1201 COSTS_N_INSNS (15), /* div. */
1202 COSTS_N_INSNS (3), /* mult. */
1203 COSTS_N_INSNS (7), /* mult_addsub. */
1204 COSTS_N_INSNS (7), /* fma. */
1205 COSTS_N_INSNS (3), /* addsub. */
1206 COSTS_N_INSNS (3), /* fpconst. */
1207 COSTS_N_INSNS (3), /* neg. */
1208 COSTS_N_INSNS (3), /* compare. */
1209 COSTS_N_INSNS (3), /* widen. */
1210 COSTS_N_INSNS (3), /* narrow. */
1211 COSTS_N_INSNS (3), /* toint. */
1212 COSTS_N_INSNS (3), /* fromint. */
1213 COSTS_N_INSNS (3) /* roundint. */
1214 },
1215 /* FP DFmode */
1216 {
1217 COSTS_N_INSNS (30), /* div. */
1218 COSTS_N_INSNS (6), /* mult. */
1219 COSTS_N_INSNS (10), /* mult_addsub. */
1220 COSTS_N_INSNS (7), /* fma. */
1221 COSTS_N_INSNS (3), /* addsub. */
1222 COSTS_N_INSNS (3), /* fpconst. */
1223 COSTS_N_INSNS (3), /* neg. */
1224 COSTS_N_INSNS (3), /* compare. */
1225 COSTS_N_INSNS (3), /* widen. */
1226 COSTS_N_INSNS (3), /* narrow. */
1227 COSTS_N_INSNS (3), /* toint. */
1228 COSTS_N_INSNS (3), /* fromint. */
1229 COSTS_N_INSNS (3) /* roundint. */
1230 }
1231 },
1232 /* Vector */
1233 {
1234 COSTS_N_INSNS (1) /* alu. */
1235 }
1236 };
1237
1238
1239 const struct cpu_cost_table cortexa7_extra_costs =
1240 {
1241 /* ALU */
1242 {
1243 0, /* arith. */
1244 0, /* logical. */
1245 COSTS_N_INSNS (1), /* shift. */
1246 COSTS_N_INSNS (1), /* shift_reg. */
1247 COSTS_N_INSNS (1), /* arith_shift. */
1248 COSTS_N_INSNS (1), /* arith_shift_reg. */
1249 COSTS_N_INSNS (1), /* log_shift. */
1250 COSTS_N_INSNS (1), /* log_shift_reg. */
1251 COSTS_N_INSNS (1), /* extend. */
1252 COSTS_N_INSNS (1), /* extend_arith. */
1253 COSTS_N_INSNS (1), /* bfi. */
1254 COSTS_N_INSNS (1), /* bfx. */
1255 COSTS_N_INSNS (1), /* clz. */
1256 COSTS_N_INSNS (1), /* rev. */
1257 0, /* non_exec. */
1258 true /* non_exec_costs_exec. */
1259 },
1260
1261 {
1262 /* MULT SImode */
1263 {
1264 0, /* simple. */
1265 COSTS_N_INSNS (1), /* flag_setting. */
1266 COSTS_N_INSNS (1), /* extend. */
1267 COSTS_N_INSNS (1), /* add. */
1268 COSTS_N_INSNS (1), /* extend_add. */
1269 COSTS_N_INSNS (7) /* idiv. */
1270 },
1271 /* MULT DImode */
1272 {
1273 0, /* simple (N/A). */
1274 0, /* flag_setting (N/A). */
1275 COSTS_N_INSNS (1), /* extend. */
1276 0, /* add. */
1277 COSTS_N_INSNS (2), /* extend_add. */
1278 0 /* idiv (N/A). */
1279 }
1280 },
1281 /* LD/ST */
1282 {
1283 COSTS_N_INSNS (1), /* load. */
1284 COSTS_N_INSNS (1), /* load_sign_extend. */
1285 COSTS_N_INSNS (3), /* ldrd. */
1286 COSTS_N_INSNS (1), /* ldm_1st. */
1287 1, /* ldm_regs_per_insn_1st. */
1288 2, /* ldm_regs_per_insn_subsequent. */
1289 COSTS_N_INSNS (2), /* loadf. */
1290 COSTS_N_INSNS (2), /* loadd. */
1291 COSTS_N_INSNS (1), /* load_unaligned. */
1292 COSTS_N_INSNS (1), /* store. */
1293 COSTS_N_INSNS (3), /* strd. */
1294 COSTS_N_INSNS (1), /* stm_1st. */
1295 1, /* stm_regs_per_insn_1st. */
1296 2, /* stm_regs_per_insn_subsequent. */
1297 COSTS_N_INSNS (2), /* storef. */
1298 COSTS_N_INSNS (2), /* stored. */
1299 COSTS_N_INSNS (1) /* store_unaligned. */
1300 },
1301 {
1302 /* FP SFmode */
1303 {
1304 COSTS_N_INSNS (15), /* div. */
1305 COSTS_N_INSNS (3), /* mult. */
1306 COSTS_N_INSNS (7), /* mult_addsub. */
1307 COSTS_N_INSNS (7), /* fma. */
1308 COSTS_N_INSNS (3), /* addsub. */
1309 COSTS_N_INSNS (3), /* fpconst. */
1310 COSTS_N_INSNS (3), /* neg. */
1311 COSTS_N_INSNS (3), /* compare. */
1312 COSTS_N_INSNS (3), /* widen. */
1313 COSTS_N_INSNS (3), /* narrow. */
1314 COSTS_N_INSNS (3), /* toint. */
1315 COSTS_N_INSNS (3), /* fromint. */
1316 COSTS_N_INSNS (3) /* roundint. */
1317 },
1318 /* FP DFmode */
1319 {
1320 COSTS_N_INSNS (30), /* div. */
1321 COSTS_N_INSNS (6), /* mult. */
1322 COSTS_N_INSNS (10), /* mult_addsub. */
1323 COSTS_N_INSNS (7), /* fma. */
1324 COSTS_N_INSNS (3), /* addsub. */
1325 COSTS_N_INSNS (3), /* fpconst. */
1326 COSTS_N_INSNS (3), /* neg. */
1327 COSTS_N_INSNS (3), /* compare. */
1328 COSTS_N_INSNS (3), /* widen. */
1329 COSTS_N_INSNS (3), /* narrow. */
1330 COSTS_N_INSNS (3), /* toint. */
1331 COSTS_N_INSNS (3), /* fromint. */
1332 COSTS_N_INSNS (3) /* roundint. */
1333 }
1334 },
1335 /* Vector */
1336 {
1337 COSTS_N_INSNS (1) /* alu. */
1338 }
1339 };
1340
1341 const struct cpu_cost_table cortexa12_extra_costs =
1342 {
1343 /* ALU */
1344 {
1345 0, /* arith. */
1346 0, /* logical. */
1347 0, /* shift. */
1348 COSTS_N_INSNS (1), /* shift_reg. */
1349 COSTS_N_INSNS (1), /* arith_shift. */
1350 COSTS_N_INSNS (1), /* arith_shift_reg. */
1351 COSTS_N_INSNS (1), /* log_shift. */
1352 COSTS_N_INSNS (1), /* log_shift_reg. */
1353 0, /* extend. */
1354 COSTS_N_INSNS (1), /* extend_arith. */
1355 0, /* bfi. */
1356 COSTS_N_INSNS (1), /* bfx. */
1357 COSTS_N_INSNS (1), /* clz. */
1358 COSTS_N_INSNS (1), /* rev. */
1359 0, /* non_exec. */
1360 true /* non_exec_costs_exec. */
1361 },
1362 /* MULT SImode */
1363 {
1364 {
1365 COSTS_N_INSNS (2), /* simple. */
1366 COSTS_N_INSNS (3), /* flag_setting. */
1367 COSTS_N_INSNS (2), /* extend. */
1368 COSTS_N_INSNS (3), /* add. */
1369 COSTS_N_INSNS (2), /* extend_add. */
1370 COSTS_N_INSNS (18) /* idiv. */
1371 },
1372 /* MULT DImode */
1373 {
1374 0, /* simple (N/A). */
1375 0, /* flag_setting (N/A). */
1376 COSTS_N_INSNS (3), /* extend. */
1377 0, /* add (N/A). */
1378 COSTS_N_INSNS (3), /* extend_add. */
1379 0 /* idiv (N/A). */
1380 }
1381 },
1382 /* LD/ST */
1383 {
1384 COSTS_N_INSNS (3), /* load. */
1385 COSTS_N_INSNS (3), /* load_sign_extend. */
1386 COSTS_N_INSNS (3), /* ldrd. */
1387 COSTS_N_INSNS (3), /* ldm_1st. */
1388 1, /* ldm_regs_per_insn_1st. */
1389 2, /* ldm_regs_per_insn_subsequent. */
1390 COSTS_N_INSNS (3), /* loadf. */
1391 COSTS_N_INSNS (3), /* loadd. */
1392 0, /* load_unaligned. */
1393 0, /* store. */
1394 0, /* strd. */
1395 0, /* stm_1st. */
1396 1, /* stm_regs_per_insn_1st. */
1397 2, /* stm_regs_per_insn_subsequent. */
1398 COSTS_N_INSNS (2), /* storef. */
1399 COSTS_N_INSNS (2), /* stored. */
1400 0 /* store_unaligned. */
1401 },
1402 {
1403 /* FP SFmode */
1404 {
1405 COSTS_N_INSNS (17), /* div. */
1406 COSTS_N_INSNS (4), /* mult. */
1407 COSTS_N_INSNS (8), /* mult_addsub. */
1408 COSTS_N_INSNS (8), /* fma. */
1409 COSTS_N_INSNS (4), /* addsub. */
1410 COSTS_N_INSNS (2), /* fpconst. */
1411 COSTS_N_INSNS (2), /* neg. */
1412 COSTS_N_INSNS (2), /* compare. */
1413 COSTS_N_INSNS (4), /* widen. */
1414 COSTS_N_INSNS (4), /* narrow. */
1415 COSTS_N_INSNS (4), /* toint. */
1416 COSTS_N_INSNS (4), /* fromint. */
1417 COSTS_N_INSNS (4) /* roundint. */
1418 },
1419 /* FP DFmode */
1420 {
1421 COSTS_N_INSNS (31), /* div. */
1422 COSTS_N_INSNS (4), /* mult. */
1423 COSTS_N_INSNS (8), /* mult_addsub. */
1424 COSTS_N_INSNS (8), /* fma. */
1425 COSTS_N_INSNS (4), /* addsub. */
1426 COSTS_N_INSNS (2), /* fpconst. */
1427 COSTS_N_INSNS (2), /* neg. */
1428 COSTS_N_INSNS (2), /* compare. */
1429 COSTS_N_INSNS (4), /* widen. */
1430 COSTS_N_INSNS (4), /* narrow. */
1431 COSTS_N_INSNS (4), /* toint. */
1432 COSTS_N_INSNS (4), /* fromint. */
1433 COSTS_N_INSNS (4) /* roundint. */
1434 }
1435 },
1436 /* Vector */
1437 {
1438 COSTS_N_INSNS (1) /* alu. */
1439 }
1440 };
1441
1442 const struct cpu_cost_table cortexa15_extra_costs =
1443 {
1444 /* ALU */
1445 {
1446 0, /* arith. */
1447 0, /* logical. */
1448 0, /* shift. */
1449 0, /* shift_reg. */
1450 COSTS_N_INSNS (1), /* arith_shift. */
1451 COSTS_N_INSNS (1), /* arith_shift_reg. */
1452 COSTS_N_INSNS (1), /* log_shift. */
1453 COSTS_N_INSNS (1), /* log_shift_reg. */
1454 0, /* extend. */
1455 COSTS_N_INSNS (1), /* extend_arith. */
1456 COSTS_N_INSNS (1), /* bfi. */
1457 0, /* bfx. */
1458 0, /* clz. */
1459 0, /* rev. */
1460 0, /* non_exec. */
1461 true /* non_exec_costs_exec. */
1462 },
1463 /* MULT SImode */
1464 {
1465 {
1466 COSTS_N_INSNS (2), /* simple. */
1467 COSTS_N_INSNS (3), /* flag_setting. */
1468 COSTS_N_INSNS (2), /* extend. */
1469 COSTS_N_INSNS (2), /* add. */
1470 COSTS_N_INSNS (2), /* extend_add. */
1471 COSTS_N_INSNS (18) /* idiv. */
1472 },
1473 /* MULT DImode */
1474 {
1475 0, /* simple (N/A). */
1476 0, /* flag_setting (N/A). */
1477 COSTS_N_INSNS (3), /* extend. */
1478 0, /* add (N/A). */
1479 COSTS_N_INSNS (3), /* extend_add. */
1480 0 /* idiv (N/A). */
1481 }
1482 },
1483 /* LD/ST */
1484 {
1485 COSTS_N_INSNS (3), /* load. */
1486 COSTS_N_INSNS (3), /* load_sign_extend. */
1487 COSTS_N_INSNS (3), /* ldrd. */
1488 COSTS_N_INSNS (4), /* ldm_1st. */
1489 1, /* ldm_regs_per_insn_1st. */
1490 2, /* ldm_regs_per_insn_subsequent. */
1491 COSTS_N_INSNS (4), /* loadf. */
1492 COSTS_N_INSNS (4), /* loadd. */
1493 0, /* load_unaligned. */
1494 0, /* store. */
1495 0, /* strd. */
1496 COSTS_N_INSNS (1), /* stm_1st. */
1497 1, /* stm_regs_per_insn_1st. */
1498 2, /* stm_regs_per_insn_subsequent. */
1499 0, /* storef. */
1500 0, /* stored. */
1501 0 /* store_unaligned. */
1502 },
1503 {
1504 /* FP SFmode */
1505 {
1506 COSTS_N_INSNS (17), /* div. */
1507 COSTS_N_INSNS (4), /* mult. */
1508 COSTS_N_INSNS (8), /* mult_addsub. */
1509 COSTS_N_INSNS (8), /* fma. */
1510 COSTS_N_INSNS (4), /* addsub. */
1511 COSTS_N_INSNS (2), /* fpconst. */
1512 COSTS_N_INSNS (2), /* neg. */
1513 COSTS_N_INSNS (5), /* compare. */
1514 COSTS_N_INSNS (4), /* widen. */
1515 COSTS_N_INSNS (4), /* narrow. */
1516 COSTS_N_INSNS (4), /* toint. */
1517 COSTS_N_INSNS (4), /* fromint. */
1518 COSTS_N_INSNS (4) /* roundint. */
1519 },
1520 /* FP DFmode */
1521 {
1522 COSTS_N_INSNS (31), /* div. */
1523 COSTS_N_INSNS (4), /* mult. */
1524 COSTS_N_INSNS (8), /* mult_addsub. */
1525 COSTS_N_INSNS (8), /* fma. */
1526 COSTS_N_INSNS (4), /* addsub. */
1527 COSTS_N_INSNS (2), /* fpconst. */
1528 COSTS_N_INSNS (2), /* neg. */
1529 COSTS_N_INSNS (2), /* compare. */
1530 COSTS_N_INSNS (4), /* widen. */
1531 COSTS_N_INSNS (4), /* narrow. */
1532 COSTS_N_INSNS (4), /* toint. */
1533 COSTS_N_INSNS (4), /* fromint. */
1534 COSTS_N_INSNS (4) /* roundint. */
1535 }
1536 },
1537 /* Vector */
1538 {
1539 COSTS_N_INSNS (1) /* alu. */
1540 }
1541 };
1542
1543 const struct cpu_cost_table v7m_extra_costs =
1544 {
1545 /* ALU */
1546 {
1547 0, /* arith. */
1548 0, /* logical. */
1549 0, /* shift. */
1550 0, /* shift_reg. */
1551 0, /* arith_shift. */
1552 COSTS_N_INSNS (1), /* arith_shift_reg. */
1553 0, /* log_shift. */
1554 COSTS_N_INSNS (1), /* log_shift_reg. */
1555 0, /* extend. */
1556 COSTS_N_INSNS (1), /* extend_arith. */
1557 0, /* bfi. */
1558 0, /* bfx. */
1559 0, /* clz. */
1560 0, /* rev. */
1561 COSTS_N_INSNS (1), /* non_exec. */
1562 false /* non_exec_costs_exec. */
1563 },
1564 {
1565 /* MULT SImode */
1566 {
1567 COSTS_N_INSNS (1), /* simple. */
1568 COSTS_N_INSNS (1), /* flag_setting. */
1569 COSTS_N_INSNS (2), /* extend. */
1570 COSTS_N_INSNS (1), /* add. */
1571 COSTS_N_INSNS (3), /* extend_add. */
1572 COSTS_N_INSNS (8) /* idiv. */
1573 },
1574 /* MULT DImode */
1575 {
1576 0, /* simple (N/A). */
1577 0, /* flag_setting (N/A). */
1578 COSTS_N_INSNS (2), /* extend. */
1579 0, /* add (N/A). */
1580 COSTS_N_INSNS (3), /* extend_add. */
1581 0 /* idiv (N/A). */
1582 }
1583 },
1584 /* LD/ST */
1585 {
1586 COSTS_N_INSNS (2), /* load. */
1587 0, /* load_sign_extend. */
1588 COSTS_N_INSNS (3), /* ldrd. */
1589 COSTS_N_INSNS (2), /* ldm_1st. */
1590 1, /* ldm_regs_per_insn_1st. */
1591 1, /* ldm_regs_per_insn_subsequent. */
1592 COSTS_N_INSNS (2), /* loadf. */
1593 COSTS_N_INSNS (3), /* loadd. */
1594 COSTS_N_INSNS (1), /* load_unaligned. */
1595 COSTS_N_INSNS (2), /* store. */
1596 COSTS_N_INSNS (3), /* strd. */
1597 COSTS_N_INSNS (2), /* stm_1st. */
1598 1, /* stm_regs_per_insn_1st. */
1599 1, /* stm_regs_per_insn_subsequent. */
1600 COSTS_N_INSNS (2), /* storef. */
1601 COSTS_N_INSNS (3), /* stored. */
1602 COSTS_N_INSNS (1) /* store_unaligned. */
1603 },
1604 {
1605 /* FP SFmode */
1606 {
1607 COSTS_N_INSNS (7), /* div. */
1608 COSTS_N_INSNS (2), /* mult. */
1609 COSTS_N_INSNS (5), /* mult_addsub. */
1610 COSTS_N_INSNS (3), /* fma. */
1611 COSTS_N_INSNS (1), /* addsub. */
1612 0, /* fpconst. */
1613 0, /* neg. */
1614 0, /* compare. */
1615 0, /* widen. */
1616 0, /* narrow. */
1617 0, /* toint. */
1618 0, /* fromint. */
1619 0 /* roundint. */
1620 },
1621 /* FP DFmode */
1622 {
1623 COSTS_N_INSNS (15), /* div. */
1624 COSTS_N_INSNS (5), /* mult. */
1625 COSTS_N_INSNS (7), /* mult_addsub. */
1626 COSTS_N_INSNS (7), /* fma. */
1627 COSTS_N_INSNS (3), /* addsub. */
1628 0, /* fpconst. */
1629 0, /* neg. */
1630 0, /* compare. */
1631 0, /* widen. */
1632 0, /* narrow. */
1633 0, /* toint. */
1634 0, /* fromint. */
1635 0 /* roundint. */
1636 }
1637 },
1638 /* Vector */
1639 {
1640 COSTS_N_INSNS (1) /* alu. */
1641 }
1642 };
1643
1644 const struct tune_params arm_slowmul_tune =
1645 {
1646 arm_slowmul_rtx_costs,
1647 NULL,
1648 NULL, /* Sched adj cost. */
1649 3, /* Constant limit. */
1650 5, /* Max cond insns. */
1651 ARM_PREFETCH_NOT_BENEFICIAL,
1652 true, /* Prefer constant pool. */
1653 arm_default_branch_cost,
1654 false, /* Prefer LDRD/STRD. */
1655 {true, true}, /* Prefer non short circuit. */
1656 &arm_default_vec_cost, /* Vectorizer costs. */
1657 false, /* Prefer Neon for 64-bits bitops. */
1658 false, false, /* Prefer 32-bit encodings. */
1659 false, /* Prefer Neon for stringops. */
1660 8 /* Maximum insns to inline memset. */
1661 };
1662
1663 const struct tune_params arm_fastmul_tune =
1664 {
1665 arm_fastmul_rtx_costs,
1666 NULL,
1667 NULL, /* Sched adj cost. */
1668 1, /* Constant limit. */
1669 5, /* Max cond insns. */
1670 ARM_PREFETCH_NOT_BENEFICIAL,
1671 true, /* Prefer constant pool. */
1672 arm_default_branch_cost,
1673 false, /* Prefer LDRD/STRD. */
1674 {true, true}, /* Prefer non short circuit. */
1675 &arm_default_vec_cost, /* Vectorizer costs. */
1676 false, /* Prefer Neon for 64-bits bitops. */
1677 false, false, /* Prefer 32-bit encodings. */
1678 false, /* Prefer Neon for stringops. */
1679 8 /* Maximum insns to inline memset. */
1680 };
1681
1682 /* StrongARM has early execution of branches, so a sequence that is worth
1683 skipping is shorter. Set max_insns_skipped to a lower value. */
1684
1685 const struct tune_params arm_strongarm_tune =
1686 {
1687 arm_fastmul_rtx_costs,
1688 NULL,
1689 NULL, /* Sched adj cost. */
1690 1, /* Constant limit. */
1691 3, /* Max cond insns. */
1692 ARM_PREFETCH_NOT_BENEFICIAL,
1693 true, /* Prefer constant pool. */
1694 arm_default_branch_cost,
1695 false, /* Prefer LDRD/STRD. */
1696 {true, true}, /* Prefer non short circuit. */
1697 &arm_default_vec_cost, /* Vectorizer costs. */
1698 false, /* Prefer Neon for 64-bits bitops. */
1699 false, false, /* Prefer 32-bit encodings. */
1700 false, /* Prefer Neon for stringops. */
1701 8 /* Maximum insns to inline memset. */
1702 };
1703
1704 const struct tune_params arm_xscale_tune =
1705 {
1706 arm_xscale_rtx_costs,
1707 NULL,
1708 xscale_sched_adjust_cost,
1709 2, /* Constant limit. */
1710 3, /* Max cond insns. */
1711 ARM_PREFETCH_NOT_BENEFICIAL,
1712 true, /* Prefer constant pool. */
1713 arm_default_branch_cost,
1714 false, /* Prefer LDRD/STRD. */
1715 {true, true}, /* Prefer non short circuit. */
1716 &arm_default_vec_cost, /* Vectorizer costs. */
1717 false, /* Prefer Neon for 64-bits bitops. */
1718 false, false, /* Prefer 32-bit encodings. */
1719 false, /* Prefer Neon for stringops. */
1720 8 /* Maximum insns to inline memset. */
1721 };
1722
1723 const struct tune_params arm_9e_tune =
1724 {
1725 arm_9e_rtx_costs,
1726 NULL,
1727 NULL, /* Sched adj cost. */
1728 1, /* Constant limit. */
1729 5, /* Max cond insns. */
1730 ARM_PREFETCH_NOT_BENEFICIAL,
1731 true, /* Prefer constant pool. */
1732 arm_default_branch_cost,
1733 false, /* Prefer LDRD/STRD. */
1734 {true, true}, /* Prefer non short circuit. */
1735 &arm_default_vec_cost, /* Vectorizer costs. */
1736 false, /* Prefer Neon for 64-bits bitops. */
1737 false, false, /* Prefer 32-bit encodings. */
1738 false, /* Prefer Neon for stringops. */
1739 8 /* Maximum insns to inline memset. */
1740 };
1741
1742 const struct tune_params arm_v6t2_tune =
1743 {
1744 arm_9e_rtx_costs,
1745 NULL,
1746 NULL, /* Sched adj cost. */
1747 1, /* Constant limit. */
1748 5, /* Max cond insns. */
1749 ARM_PREFETCH_NOT_BENEFICIAL,
1750 false, /* Prefer constant pool. */
1751 arm_default_branch_cost,
1752 false, /* Prefer LDRD/STRD. */
1753 {true, true}, /* Prefer non short circuit. */
1754 &arm_default_vec_cost, /* Vectorizer costs. */
1755 false, /* Prefer Neon for 64-bits bitops. */
1756 false, false, /* Prefer 32-bit encodings. */
1757 false, /* Prefer Neon for stringops. */
1758 8 /* Maximum insns to inline memset. */
1759 };
1760
1761 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1762 const struct tune_params arm_cortex_tune =
1763 {
1764 arm_9e_rtx_costs,
1765 &generic_extra_costs,
1766 NULL, /* Sched adj cost. */
1767 1, /* Constant limit. */
1768 5, /* Max cond insns. */
1769 ARM_PREFETCH_NOT_BENEFICIAL,
1770 false, /* Prefer constant pool. */
1771 arm_default_branch_cost,
1772 false, /* Prefer LDRD/STRD. */
1773 {true, true}, /* Prefer non short circuit. */
1774 &arm_default_vec_cost, /* Vectorizer costs. */
1775 false, /* Prefer Neon for 64-bits bitops. */
1776 false, false, /* Prefer 32-bit encodings. */
1777 false, /* Prefer Neon for stringops. */
1778 8 /* Maximum insns to inline memset. */
1779 };
1780
1781 const struct tune_params arm_cortex_a8_tune =
1782 {
1783 arm_9e_rtx_costs,
1784 &cortexa8_extra_costs,
1785 NULL, /* Sched adj cost. */
1786 1, /* Constant limit. */
1787 5, /* Max cond insns. */
1788 ARM_PREFETCH_NOT_BENEFICIAL,
1789 false, /* Prefer constant pool. */
1790 arm_default_branch_cost,
1791 false, /* Prefer LDRD/STRD. */
1792 {true, true}, /* Prefer non short circuit. */
1793 &arm_default_vec_cost, /* Vectorizer costs. */
1794 false, /* Prefer Neon for 64-bits bitops. */
1795 false, false, /* Prefer 32-bit encodings. */
1796 true, /* Prefer Neon for stringops. */
1797 8 /* Maximum insns to inline memset. */
1798 };
1799
1800 const struct tune_params arm_cortex_a7_tune =
1801 {
1802 arm_9e_rtx_costs,
1803 &cortexa7_extra_costs,
1804 NULL, /* Sched adj cost. */
1805 1, /* Constant limit. */
1806 5, /* Max cond insns. */
1807 ARM_PREFETCH_NOT_BENEFICIAL,
1808 false, /* Prefer constant pool. */
1809 arm_default_branch_cost,
1810 false, /* Prefer LDRD/STRD. */
1811 {true, true}, /* Prefer non short circuit. */
1812 &arm_default_vec_cost, /* Vectorizer costs. */
1813 false, /* Prefer Neon for 64-bits bitops. */
1814 false, false, /* Prefer 32-bit encodings. */
1815 true, /* Prefer Neon for stringops. */
1816 8 /* Maximum insns to inline memset. */
1817 };
1818
1819 const struct tune_params arm_cortex_a15_tune =
1820 {
1821 arm_9e_rtx_costs,
1822 &cortexa15_extra_costs,
1823 NULL, /* Sched adj cost. */
1824 1, /* Constant limit. */
1825 2, /* Max cond insns. */
1826 ARM_PREFETCH_NOT_BENEFICIAL,
1827 false, /* Prefer constant pool. */
1828 arm_default_branch_cost,
1829 true, /* Prefer LDRD/STRD. */
1830 {true, true}, /* Prefer non short circuit. */
1831 &arm_default_vec_cost, /* Vectorizer costs. */
1832 false, /* Prefer Neon for 64-bits bitops. */
1833 true, true, /* Prefer 32-bit encodings. */
1834 true, /* Prefer Neon for stringops. */
1835 8 /* Maximum insns to inline memset. */
1836 };
1837
1838 const struct tune_params arm_cortex_a53_tune =
1839 {
1840 arm_9e_rtx_costs,
1841 &cortexa53_extra_costs,
1842 NULL, /* Scheduler cost adjustment. */
1843 1, /* Constant limit. */
1844 5, /* Max cond insns. */
1845 ARM_PREFETCH_NOT_BENEFICIAL,
1846 false, /* Prefer constant pool. */
1847 arm_default_branch_cost,
1848 false, /* Prefer LDRD/STRD. */
1849 {true, true}, /* Prefer non short circuit. */
1850 &arm_default_vec_cost, /* Vectorizer costs. */
1851 false, /* Prefer Neon for 64-bits bitops. */
1852 false, false, /* Prefer 32-bit encodings. */
1853 false, /* Prefer Neon for stringops. */
1854 8 /* Maximum insns to inline memset. */
1855 };
1856
1857 const struct tune_params arm_cortex_a57_tune =
1858 {
1859 arm_9e_rtx_costs,
1860 &cortexa57_extra_costs,
1861 NULL, /* Scheduler cost adjustment. */
1862 1, /* Constant limit. */
1863 2, /* Max cond insns. */
1864 ARM_PREFETCH_NOT_BENEFICIAL,
1865 false, /* Prefer constant pool. */
1866 arm_default_branch_cost,
1867 true, /* Prefer LDRD/STRD. */
1868 {true, true}, /* Prefer non short circuit. */
1869 &arm_default_vec_cost, /* Vectorizer costs. */
1870 false, /* Prefer Neon for 64-bits bitops. */
1871 true, true, /* Prefer 32-bit encodings. */
1872 false, /* Prefer Neon for stringops. */
1873 8 /* Maximum insns to inline memset. */
1874 };
1875
1876 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
1877 less appealing. Set max_insns_skipped to a low value. */
1878
1879 const struct tune_params arm_cortex_a5_tune =
1880 {
1881 arm_9e_rtx_costs,
1882 &cortexa5_extra_costs,
1883 NULL, /* Sched adj cost. */
1884 1, /* Constant limit. */
1885 1, /* Max cond insns. */
1886 ARM_PREFETCH_NOT_BENEFICIAL,
1887 false, /* Prefer constant pool. */
1888 arm_cortex_a5_branch_cost,
1889 false, /* Prefer LDRD/STRD. */
1890 {false, false}, /* Prefer non short circuit. */
1891 &arm_default_vec_cost, /* Vectorizer costs. */
1892 false, /* Prefer Neon for 64-bits bitops. */
1893 false, false, /* Prefer 32-bit encodings. */
1894 true, /* Prefer Neon for stringops. */
1895 8 /* Maximum insns to inline memset. */
1896 };
1897
1898 const struct tune_params arm_cortex_a9_tune =
1899 {
1900 arm_9e_rtx_costs,
1901 &cortexa9_extra_costs,
1902 cortex_a9_sched_adjust_cost,
1903 1, /* Constant limit. */
1904 5, /* Max cond insns. */
1905 ARM_PREFETCH_BENEFICIAL(4,32,32),
1906 false, /* Prefer constant pool. */
1907 arm_default_branch_cost,
1908 false, /* Prefer LDRD/STRD. */
1909 {true, true}, /* Prefer non short circuit. */
1910 &arm_default_vec_cost, /* Vectorizer costs. */
1911 false, /* Prefer Neon for 64-bits bitops. */
1912 false, false, /* Prefer 32-bit encodings. */
1913 false, /* Prefer Neon for stringops. */
1914 8 /* Maximum insns to inline memset. */
1915 };
1916
1917 const struct tune_params arm_cortex_a12_tune =
1918 {
1919 arm_9e_rtx_costs,
1920 &cortexa12_extra_costs,
1921 NULL, /* Sched adj cost. */
1922 1, /* Constant limit. */
1923 5, /* Max cond insns. */
1924 ARM_PREFETCH_BENEFICIAL(4,32,32),
1925 false, /* Prefer constant pool. */
1926 arm_default_branch_cost,
1927 true, /* Prefer LDRD/STRD. */
1928 {true, true}, /* Prefer non short circuit. */
1929 &arm_default_vec_cost, /* Vectorizer costs. */
1930 false, /* Prefer Neon for 64-bits bitops. */
1931 false, false, /* Prefer 32-bit encodings. */
1932 true, /* Prefer Neon for stringops. */
1933 8 /* Maximum insns to inline memset. */
1934 };
1935
1936 /* armv7m tuning.  On Cortex-M4 cores, for example, MOVW and MOVT each take a
1937    single cycle, so materialising a constant with a MOVW/MOVT pair costs two
1938    cycles.  An LDR from the constant pool also takes two cycles to execute,
1939    but mildly increases pipelining opportunity (consecutive loads/stores can
1940    be pipelined together, saving one cycle), and may also improve icache
1941    utilisation.  Hence we prefer the constant pool for such processors.  */
1942
1943 const struct tune_params arm_v7m_tune =
1944 {
1945 arm_9e_rtx_costs,
1946 &v7m_extra_costs,
1947 NULL, /* Sched adj cost. */
1948 1, /* Constant limit. */
1949 2, /* Max cond insns. */
1950 ARM_PREFETCH_NOT_BENEFICIAL,
1951 true, /* Prefer constant pool. */
1952 arm_cortex_m_branch_cost,
1953 false, /* Prefer LDRD/STRD. */
1954 {false, false}, /* Prefer non short circuit. */
1955 &arm_default_vec_cost, /* Vectorizer costs. */
1956 false, /* Prefer Neon for 64-bits bitops. */
1957 false, false, /* Prefer 32-bit encodings. */
1958 false, /* Prefer Neon for stringops. */
1959 8 /* Maximum insns to inline memset. */
1960 };
1961
1962 /* Cortex-M7 tuning. */
1963
1964 const struct tune_params arm_cortex_m7_tune =
1965 {
1966 arm_9e_rtx_costs,
1967 &v7m_extra_costs,
1968 NULL, /* Sched adj cost. */
1969 0, /* Constant limit. */
1970 0, /* Max cond insns. */
1971 ARM_PREFETCH_NOT_BENEFICIAL,
1972 true, /* Prefer constant pool. */
1973 arm_cortex_m_branch_cost,
1974 false, /* Prefer LDRD/STRD. */
1975 {true, true}, /* Prefer non short circuit. */
1976 &arm_default_vec_cost, /* Vectorizer costs. */
1977 false, /* Prefer Neon for 64-bits bitops. */
1978 false, false, /* Prefer 32-bit encodings. */
1979 false, /* Prefer Neon for stringops. */
1980 8 /* Maximum insns to inline memset. */
1981 };
1982
1983 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
1984 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
1985 const struct tune_params arm_v6m_tune =
1986 {
1987 arm_9e_rtx_costs,
1988 NULL,
1989 NULL, /* Sched adj cost. */
1990 1, /* Constant limit. */
1991 5, /* Max cond insns. */
1992 ARM_PREFETCH_NOT_BENEFICIAL,
1993 false, /* Prefer constant pool. */
1994 arm_default_branch_cost,
1995 false, /* Prefer LDRD/STRD. */
1996 {false, false}, /* Prefer non short circuit. */
1997 &arm_default_vec_cost, /* Vectorizer costs. */
1998 false, /* Prefer Neon for 64-bits bitops. */
1999 false, false, /* Prefer 32-bit encodings. */
2000 false, /* Prefer Neon for stringops. */
2001 8 /* Maximum insns to inline memset. */
2002 };
2003
2004 const struct tune_params arm_fa726te_tune =
2005 {
2006 arm_9e_rtx_costs,
2007 NULL,
2008 fa726te_sched_adjust_cost,
2009 1, /* Constant limit. */
2010 5, /* Max cond insns. */
2011 ARM_PREFETCH_NOT_BENEFICIAL,
2012 true, /* Prefer constant pool. */
2013 arm_default_branch_cost,
2014 false, /* Prefer LDRD/STRD. */
2015 {true, true}, /* Prefer non short circuit. */
2016 &arm_default_vec_cost, /* Vectorizer costs. */
2017 false, /* Prefer Neon for 64-bits bitops. */
2018 false, false, /* Prefer 32-bit encodings. */
2019 false, /* Prefer Neon for stringops. */
2020 8 /* Maximum insns to inline memset. */
2021 };
2022
2023
2024 /* Not all of these give usefully different compilation alternatives,
2025 but there is no simple way of generalizing them. */
2026 static const struct processors all_cores[] =
2027 {
2028 /* ARM Cores */
2029 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2030 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2031 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2032 #include "arm-cores.def"
2033 #undef ARM_CORE
2034 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2035 };
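/* To illustrate how the table above is built (the entry shown is only
   illustrative; the real names and flags come from arm-cores.def): an
   arm-cores.def line such as
     ARM_CORE ("cortex-a8", cortexa8, cortexa8, 7A, FLAGS, cortex_a8)
   expands via the ARM_CORE macro above to
     {"cortex-a8", cortexa8, "7A", BASE_ARCH_7A,
      FLAGS | FL_FOR_ARCH7A, &arm_cortex_a8_tune},  */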
2036
2037 static const struct processors all_architectures[] =
2038 {
2039 /* ARM Architectures */
2040 /* We don't specify tuning costs here as they will be figured out
2041 from the core. */
2042
2043 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2044 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2045 #include "arm-arches.def"
2046 #undef ARM_ARCH
2047 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2048 };
2049
2050
2051 /* These are populated as command-line arguments are processed, or NULL
2052 if not specified. */
2053 static const struct processors *arm_selected_arch;
2054 static const struct processors *arm_selected_cpu;
2055 static const struct processors *arm_selected_tune;
2056
2057 /* The name of the preprocessor macro to define for this architecture. */
2058
2059 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2060
2061 /* Available values for -mfpu=. */
2062
2063 static const struct arm_fpu_desc all_fpus[] =
2064 {
2065 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2066 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2067 #include "arm-fpus.def"
2068 #undef ARM_FPU
2069 };
2070
2071
2072 /* Supported TLS relocations. */
2073
2074 enum tls_reloc {
2075 TLS_GD32,
2076 TLS_LDM32,
2077 TLS_LDO32,
2078 TLS_IE32,
2079 TLS_LE32,
2080 TLS_DESCSEQ /* GNU scheme */
2081 };
2082
2083 /* The maximum number of insns to be used when loading a constant. */
2084 inline static int
2085 arm_constant_limit (bool size_p)
2086 {
2087 return size_p ? 1 : current_tune->constant_limit;
2088 }
2089
2090 /* Emit an insn that's a simple single-set. Both the operands must be known
2091 to be valid. */
2092 inline static rtx_insn *
2093 emit_set_insn (rtx x, rtx y)
2094 {
2095 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
2096 }
2097
2098 /* Return the number of bits set in VALUE. */
2099 static unsigned
2100 bit_count (unsigned long value)
2101 {
2102 unsigned long count = 0;
2103
2104 while (value)
2105 {
2106 count++;
2107 value &= value - 1; /* Clear the least-significant set bit. */
2108 }
2109
2110 return count;
2111 }
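/* For example, bit_count (0x2C) returns 3: the loop above clears bit 2,
   then bit 3, then bit 5 (ANDing VALUE with VALUE - 1 removes the
   least-significant set bit on each iteration).  */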
2112
2113 typedef struct
2114 {
2115 machine_mode mode;
2116 const char *name;
2117 } arm_fixed_mode_set;
2118
2119 /* A small helper for setting fixed-point libfuncs. */
2120
2121 static void
2122 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2123 const char *funcname, const char *modename,
2124 int num_suffix)
2125 {
2126 char buffer[50];
2127
2128 if (num_suffix == 0)
2129 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2130 else
2131 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2132
2133 set_optab_libfunc (optable, mode, buffer);
2134 }
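/* As an illustration of the naming scheme above (derived purely from the
   sprintf format, so the names are just what it produces for these
   arguments): arm_set_fixed_optab_libfunc (add_optab, SQmode, "add", "sq", 3)
   registers the libcall name "__gnu_addsq3", while a NUM_SUFFIX of zero
   would drop the trailing digit and give "__gnu_addsq".  */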
2135
2136 static void
2137 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2138 machine_mode from, const char *funcname,
2139 const char *toname, const char *fromname)
2140 {
2141 char buffer[50];
2142 const char *maybe_suffix_2 = "";
2143
2144 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2145 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2146 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2147 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2148 maybe_suffix_2 = "2";
2149
2150 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2151 maybe_suffix_2);
2152
2153 set_conv_libfunc (optable, to, from, buffer);
2154 }
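/* For example, with the suffix rule above, a conversion between two signed
   fract modes (funcname "fract", fromname "sq", toname "dq") yields
   "__gnu_fractsqdq2", whereas a conversion whose destination is not a
   fixed-point mode (e.g. toname "si") takes no suffix and yields
   "__gnu_fractsqsi".  (These names are simply what the sprintf above
   produces for those arguments.)  */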
2155
2156 /* Set up library functions unique to ARM. */
2157
2158 static void
2159 arm_init_libfuncs (void)
2160 {
2161 /* For Linux, we have access to kernel support for atomic operations. */
2162 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2163 init_sync_libfuncs (2 * UNITS_PER_WORD);
2164
2165 /* There are no special library functions unless we are using the
2166 ARM BPABI. */
2167 if (!TARGET_BPABI)
2168 return;
2169
2170 /* The functions below are described in Section 4 of the "Run-Time
2171 ABI for the ARM architecture", Version 1.0. */
2172
2173 /* Double-precision floating-point arithmetic. Table 2. */
2174 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2175 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2176 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2177 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2178 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2179
2180 /* Double-precision comparisons. Table 3. */
2181 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2182 set_optab_libfunc (ne_optab, DFmode, NULL);
2183 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2184 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2185 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2186 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2187 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2188
2189 /* Single-precision floating-point arithmetic. Table 4. */
2190 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2191 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2192 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2193 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2194 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2195
2196 /* Single-precision comparisons. Table 5. */
2197 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2198 set_optab_libfunc (ne_optab, SFmode, NULL);
2199 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2200 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2201 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2202 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2203 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2204
2205 /* Floating-point to integer conversions. Table 6. */
2206 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2207 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2208 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2209 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2210 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2211 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2212 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2213 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2214
2215 /* Conversions between floating types. Table 7. */
2216 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2217 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2218
2219 /* Integer to floating-point conversions. Table 8. */
2220 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2221 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2222 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2223 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2224 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2225 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2226 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2227 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2228
2229 /* Long long. Table 9. */
2230 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2231 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2232 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2233 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2234 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2235 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2236 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2237 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2238
2239 /* Integer (32/32->32) division. \S 4.3.1. */
2240 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2241 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2242
2243 /* The divmod functions are designed so that they can be used for
2244 plain division, even though they return both the quotient and the
2245 remainder. The quotient is returned in the usual location (i.e.,
2246 r0 for SImode, {r0, r1} for DImode), just as would be expected
2247 for an ordinary division routine. Because the AAPCS calling
2248 conventions specify that all of { r0, r1, r2, r3 } are
2249 call-clobbered registers, there is no need to tell the compiler
2250 explicitly that those registers are clobbered by these
2251 routines. */
2252 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2253 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2254
2255 /* For SImode division the ABI provides div-without-mod routines,
2256 which are faster. */
2257 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2258 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2259
2260 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2261 divmod libcalls instead. */
2262 set_optab_libfunc (smod_optab, DImode, NULL);
2263 set_optab_libfunc (umod_optab, DImode, NULL);
2264 set_optab_libfunc (smod_optab, SImode, NULL);
2265 set_optab_libfunc (umod_optab, SImode, NULL);
2266
2267 /* Half-precision float operations. The compiler handles all operations
2268 with NULL libfuncs by converting to SFmode. */
2269 switch (arm_fp16_format)
2270 {
2271 case ARM_FP16_FORMAT_IEEE:
2272 case ARM_FP16_FORMAT_ALTERNATIVE:
2273
2274 /* Conversions. */
2275 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2276 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2277 ? "__gnu_f2h_ieee"
2278 : "__gnu_f2h_alternative"));
2279 set_conv_libfunc (sext_optab, SFmode, HFmode,
2280 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2281 ? "__gnu_h2f_ieee"
2282 : "__gnu_h2f_alternative"));
2283
2284 /* Arithmetic. */
2285 set_optab_libfunc (add_optab, HFmode, NULL);
2286 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2287 set_optab_libfunc (smul_optab, HFmode, NULL);
2288 set_optab_libfunc (neg_optab, HFmode, NULL);
2289 set_optab_libfunc (sub_optab, HFmode, NULL);
2290
2291 /* Comparisons. */
2292 set_optab_libfunc (eq_optab, HFmode, NULL);
2293 set_optab_libfunc (ne_optab, HFmode, NULL);
2294 set_optab_libfunc (lt_optab, HFmode, NULL);
2295 set_optab_libfunc (le_optab, HFmode, NULL);
2296 set_optab_libfunc (ge_optab, HFmode, NULL);
2297 set_optab_libfunc (gt_optab, HFmode, NULL);
2298 set_optab_libfunc (unord_optab, HFmode, NULL);
2299 break;
2300
2301 default:
2302 break;
2303 }
2304
2305 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2306 {
2307 const arm_fixed_mode_set fixed_arith_modes[] =
2308 {
2309 { QQmode, "qq" },
2310 { UQQmode, "uqq" },
2311 { HQmode, "hq" },
2312 { UHQmode, "uhq" },
2313 { SQmode, "sq" },
2314 { USQmode, "usq" },
2315 { DQmode, "dq" },
2316 { UDQmode, "udq" },
2317 { TQmode, "tq" },
2318 { UTQmode, "utq" },
2319 { HAmode, "ha" },
2320 { UHAmode, "uha" },
2321 { SAmode, "sa" },
2322 { USAmode, "usa" },
2323 { DAmode, "da" },
2324 { UDAmode, "uda" },
2325 { TAmode, "ta" },
2326 { UTAmode, "uta" }
2327 };
2328 const arm_fixed_mode_set fixed_conv_modes[] =
2329 {
2330 { QQmode, "qq" },
2331 { UQQmode, "uqq" },
2332 { HQmode, "hq" },
2333 { UHQmode, "uhq" },
2334 { SQmode, "sq" },
2335 { USQmode, "usq" },
2336 { DQmode, "dq" },
2337 { UDQmode, "udq" },
2338 { TQmode, "tq" },
2339 { UTQmode, "utq" },
2340 { HAmode, "ha" },
2341 { UHAmode, "uha" },
2342 { SAmode, "sa" },
2343 { USAmode, "usa" },
2344 { DAmode, "da" },
2345 { UDAmode, "uda" },
2346 { TAmode, "ta" },
2347 { UTAmode, "uta" },
2348 { QImode, "qi" },
2349 { HImode, "hi" },
2350 { SImode, "si" },
2351 { DImode, "di" },
2352 { TImode, "ti" },
2353 { SFmode, "sf" },
2354 { DFmode, "df" }
2355 };
2356 unsigned int i, j;
2357
2358 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2359 {
2360 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2361 "add", fixed_arith_modes[i].name, 3);
2362 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2363 "ssadd", fixed_arith_modes[i].name, 3);
2364 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2365 "usadd", fixed_arith_modes[i].name, 3);
2366 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2367 "sub", fixed_arith_modes[i].name, 3);
2368 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2369 "sssub", fixed_arith_modes[i].name, 3);
2370 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2371 "ussub", fixed_arith_modes[i].name, 3);
2372 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2373 "mul", fixed_arith_modes[i].name, 3);
2374 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2375 "ssmul", fixed_arith_modes[i].name, 3);
2376 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2377 "usmul", fixed_arith_modes[i].name, 3);
2378 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2379 "div", fixed_arith_modes[i].name, 3);
2380 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2381 "udiv", fixed_arith_modes[i].name, 3);
2382 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2383 "ssdiv", fixed_arith_modes[i].name, 3);
2384 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2385 "usdiv", fixed_arith_modes[i].name, 3);
2386 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2387 "neg", fixed_arith_modes[i].name, 2);
2388 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2389 "ssneg", fixed_arith_modes[i].name, 2);
2390 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2391 "usneg", fixed_arith_modes[i].name, 2);
2392 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2393 "ashl", fixed_arith_modes[i].name, 3);
2394 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2395 "ashr", fixed_arith_modes[i].name, 3);
2396 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2397 "lshr", fixed_arith_modes[i].name, 3);
2398 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2399 "ssashl", fixed_arith_modes[i].name, 3);
2400 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2401 "usashl", fixed_arith_modes[i].name, 3);
2402 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2403 "cmp", fixed_arith_modes[i].name, 2);
2404 }
2405
2406 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2407 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2408 {
2409 if (i == j
2410 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2411 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2412 continue;
2413
2414 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2415 fixed_conv_modes[j].mode, "fract",
2416 fixed_conv_modes[i].name,
2417 fixed_conv_modes[j].name);
2418 arm_set_fixed_conv_libfunc (satfract_optab,
2419 fixed_conv_modes[i].mode,
2420 fixed_conv_modes[j].mode, "satfract",
2421 fixed_conv_modes[i].name,
2422 fixed_conv_modes[j].name);
2423 arm_set_fixed_conv_libfunc (fractuns_optab,
2424 fixed_conv_modes[i].mode,
2425 fixed_conv_modes[j].mode, "fractuns",
2426 fixed_conv_modes[i].name,
2427 fixed_conv_modes[j].name);
2428 arm_set_fixed_conv_libfunc (satfractuns_optab,
2429 fixed_conv_modes[i].mode,
2430 fixed_conv_modes[j].mode, "satfractuns",
2431 fixed_conv_modes[i].name,
2432 fixed_conv_modes[j].name);
2433 }
2434 }
2435
2436 if (TARGET_AAPCS_BASED)
2437 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2438 }
2439
2440 /* On AAPCS systems, this is the "struct __va_list". */
2441 static GTY(()) tree va_list_type;
2442
2443 /* Return the type to use as __builtin_va_list. */
2444 static tree
2445 arm_build_builtin_va_list (void)
2446 {
2447 tree va_list_name;
2448 tree ap_field;
2449
2450 if (!TARGET_AAPCS_BASED)
2451 return std_build_builtin_va_list ();
2452
2453 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2454 defined as:
2455
2456 struct __va_list
2457 {
2458 void *__ap;
2459 };
2460
2461 The C Library ABI further reinforces this definition in \S
2462 4.1.
2463
2464 We must follow this definition exactly. The structure tag
2465 name is visible in C++ mangled names, and thus forms a part
2466 of the ABI. The field name may be used by people who
2467 #include <stdarg.h>. */
2468 /* Create the type. */
2469 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2470 /* Give it the required name. */
2471 va_list_name = build_decl (BUILTINS_LOCATION,
2472 TYPE_DECL,
2473 get_identifier ("__va_list"),
2474 va_list_type);
2475 DECL_ARTIFICIAL (va_list_name) = 1;
2476 TYPE_NAME (va_list_type) = va_list_name;
2477 TYPE_STUB_DECL (va_list_type) = va_list_name;
2478 /* Create the __ap field. */
2479 ap_field = build_decl (BUILTINS_LOCATION,
2480 FIELD_DECL,
2481 get_identifier ("__ap"),
2482 ptr_type_node);
2483 DECL_ARTIFICIAL (ap_field) = 1;
2484 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2485 TYPE_FIELDS (va_list_type) = ap_field;
2486 /* Compute its layout. */
2487 layout_type (va_list_type);
2488
2489 return va_list_type;
2490 }
2491
2492 /* Return an expression of type "void *" pointing to the next
2493 available argument in a variable-argument list. VALIST is the
2494 user-level va_list object, of type __builtin_va_list. */
2495 static tree
2496 arm_extract_valist_ptr (tree valist)
2497 {
2498 if (TREE_TYPE (valist) == error_mark_node)
2499 return error_mark_node;
2500
2501 /* On an AAPCS target, the pointer is stored within "struct
2502 va_list". */
2503 if (TARGET_AAPCS_BASED)
2504 {
2505 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2506 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2507 valist, ap_field, NULL_TREE);
2508 }
2509
2510 return valist;
2511 }
2512
2513 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2514 static void
2515 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2516 {
2517 valist = arm_extract_valist_ptr (valist);
2518 std_expand_builtin_va_start (valist, nextarg);
2519 }
2520
2521 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2522 static tree
2523 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2524 gimple_seq *post_p)
2525 {
2526 valist = arm_extract_valist_ptr (valist);
2527 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2528 }
2529
2530 /* Fix up any incompatible options that the user has specified. */
2531 static void
2532 arm_option_override (void)
2533 {
2534 if (global_options_set.x_arm_arch_option)
2535 arm_selected_arch = &all_architectures[arm_arch_option];
2536
2537 if (global_options_set.x_arm_cpu_option)
2538 {
2539 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2540 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2541 }
2542
2543 if (global_options_set.x_arm_tune_option)
2544 arm_selected_tune = &all_cores[(int) arm_tune_option];
2545
2546 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2547 SUBTARGET_OVERRIDE_OPTIONS;
2548 #endif
2549
2550 if (arm_selected_arch)
2551 {
2552 if (arm_selected_cpu)
2553 {
2554 /* Check for conflict between mcpu and march. */
2555 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2556 {
2557 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2558 arm_selected_cpu->name, arm_selected_arch->name);
2559 /* -march wins for code generation.
2560 -mcpu wins for default tuning. */
2561 if (!arm_selected_tune)
2562 arm_selected_tune = arm_selected_cpu;
2563
2564 arm_selected_cpu = arm_selected_arch;
2565 }
2566 else
2567 /* -mcpu wins. */
2568 arm_selected_arch = NULL;
2569 }
2570 else
2571 /* Pick a CPU based on the architecture. */
2572 arm_selected_cpu = arm_selected_arch;
2573 }
2574
2575 /* If the user did not specify a processor, choose one for them. */
2576 if (!arm_selected_cpu)
2577 {
2578 const struct processors * sel;
2579 unsigned int sought;
2580
2581 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2582 if (!arm_selected_cpu->name)
2583 {
2584 #ifdef SUBTARGET_CPU_DEFAULT
2585 /* Use the subtarget default CPU if none was specified by
2586 configure. */
2587 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2588 #endif
2589 /* Default to ARM6. */
2590 if (!arm_selected_cpu->name)
2591 arm_selected_cpu = &all_cores[arm6];
2592 }
2593
2594 sel = arm_selected_cpu;
2595 insn_flags = sel->flags;
2596
2597 /* Now check to see if the user has specified any command-line
2598 switches that require certain abilities from the CPU. */
2599 sought = 0;
2600
2601 if (TARGET_INTERWORK || TARGET_THUMB)
2602 {
2603 sought |= (FL_THUMB | FL_MODE32);
2604
2605 /* There are no ARM processors that support both APCS-26 and
2606 interworking. Therefore we force FL_MODE26 to be removed
2607 from insn_flags here (if it was set), so that the search
2608 below will always be able to find a compatible processor. */
2609 insn_flags &= ~FL_MODE26;
2610 }
2611
2612 if (sought != 0 && ((sought & insn_flags) != sought))
2613 {
2614 /* Try to locate a CPU type that supports all of the abilities
2615 of the default CPU, plus the extra abilities requested by
2616 the user. */
2617 for (sel = all_cores; sel->name != NULL; sel++)
2618 if ((sel->flags & (sought | insn_flags)) == (sought | insn_flags))
2619 break;
2620
2621 if (sel->name == NULL)
2622 {
2623 unsigned current_bit_count = 0;
2624 const struct processors * best_fit = NULL;
2625
2626 /* Ideally we would like to issue an error message here
2627 saying that it was not possible to find a CPU compatible
2628 with the default CPU, but which also supports the command
2629 line options specified by the programmer, and so they
2630 ought to use the -mcpu=<name> command line option to
2631 override the default CPU type.
2632
2633 If we cannot find a cpu that has both the
2634 characteristics of the default cpu and the given
2635 command line options we scan the array again looking
2636 for a best match. */
2637 for (sel = all_cores; sel->name != NULL; sel++)
2638 if ((sel->flags & sought) == sought)
2639 {
2640 unsigned count;
2641
2642 count = bit_count (sel->flags & insn_flags);
2643
2644 if (count >= current_bit_count)
2645 {
2646 best_fit = sel;
2647 current_bit_count = count;
2648 }
2649 }
2650
2651 gcc_assert (best_fit);
2652 sel = best_fit;
2653 }
2654
2655 arm_selected_cpu = sel;
2656 }
2657 }
2658
2659 gcc_assert (arm_selected_cpu);
2660 /* The selected cpu may be an architecture, so look up tuning by core ID. */
2661 if (!arm_selected_tune)
2662 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2663
2664 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2665 insn_flags = arm_selected_cpu->flags;
2666 arm_base_arch = arm_selected_cpu->base_arch;
2667
2668 arm_tune = arm_selected_tune->core;
2669 tune_flags = arm_selected_tune->flags;
2670 current_tune = arm_selected_tune->tune;
2671
2672 /* Make sure that the processor choice does not conflict with any of the
2673 other command line choices. */
2674 if (TARGET_ARM && !(insn_flags & FL_NOTM))
2675 error ("target CPU does not support ARM mode");
2676
2677 /* BPABI targets use linker tricks to allow interworking on cores
2678 without thumb support. */
2679 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2680 {
2681 warning (0, "target CPU does not support interworking" );
2682 target_flags &= ~MASK_INTERWORK;
2683 }
2684
2685 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
2686 {
2687 warning (0, "target CPU does not support THUMB instructions");
2688 target_flags &= ~MASK_THUMB;
2689 }
2690
2691 if (TARGET_APCS_FRAME && TARGET_THUMB)
2692 {
2693 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2694 target_flags &= ~MASK_APCS_FRAME;
2695 }
2696
2697 /* Callee super interworking implies thumb interworking. Adding
2698 this to the flags here simplifies the logic elsewhere. */
2699 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
2700 target_flags |= MASK_INTERWORK;
2701
2702 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2703 from here where no function is being compiled currently. */
2704 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
2705 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2706
2707 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
2708 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2709
2710 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
2711 {
2712 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
2713 target_flags |= MASK_APCS_FRAME;
2714 }
2715
2716 if (TARGET_POKE_FUNCTION_NAME)
2717 target_flags |= MASK_APCS_FRAME;
2718
2719 if (TARGET_APCS_REENT && flag_pic)
2720 error ("-fpic and -mapcs-reent are incompatible");
2721
2722 if (TARGET_APCS_REENT)
2723 warning (0, "APCS reentrant code not supported. Ignored");
2724
2725 /* If this target is normally configured to use APCS frames, warn if they
2726 are turned off and debugging is turned on. */
2727 if (TARGET_ARM
2728 && write_symbols != NO_DEBUG
2729 && !TARGET_APCS_FRAME
2730 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2731 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2732
2733 if (TARGET_APCS_FLOAT)
2734 warning (0, "passing floating point arguments in fp regs not yet supported");
2735
2736 /* Initialize boolean versions of the flags, for use in the arm.md file. */
2737 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
2738 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
2739 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
2740 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
2741 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
2742 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
2743 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
2744 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
2745 arm_arch6m = arm_arch6 && !arm_arch_notm;
2746 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
2747 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
2748 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
2749 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
2750 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
2751
2752 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
2753 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
2754 thumb_code = TARGET_ARM == 0;
2755 thumb1_code = TARGET_THUMB1 != 0;
2756 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
2757 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
2758 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
2759 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
2760 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
2761 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
2762 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
2763 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
2764 arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
2765 if (arm_restrict_it == 2)
2766 arm_restrict_it = arm_arch8 && TARGET_THUMB2;
2767
2768 if (!TARGET_THUMB2)
2769 arm_restrict_it = 0;
2770
2771 /* If we are not using the default (ARM mode) section anchor offset
2772 ranges, then set the correct ranges now. */
2773 if (TARGET_THUMB1)
2774 {
2775 /* Thumb-1 LDR instructions cannot have negative offsets.
2776 Permissible positive offset ranges are 5-bit (for byte loads),
2777 6-bit (for halfword loads), or 7-bit (for word loads).
2778 Empirical results suggest a 7-bit anchor range gives the best
2779 overall code size. */
2780 targetm.min_anchor_offset = 0;
2781 targetm.max_anchor_offset = 127;
2782 }
2783 else if (TARGET_THUMB2)
2784 {
2785 /* The minimum is set such that the total size of the block
2786 for a particular anchor is 248 + 1 + 4095 bytes, which is
2787 divisible by eight, ensuring natural spacing of anchors. */
2788 targetm.min_anchor_offset = -248;
2789 targetm.max_anchor_offset = 4095;
2790 }
2791
2792 /* V5 code we generate is completely interworking capable, so we turn off
2793 TARGET_INTERWORK here to avoid many tests later on. */
2794
2795 /* XXX However, we must pass the right pre-processor defines to CPP
2796 or GLD can get confused. This is a hack. */
2797 if (TARGET_INTERWORK)
2798 arm_cpp_interwork = 1;
2799
2800 if (arm_arch5)
2801 target_flags &= ~MASK_INTERWORK;
2802
2803 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
2804 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
2805
2806 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
2807 error ("iwmmxt abi requires an iwmmxt capable cpu");
2808
2809 if (!global_options_set.x_arm_fpu_index)
2810 {
2811 const char *target_fpu_name;
2812 bool ok;
2813
2814 #ifdef FPUTYPE_DEFAULT
2815 target_fpu_name = FPUTYPE_DEFAULT;
2816 #else
2817 target_fpu_name = "vfp";
2818 #endif
2819
2820 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
2821 CL_TARGET);
2822 gcc_assert (ok);
2823 }
2824
2825 arm_fpu_desc = &all_fpus[arm_fpu_index];
2826
2827 switch (arm_fpu_desc->model)
2828 {
2829 case ARM_FP_MODEL_VFP:
2830 arm_fpu_attr = FPU_VFP;
2831 break;
2832
2833 default:
2834 gcc_unreachable();
2835 }
2836
2837 if (TARGET_AAPCS_BASED)
2838 {
2839 if (TARGET_CALLER_INTERWORKING)
2840 error ("AAPCS does not support -mcaller-super-interworking");
2841 else
2842 if (TARGET_CALLEE_INTERWORKING)
2843 error ("AAPCS does not support -mcallee-super-interworking");
2844 }
2845
2846 /* iWMMXt and NEON are incompatible. */
2847 if (TARGET_IWMMXT && TARGET_NEON)
2848 error ("iWMMXt and NEON are incompatible");
2849
2850 /* iWMMXt unsupported under Thumb mode. */
2851 if (TARGET_THUMB && TARGET_IWMMXT)
2852 error ("iWMMXt unsupported under Thumb mode");
2853
2854 /* __fp16 support currently assumes the core has ldrh. */
2855 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
2856 sorry ("__fp16 and no ldrh");
2857
2858 /* If soft-float is specified then don't use FPU. */
2859 if (TARGET_SOFT_FLOAT)
2860 arm_fpu_attr = FPU_NONE;
2861
2862 if (TARGET_AAPCS_BASED)
2863 {
2864 if (arm_abi == ARM_ABI_IWMMXT)
2865 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
2866 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
2867 && TARGET_HARD_FLOAT
2868 && TARGET_VFP)
2869 arm_pcs_default = ARM_PCS_AAPCS_VFP;
2870 else
2871 arm_pcs_default = ARM_PCS_AAPCS;
2872 }
2873 else
2874 {
2875 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
2876 sorry ("-mfloat-abi=hard and VFP");
2877
2878 if (arm_abi == ARM_ABI_APCS)
2879 arm_pcs_default = ARM_PCS_APCS;
2880 else
2881 arm_pcs_default = ARM_PCS_ATPCS;
2882 }
2883
2884 /* For arm2/3 there is no need to do any scheduling if we are doing
2885 software floating-point. */
2886 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
2887 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
2888
2889 /* Use the cp15 method if it is available. */
2890 if (target_thread_pointer == TP_AUTO)
2891 {
2892 if (arm_arch6k && !TARGET_THUMB1)
2893 target_thread_pointer = TP_CP15;
2894 else
2895 target_thread_pointer = TP_SOFT;
2896 }
2897
2898 if (TARGET_HARD_TP && TARGET_THUMB1)
2899 error ("can not use -mtp=cp15 with 16-bit Thumb");
2900
2901 /* Override the default structure alignment for AAPCS ABI. */
2902 if (!global_options_set.x_arm_structure_size_boundary)
2903 {
2904 if (TARGET_AAPCS_BASED)
2905 arm_structure_size_boundary = 8;
2906 }
2907 else
2908 {
2909 if (arm_structure_size_boundary != 8
2910 && arm_structure_size_boundary != 32
2911 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
2912 {
2913 if (ARM_DOUBLEWORD_ALIGN)
2914 warning (0,
2915 "structure size boundary can only be set to 8, 32 or 64");
2916 else
2917 warning (0, "structure size boundary can only be set to 8 or 32");
2918 arm_structure_size_boundary
2919 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
2920 }
2921 }
2922
2923 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
2924 {
2925 error ("RTP PIC is incompatible with Thumb");
2926 flag_pic = 0;
2927 }
2928
2929 /* If stack checking is disabled, we can use r10 as the PIC register,
2930 which keeps r9 available. The EABI specifies r9 as the PIC register. */
2931 if (flag_pic && TARGET_SINGLE_PIC_BASE)
2932 {
2933 if (TARGET_VXWORKS_RTP)
2934 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
2935 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
2936 }
2937
2938 if (flag_pic && TARGET_VXWORKS_RTP)
2939 arm_pic_register = 9;
2940
2941 if (arm_pic_register_string != NULL)
2942 {
2943 int pic_register = decode_reg_name (arm_pic_register_string);
2944
2945 if (!flag_pic)
2946 warning (0, "-mpic-register= is useless without -fpic");
2947
2948 /* Prevent the user from choosing an obviously stupid PIC register. */
2949 else if (pic_register < 0 || call_used_regs[pic_register]
2950 || pic_register == HARD_FRAME_POINTER_REGNUM
2951 || pic_register == STACK_POINTER_REGNUM
2952 || pic_register >= PC_REGNUM
2953 || (TARGET_VXWORKS_RTP
2954 && (unsigned int) pic_register != arm_pic_register))
2955 error ("unable to use '%s' for PIC register", arm_pic_register_string);
2956 else
2957 arm_pic_register = pic_register;
2958 }
2959
2960 if (TARGET_VXWORKS_RTP
2961 && !global_options_set.x_arm_pic_data_is_text_relative)
2962 arm_pic_data_is_text_relative = 0;
2963
2964 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
2965 if (fix_cm3_ldrd == 2)
2966 {
2967 if (arm_selected_cpu->core == cortexm3)
2968 fix_cm3_ldrd = 1;
2969 else
2970 fix_cm3_ldrd = 0;
2971 }
2972
2973 /* Enable -munaligned-access by default for
2974 - all ARMv6 architecture-based processors
2975 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2976 - ARMv8 architecture-based processors.
2977
2978 Disable -munaligned-access by default for
2979 - all pre-ARMv6 architecture-based processors
2980 - ARMv6-M architecture-based processors. */
2981
2982 if (unaligned_access == 2)
2983 {
2984 if (arm_arch6 && (arm_arch_notm || arm_arch7))
2985 unaligned_access = 1;
2986 else
2987 unaligned_access = 0;
2988 }
2989 else if (unaligned_access == 1
2990 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2991 {
2992 warning (0, "target CPU does not support unaligned accesses");
2993 unaligned_access = 0;
2994 }
2995
2996 if (TARGET_THUMB1 && flag_schedule_insns)
2997 {
2998 /* Don't warn since it's on by default in -O2. */
2999 flag_schedule_insns = 0;
3000 }
3001
3002 if (optimize_size)
3003 {
3004 /* If optimizing for size, bump the number of instructions that we
3005 are prepared to conditionally execute (even on a StrongARM). */
3006 max_insns_skipped = 6;
3007
3008 /* For THUMB2, we limit the conditional sequence to one IT block. */
3009 if (TARGET_THUMB2)
3010 max_insns_skipped = MAX_INSN_PER_IT_BLOCK;
3011 }
3012 else
3013 max_insns_skipped = current_tune->max_insns_skipped;
3014
3015 /* Hot/Cold partitioning is not currently supported, since we can't
3016 handle literal pool placement in that case. */
3017 if (flag_reorder_blocks_and_partition)
3018 {
3019 inform (input_location,
3020 "-freorder-blocks-and-partition not supported on this architecture");
3021 flag_reorder_blocks_and_partition = 0;
3022 flag_reorder_blocks = 1;
3023 }
3024
3025 if (flag_pic)
3026 /* Hoisting PIC address calculations more aggressively provides a small,
3027 but measurable, size reduction for PIC code. Therefore, we decrease
3028 the bar for unrestricted expression hoisting to the cost of PIC address
3029 calculation, which is 2 instructions. */
3030 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3031 global_options.x_param_values,
3032 global_options_set.x_param_values);
3033
3034 /* ARM EABI defaults to strict volatile bitfields. */
3035 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3036 && abi_version_at_least(2))
3037 flag_strict_volatile_bitfields = 1;
3038
3039 /* Enable software prefetching at -O3 for CPUs that have prefetch and for which
3040 we have deemed it beneficial (signified by setting num_prefetch_slots to 1 or more). */
3041 if (flag_prefetch_loop_arrays < 0
3042 && HAVE_prefetch
3043 && optimize >= 3
3044 && current_tune->num_prefetch_slots > 0)
3045 flag_prefetch_loop_arrays = 1;
3046
3047 /* Set up the parameters to be used in the prefetching algorithm. Do not
3048 override the defaults unless we are tuning for a core whose values we have researched. */
3049 if (current_tune->num_prefetch_slots > 0)
3050 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3051 current_tune->num_prefetch_slots,
3052 global_options.x_param_values,
3053 global_options_set.x_param_values);
3054 if (current_tune->l1_cache_line_size >= 0)
3055 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3056 current_tune->l1_cache_line_size,
3057 global_options.x_param_values,
3058 global_options_set.x_param_values);
3059 if (current_tune->l1_cache_size >= 0)
3060 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3061 current_tune->l1_cache_size,
3062 global_options.x_param_values,
3063 global_options_set.x_param_values);
3064
3065 /* Use Neon rather than core registers to perform 64-bit
3066 operations. */
3067 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3068 if (use_neon_for_64bits == 1)
3069 prefer_neon_for_64bits = true;
3070
3071 /* Use the alternative scheduling-pressure algorithm by default. */
3072 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3073 global_options.x_param_values,
3074 global_options_set.x_param_values);
3075
3076 /* Disable shrink-wrap when optimizing function for size, since it tends to
3077 generate additional returns. */
3078 if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
3079 flag_shrink_wrap = false;
3080 /* TBD: Dwarf info for apcs frame is not handled yet. */
3081 if (TARGET_APCS_FRAME)
3082 flag_shrink_wrap = false;
3083
3084 /* We only support -mslow-flash-data on armv7-m targets. */
3085 if (target_slow_flash_data
3086 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
3087 || (TARGET_THUMB1 || flag_pic || TARGET_NEON)))
3088 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
3089
3090 /* Currently, for slow flash data, we just disable literal pools. */
3091 if (target_slow_flash_data)
3092 arm_disable_literal_pool = true;
3093
3094 /* Thumb2 inline assembly code should always use unified syntax.
3095 This will apply to ARM and Thumb1 eventually. */
3096 if (TARGET_THUMB2)
3097 inline_asm_unified = 1;
3098
3099 /* Disable scheduling fusion by default unless the target is an armv7
3100 processor and the tuning prefers ldrd/strd. */
3101 if (flag_schedule_fusion == 2
3102 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3103 flag_schedule_fusion = 0;
3104
3105 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3106 - epilogue_insns - does not accurately model the corresponding insns
3107 emitted in the asm file. In particular, see the comment in thumb_exit
3108 'Find out how many of the (return) argument registers we can corrupt'.
3109 As a consequence, the epilogue may clobber registers without fipa-ra
3110 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3111 TODO: Accurately model clobbers for epilogue_insns and reenable
3112 fipa-ra. */
3113 if (TARGET_THUMB1)
3114 flag_ipa_ra = 0;
3115
3116 /* Register global variables with the garbage collector. */
3117 arm_add_gc_roots ();
3118 }
3119
3120 static void
3121 arm_add_gc_roots (void)
3122 {
3123 gcc_obstack_init(&minipool_obstack);
3124 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3125 }
3126 \f
3127 /* A table of known ARM exception types.
3128 For use with the interrupt function attribute. */
3129
3130 typedef struct
3131 {
3132 const char *const arg;
3133 const unsigned long return_value;
3134 }
3135 isr_attribute_arg;
3136
3137 static const isr_attribute_arg isr_attribute_args [] =
3138 {
3139 { "IRQ", ARM_FT_ISR },
3140 { "irq", ARM_FT_ISR },
3141 { "FIQ", ARM_FT_FIQ },
3142 { "fiq", ARM_FT_FIQ },
3143 { "ABORT", ARM_FT_ISR },
3144 { "abort", ARM_FT_ISR },
3145 { "ABORT", ARM_FT_ISR },
3146 { "abort", ARM_FT_ISR },
3147 { "UNDEF", ARM_FT_EXCEPTION },
3148 { "undef", ARM_FT_EXCEPTION },
3149 { "SWI", ARM_FT_EXCEPTION },
3150 { "swi", ARM_FT_EXCEPTION },
3151 { NULL, ARM_FT_NORMAL }
3152 };
3153
3154 /* Returns the (interrupt) function type of the current
3155 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3156
3157 static unsigned long
3158 arm_isr_value (tree argument)
3159 {
3160 const isr_attribute_arg * ptr;
3161 const char * arg;
3162
3163 if (!arm_arch_notm)
3164 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3165
3166 /* No argument - default to IRQ. */
3167 if (argument == NULL_TREE)
3168 return ARM_FT_ISR;
3169
3170 /* Get the value of the argument. */
3171 if (TREE_VALUE (argument) == NULL_TREE
3172 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3173 return ARM_FT_UNKNOWN;
3174
3175 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3176
3177 /* Check it against the list of known arguments. */
3178 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3179 if (streq (arg, ptr->arg))
3180 return ptr->return_value;
3181
3182 /* An unrecognized interrupt type. */
3183 return ARM_FT_UNKNOWN;
3184 }
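/* For example, a handler declared as
     void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));
   reaches this function with the string "FIQ" and is classified as
   ARM_FT_FIQ; an unrecognized string yields ARM_FT_UNKNOWN.  (The
   declaration above is only an illustrative use of the attribute
   handled here.)  */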
3185
3186 /* Computes the type of the current function. */
3187
3188 static unsigned long
3189 arm_compute_func_type (void)
3190 {
3191 unsigned long type = ARM_FT_UNKNOWN;
3192 tree a;
3193 tree attr;
3194
3195 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3196
3197 /* Decide if the current function is volatile. Such functions
3198 never return, and many memory cycles can be saved by not storing
3199 register values that will never be needed again. This optimization
3200 was added to speed up context switching in a kernel application. */
3201 if (optimize > 0
3202 && (TREE_NOTHROW (current_function_decl)
3203 || !(flag_unwind_tables
3204 || (flag_exceptions
3205 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3206 && TREE_THIS_VOLATILE (current_function_decl))
3207 type |= ARM_FT_VOLATILE;
3208
3209 if (cfun->static_chain_decl != NULL)
3210 type |= ARM_FT_NESTED;
3211
3212 attr = DECL_ATTRIBUTES (current_function_decl);
3213
3214 a = lookup_attribute ("naked", attr);
3215 if (a != NULL_TREE)
3216 type |= ARM_FT_NAKED;
3217
3218 a = lookup_attribute ("isr", attr);
3219 if (a == NULL_TREE)
3220 a = lookup_attribute ("interrupt", attr);
3221
3222 if (a == NULL_TREE)
3223 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3224 else
3225 type |= arm_isr_value (TREE_VALUE (a));
3226
3227 return type;
3228 }
3229
3230 /* Returns the type of the current function. */
3231
3232 unsigned long
3233 arm_current_func_type (void)
3234 {
3235 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3236 cfun->machine->func_type = arm_compute_func_type ();
3237
3238 return cfun->machine->func_type;
3239 }
3240
3241 bool
3242 arm_allocate_stack_slots_for_args (void)
3243 {
3244 /* Naked functions should not allocate stack slots for arguments. */
3245 return !IS_NAKED (arm_current_func_type ());
3246 }
3247
3248 static bool
3249 arm_warn_func_return (tree decl)
3250 {
3251 /* Naked functions are implemented entirely in assembly, including the
3252 return sequence, so suppress warnings about this. */
3253 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3254 }
3255
3256 \f
3257 /* Output assembler code for a block containing the constant parts
3258 of a trampoline, leaving space for the variable parts.
3259
3260 On the ARM, (if r8 is the static chain regnum, and remembering that
3261 referencing pc adds an offset of 8) the trampoline looks like:
3262 ldr r8, [pc, #0]
3263 ldr pc, [pc]
3264 .word static chain value
3265 .word function's address
3266 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3267
3268 static void
3269 arm_asm_trampoline_template (FILE *f)
3270 {
3271 if (TARGET_ARM)
3272 {
3273 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3274 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3275 }
3276 else if (TARGET_THUMB2)
3277 {
3278 /* The Thumb-2 trampoline is similar to the ARM implementation.
3279 Unlike 16-bit Thumb, we enter the stub in Thumb mode.  */
3280 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3281 STATIC_CHAIN_REGNUM, PC_REGNUM);
3282 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3283 }
3284 else
3285 {
3286 ASM_OUTPUT_ALIGN (f, 2);
3287 fprintf (f, "\t.code\t16\n");
3288 fprintf (f, ".Ltrampoline_start:\n");
3289 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3290 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3291 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3292 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3293 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3294 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3295 }
3296 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3297 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3298 }
3299
3300 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3301
3302 static void
3303 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3304 {
3305 rtx fnaddr, mem, a_tramp;
3306
3307 emit_block_move (m_tramp, assemble_trampoline_template (),
3308 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3309
3310 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3311 emit_move_insn (mem, chain_value);
3312
3313 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3314 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3315 emit_move_insn (mem, fnaddr);
3316
3317 a_tramp = XEXP (m_tramp, 0);
3318 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3319 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3320 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3321 }
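
/* An illustrative sketch of the resulting layout for the ARM-state
   template on a 32-bit target (rCHAIN stands for STATIC_CHAIN_REGNUM;
   the values are filled in at run time):

     offset  0:  ldr   rCHAIN, [pc, #0]   @ pc reads as .+8, so this loads
     offset  4:  ldr   pc, [pc, #0]       @ offset 8; this one loads offset 12
     offset  8:  .word <static chain value>       @ stored by the code above
     offset 12:  .word <nested function address>

   followed by the __clear_cache call so that the instruction fetch sees
   the freshly written words.  */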
3322
3323 /* Thumb trampolines should be entered in Thumb mode, so set
3324 the bottom bit of the address. */
3325
3326 static rtx
3327 arm_trampoline_adjust_address (rtx addr)
3328 {
3329 if (TARGET_THUMB)
3330 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3331 NULL, 0, OPTAB_LIB_WIDEN);
3332 return addr;
3333 }
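
/* For illustration: on a Thumb target a trampoline placed at, say,
   0x20001000 (a hypothetical address) is handed out as 0x20001001; the
   set low bit makes an indirect call such as blx enter the stub in Thumb
   state.  */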
3334 \f
3335 /* Return 1 if it is possible to return using a single instruction.
3336 If SIBLING is non-null, this is a test for a return before a sibling
3337 call. SIBLING is the call insn, so we can examine its register usage. */
3338
3339 int
3340 use_return_insn (int iscond, rtx sibling)
3341 {
3342 int regno;
3343 unsigned int func_type;
3344 unsigned long saved_int_regs;
3345 unsigned HOST_WIDE_INT stack_adjust;
3346 arm_stack_offsets *offsets;
3347
3348 /* Never use a return instruction before reload has run. */
3349 if (!reload_completed)
3350 return 0;
3351
3352 func_type = arm_current_func_type ();
3353
3354 /* Naked, volatile and stack alignment functions need special
3355 consideration. */
3356 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3357 return 0;
3358
3359 /* So do interrupt functions that use the frame pointer and Thumb
3360 interrupt functions. */
3361 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3362 return 0;
3363
3364 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3365 && !optimize_function_for_size_p (cfun))
3366 return 0;
3367
3368 offsets = arm_get_frame_offsets ();
3369 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3370
3371 /* As do variadic functions. */
3372 if (crtl->args.pretend_args_size
3373 || cfun->machine->uses_anonymous_args
3374 /* Or if the function calls __builtin_eh_return () */
3375 || crtl->calls_eh_return
3376 /* Or if the function calls alloca */
3377 || cfun->calls_alloca
3378 /* Or if there is a stack adjustment. However, if the stack pointer
3379 is saved on the stack, we can use a pre-incrementing stack load. */
3380 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3381 && stack_adjust == 4)))
3382 return 0;
3383
3384 saved_int_regs = offsets->saved_regs_mask;
3385
3386 /* Unfortunately, the insn
3387
3388 ldmib sp, {..., sp, ...}
3389
3390 triggers a bug on most SA-110 based devices, such that the stack
3391 pointer won't be correctly restored if the instruction takes a
3392 page fault. We work around this problem by popping r3 along with
3393 the other registers, since that is never slower than executing
3394 another instruction.
3395
3396 We test for !arm_arch5 here, because code for any architecture
3397 less than this could potentially be run on one of the buggy
3398 chips. */
3399 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3400 {
3401 /* Validate that r3 is a call-clobbered register (always true in
3402 the default ABI) ... */
3403 if (!call_used_regs[3])
3404 return 0;
3405
3406 /* ... that it isn't being used for a return value ... */
3407 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3408 return 0;
3409
3410 /* ... or for a tail-call argument ... */
3411 if (sibling)
3412 {
3413 gcc_assert (CALL_P (sibling));
3414
3415 if (find_regno_fusage (sibling, USE, 3))
3416 return 0;
3417 }
3418
3419 /* ... and that there are no call-saved registers in r0-r2
3420 (always true in the default ABI). */
3421 if (saved_int_regs & 0x7)
3422 return 0;
3423 }
3424
3425 /* Can't be done if interworking with Thumb, and any registers have been
3426 stacked. */
3427 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3428 return 0;
3429
3430 /* On StrongARM, conditional returns are expensive if they aren't
3431 taken and multiple registers have been stacked. */
3432 if (iscond && arm_tune_strongarm)
3433 {
3434 /* Conditional return when just the LR is stored is a simple
3435 conditional-load instruction, that's not expensive. */
3436 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3437 return 0;
3438
3439 if (flag_pic
3440 && arm_pic_register != INVALID_REGNUM
3441 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3442 return 0;
3443 }
3444
3445 /* If there are saved registers but the LR isn't saved, then we need
3446 two instructions for the return. */
3447 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3448 return 0;
3449
3450 /* Can't be done if any of the VFP regs are pushed,
3451 since this also requires an insn. */
3452 if (TARGET_HARD_FLOAT && TARGET_VFP)
3453 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3454 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3455 return 0;
3456
3457 if (TARGET_REALLY_IWMMXT)
3458 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3459 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3460 return 0;
3461
3462 return 1;
3463 }
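
/* An illustrative note: when use_return_insn accepts, the epilogue can be
   a single instruction that restores the saved registers and returns in
   one go, for example something like

     ldmfd  sp!, {r4, r5, r6, pc}

   (the exact register list depends on what the function saved).  Any of
   the rejections above instead forces a multi-instruction epilogue.  */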
3464
3465 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3466 shrink-wrapping if possible. This is the case if we need to emit a
3467 prologue, which we can test by looking at the offsets. */
3468 bool
3469 use_simple_return_p (void)
3470 {
3471 arm_stack_offsets *offsets;
3472
3473 offsets = arm_get_frame_offsets ();
3474 return offsets->outgoing_args != 0;
3475 }
3476
3477 /* Return TRUE if int I is a valid immediate ARM constant. */
3478
3479 int
3480 const_ok_for_arm (HOST_WIDE_INT i)
3481 {
3482 int lowbit;
3483
3484 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3485 be all zero, or all one. */
3486 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3487 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3488 != ((~(unsigned HOST_WIDE_INT) 0)
3489 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3490 return FALSE;
3491
3492 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3493
3494 /* Fast return for 0 and small values. We must do this for zero, since
3495 the code below can't handle that one case. */
3496 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3497 return TRUE;
3498
3499 /* Get the number of trailing zeros. */
3500 lowbit = ffs((int) i) - 1;
3501
3502 /* Only even shifts are allowed in ARM mode so round down to the
3503 nearest even number. */
3504 if (TARGET_ARM)
3505 lowbit &= ~1;
3506
3507 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3508 return TRUE;
3509
3510 if (TARGET_ARM)
3511 {
3512 /* Allow rotated constants in ARM mode. */
3513 if (lowbit <= 4
3514 && ((i & ~0xc000003f) == 0
3515 || (i & ~0xf000000f) == 0
3516 || (i & ~0xfc000003) == 0))
3517 return TRUE;
3518 }
3519 else
3520 {
3521 HOST_WIDE_INT v;
3522
3523 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3524 v = i & 0xff;
3525 v |= v << 16;
3526 if (i == v || i == (v | (v << 8)))
3527 return TRUE;
3528
3529 /* Allow repeated pattern 0xXY00XY00. */
3530 v = i & 0xff00;
3531 v |= v << 16;
3532 if (i == v)
3533 return TRUE;
3534 }
3535
3536 return FALSE;
3537 }
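
/* Some illustrative classifications under the rules above (worked by
   hand, so treat them as a sketch rather than an exhaustive table):

     0x000000ff   valid everywhere (plain 8-bit value)
     0x00ff0000   valid in ARM mode (0xff rotated by an even amount) and
                  in Thumb-2 (8-bit value shifted to an arbitrary position)
     0x000001fe   invalid in ARM mode (0xff << 1 would need an odd
                  rotation) but valid in Thumb-2
     0x00f000f0   invalid in ARM mode, valid in Thumb-2 as the repeated
                  pattern 0x00XY00XY
     0x0001ff00   invalid in both, so it must be synthesized from several
                  instructions or loaded from memory.  */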
3538
3539 /* Return true if I is a valid constant for the operation CODE. */
3540 int
3541 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3542 {
3543 if (const_ok_for_arm (i))
3544 return 1;
3545
3546 switch (code)
3547 {
3548 case SET:
3549 /* See if we can use movw. */
3550 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3551 return 1;
3552 else
3553 /* Otherwise, try mvn. */
3554 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3555
3556 case PLUS:
3557 /* See if we can use addw or subw. */
3558 if (TARGET_THUMB2
3559 && ((i & 0xfffff000) == 0
3560 || ((-i) & 0xfffff000) == 0))
3561 return 1;
3562 /* else fall through. */
3563
3564 case COMPARE:
3565 case EQ:
3566 case NE:
3567 case GT:
3568 case LE:
3569 case LT:
3570 case GE:
3571 case GEU:
3572 case LTU:
3573 case GTU:
3574 case LEU:
3575 case UNORDERED:
3576 case ORDERED:
3577 case UNEQ:
3578 case UNGE:
3579 case UNLT:
3580 case UNGT:
3581 case UNLE:
3582 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3583
3584 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3585 case XOR:
3586 return 0;
3587
3588 case IOR:
3589 if (TARGET_THUMB2)
3590 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3591 return 0;
3592
3593 case AND:
3594 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3595
3596 default:
3597 gcc_unreachable ();
3598 }
3599 }
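
/* For illustration: a constant that is awkward in one direction may be
   fine in the other.  Adding 0xffffff00 (i.e. -256) is accepted for PLUS
   because const_ok_for_arm (256) holds, so the addition can be emitted as
   a subtraction of 0x100; similarly an AND with 0xffffff00 is accepted
   through the inverted form and ends up as a BIC of 0xff.  */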
3600
3601 /* Return true if I is a valid DImode constant for the operation CODE. */
3602 int
3603 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3604 {
3605 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3606 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3607 rtx hi = GEN_INT (hi_val);
3608 rtx lo = GEN_INT (lo_val);
3609
3610 if (TARGET_THUMB1)
3611 return 0;
3612
3613 switch (code)
3614 {
3615 case AND:
3616 case IOR:
3617 case XOR:
3618 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3619 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3620 case PLUS:
3621 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3622
3623 default:
3624 return 0;
3625 }
3626 }
3627
3628 /* Emit a sequence of insns to handle a large constant.
3629 CODE is the code of the operation required, it can be any of SET, PLUS,
3630 IOR, AND, XOR, MINUS;
3631 MODE is the mode in which the operation is being performed;
3632 VAL is the integer to operate on;
3633 SOURCE is the other operand (a register, or a null-pointer for SET);
3634 SUBTARGETS means it is safe to create scratch registers if that will
3635 either produce a simpler sequence, or we will want to cse the values.
3636 Return value is the number of insns emitted. */
3637
3638 /* ??? Tweak this for thumb2. */
3639 int
3640 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3641 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3642 {
3643 rtx cond;
3644
3645 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3646 cond = COND_EXEC_TEST (PATTERN (insn));
3647 else
3648 cond = NULL_RTX;
3649
3650 if (subtargets || code == SET
3651 || (REG_P (target) && REG_P (source)
3652 && REGNO (target) != REGNO (source)))
3653 {
3654 /* After arm_reorg has been called, we can't fix up expensive
3655 constants by pushing them into memory so we must synthesize
3656 them in-line, regardless of the cost. This is only likely to
3657 be more costly on chips that have load delay slots and we are
3658 compiling without running the scheduler (so no splitting
3659 occurred before the final instruction emission).
3660
3661 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3662 */
3663 if (!cfun->machine->after_arm_reorg
3664 && !cond
3665 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3666 1, 0)
3667 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3668 + (code != SET))))
3669 {
3670 if (code == SET)
3671 {
3672 /* Currently SET is the only monadic value for CODE; all
3673 the rest are dyadic. */
3674 if (TARGET_USE_MOVT)
3675 arm_emit_movpair (target, GEN_INT (val));
3676 else
3677 emit_set_insn (target, GEN_INT (val));
3678
3679 return 1;
3680 }
3681 else
3682 {
3683 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3684
3685 if (TARGET_USE_MOVT)
3686 arm_emit_movpair (temp, GEN_INT (val));
3687 else
3688 emit_set_insn (temp, GEN_INT (val));
3689
3690 /* For MINUS, the constant is the minuend (SOURCE is subtracted
3691 from it), since we never have subtraction of a constant. */
3692 if (code == MINUS)
3693 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3694 else
3695 emit_set_insn (target,
3696 gen_rtx_fmt_ee (code, mode, source, temp));
3697 return 2;
3698 }
3699 }
3700 }
3701
3702 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
3703 1);
3704 }
3705
3706 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
3707 ARM/Thumb-2 immediates and add up to VAL.
3708 The function return value gives the number of insns required. */
3709 static int
3710 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3711 struct four_ints *return_sequence)
3712 {
3713 int best_consecutive_zeros = 0;
3714 int i;
3715 int best_start = 0;
3716 int insns1, insns2;
3717 struct four_ints tmp_sequence;
3718
3719 /* If we aren't targeting ARM, the best place to start is always at
3720 the bottom, otherwise look more closely. */
3721 if (TARGET_ARM)
3722 {
3723 for (i = 0; i < 32; i += 2)
3724 {
3725 int consecutive_zeros = 0;
3726
3727 if (!(val & (3 << i)))
3728 {
3729 while ((i < 32) && !(val & (3 << i)))
3730 {
3731 consecutive_zeros += 2;
3732 i += 2;
3733 }
3734 if (consecutive_zeros > best_consecutive_zeros)
3735 {
3736 best_consecutive_zeros = consecutive_zeros;
3737 best_start = i - consecutive_zeros;
3738 }
3739 i -= 2;
3740 }
3741 }
3742 }
3743
3744 /* So long as it won't require any more insns to do so, it's
3745 desirable to emit a small constant (in bits 0...9) in the last
3746 insn. This way there is more chance that it can be combined with
3747 a later addressing insn to form a pre-indexed load or store
3748 operation. Consider:
3749
3750 *((volatile int *)0xe0000100) = 1;
3751 *((volatile int *)0xe0000110) = 2;
3752
3753 We want this to wind up as:
3754
3755 mov rA, #0xe0000000
3756 mov rB, #1
3757 str rB, [rA, #0x100]
3758 mov rB, #2
3759 str rB, [rA, #0x110]
3760
3761 rather than having to synthesize both large constants from scratch.
3762
3763 Therefore, we calculate how many insns would be required to emit
3764 the constant starting from `best_start', and also starting from
3765 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3766 yield a shorter sequence, we may as well use zero. */
3767 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3768 if (best_start != 0
3769 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
3770 {
3771 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
3772 if (insns2 <= insns1)
3773 {
3774 *return_sequence = tmp_sequence;
3775 insns1 = insns2;
3776 }
3777 }
3778
3779 return insns1;
3780 }
3781
3782 /* As for optimal_immediate_sequence, but starting at bit-position I. */
3783 static int
3784 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
3785 struct four_ints *return_sequence, int i)
3786 {
3787 int remainder = val & 0xffffffff;
3788 int insns = 0;
3789
3790 /* Try and find a way of doing the job in either two or three
3791 instructions.
3792
3793 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
3794 location. We start at position I. This may be the MSB, or
3795 optimal_immediate_sequence may have positioned it at the largest block
3796 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
3797 wrapping around to the top of the word when we drop off the bottom.
3798 In the worst case this code should produce no more than four insns.
3799
3800 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
3801 constants, shifted to any arbitrary location. We should always start
3802 at the MSB. */
3803 do
3804 {
3805 int end;
3806 unsigned int b1, b2, b3, b4;
3807 unsigned HOST_WIDE_INT result;
3808 int loc;
3809
3810 gcc_assert (insns < 4);
3811
3812 if (i <= 0)
3813 i += 32;
3814
3815 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
3816 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
3817 {
3818 loc = i;
3819 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
3820 /* We can use addw/subw for the last 12 bits. */
3821 result = remainder;
3822 else
3823 {
3824 /* Use an 8-bit shifted/rotated immediate. */
3825 end = i - 8;
3826 if (end < 0)
3827 end += 32;
3828 result = remainder & ((0x0ff << end)
3829 | ((i < end) ? (0xff >> (32 - end))
3830 : 0));
3831 i -= 8;
3832 }
3833 }
3834 else
3835 {
3836 /* ARM allows rotates by a multiple of two. Thumb-2 allows
3837 arbitrary shifts. */
3838 i -= TARGET_ARM ? 2 : 1;
3839 continue;
3840 }
3841
3842 /* Next, see if we can do a better job with a thumb2 replicated
3843 constant.
3844
3845 We do it this way around to catch the cases like 0x01F001E0 where
3846 two 8-bit immediates would work, but a replicated constant would
3847 make it worse.
3848
3849 TODO: 16-bit constants that don't clear all the bits, but still win.
3850 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
3851 if (TARGET_THUMB2)
3852 {
3853 b1 = (remainder & 0xff000000) >> 24;
3854 b2 = (remainder & 0x00ff0000) >> 16;
3855 b3 = (remainder & 0x0000ff00) >> 8;
3856 b4 = remainder & 0xff;
3857
3858 if (loc > 24)
3859 {
3860 /* The 8-bit immediate already found clears b1 (and maybe b2),
3861 but must leave b3 and b4 alone. */
3862
3863 /* First try to find a 32-bit replicated constant that clears
3864 almost everything. We can assume that we can't do it in one,
3865 or else we wouldn't be here. */
3866 unsigned int tmp = b1 & b2 & b3 & b4;
3867 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
3868 + (tmp << 24);
3869 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
3870 + (tmp == b3) + (tmp == b4);
3871 if (tmp
3872 && (matching_bytes >= 3
3873 || (matching_bytes == 2
3874 && const_ok_for_op (remainder & ~tmp2, code))))
3875 {
3876 /* At least 3 of the bytes match, and the fourth has at
3877 least as many bits set, or two of the bytes match
3878 and it will only require one more insn to finish. */
3879 result = tmp2;
3880 i = tmp != b1 ? 32
3881 : tmp != b2 ? 24
3882 : tmp != b3 ? 16
3883 : 8;
3884 }
3885
3886 /* Second, try to find a 16-bit replicated constant that can
3887 leave three of the bytes clear. If b2 or b4 is already
3888 zero, then we can. If the 8-bit from above would not
3889 clear b2 anyway, then we still win. */
3890 else if (b1 == b3 && (!b2 || !b4
3891 || (remainder & 0x00ff0000 & ~result)))
3892 {
3893 result = remainder & 0xff00ff00;
3894 i = 24;
3895 }
3896 }
3897 else if (loc > 16)
3898 {
3899 /* The 8-bit immediate already found clears b2 (and maybe b3)
3900 and we don't get here unless b1 is already clear, but it will
3901 leave b4 unchanged. */
3902
3903 /* If we can clear b2 and b4 at once, then we win, since the
3904 8-bits couldn't possibly reach that far. */
3905 if (b2 == b4)
3906 {
3907 result = remainder & 0x00ff00ff;
3908 i = 16;
3909 }
3910 }
3911 }
3912
3913 return_sequence->i[insns++] = result;
3914 remainder &= ~result;
3915
3916 if (code == SET || code == MINUS)
3917 code = PLUS;
3918 }
3919 while (remainder);
3920
3921 return insns;
3922 }
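
/* A worked example (a sketch; rD stands for whichever destination
   register is chosen, and the MOVW/MOVT path in arm_split_constant is
   ignored): synthesizing 0x00f000f0 with code == SET in ARM mode yields a
   two-element sequence such as { 0x00f00000, 0x000000f0 }, which
   arm_gen_constant later emits as

     mov  rD, #0x00f00000
     add  rD, rD, #0x000000f0

   (the second element is applied with PLUS because SET degrades to PLUS
   after the first insn).  In Thumb-2 mode the same value is already a
   valid modified immediate, the replicated 0x00XY00XY form, so no
   splitting is needed at all.  */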
3923
3924 /* Emit an instruction with the indicated PATTERN. If COND is
3925 non-NULL, conditionalize the execution of the instruction on COND
3926 being true. */
3927
3928 static void
3929 emit_constant_insn (rtx cond, rtx pattern)
3930 {
3931 if (cond)
3932 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
3933 emit_insn (pattern);
3934 }
3935
3936 /* As above, but extra parameter GENERATE which, if clear, suppresses
3937 RTL generation. */
3938
3939 static int
3940 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
3941 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
3942 int generate)
3943 {
3944 int can_invert = 0;
3945 int can_negate = 0;
3946 int final_invert = 0;
3947 int i;
3948 int set_sign_bit_copies = 0;
3949 int clear_sign_bit_copies = 0;
3950 int clear_zero_bit_copies = 0;
3951 int set_zero_bit_copies = 0;
3952 int insns = 0, neg_insns, inv_insns;
3953 unsigned HOST_WIDE_INT temp1, temp2;
3954 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
3955 struct four_ints *immediates;
3956 struct four_ints pos_immediates, neg_immediates, inv_immediates;
3957
3958 /* Find out which operations are safe for a given CODE. Also do a quick
3959 check for degenerate cases; these can occur when DImode operations
3960 are split. */
3961 switch (code)
3962 {
3963 case SET:
3964 can_invert = 1;
3965 break;
3966
3967 case PLUS:
3968 can_negate = 1;
3969 break;
3970
3971 case IOR:
3972 if (remainder == 0xffffffff)
3973 {
3974 if (generate)
3975 emit_constant_insn (cond,
3976 gen_rtx_SET (VOIDmode, target,
3977 GEN_INT (ARM_SIGN_EXTEND (val))));
3978 return 1;
3979 }
3980
3981 if (remainder == 0)
3982 {
3983 if (reload_completed && rtx_equal_p (target, source))
3984 return 0;
3985
3986 if (generate)
3987 emit_constant_insn (cond,
3988 gen_rtx_SET (VOIDmode, target, source));
3989 return 1;
3990 }
3991 break;
3992
3993 case AND:
3994 if (remainder == 0)
3995 {
3996 if (generate)
3997 emit_constant_insn (cond,
3998 gen_rtx_SET (VOIDmode, target, const0_rtx));
3999 return 1;
4000 }
4001 if (remainder == 0xffffffff)
4002 {
4003 if (reload_completed && rtx_equal_p (target, source))
4004 return 0;
4005 if (generate)
4006 emit_constant_insn (cond,
4007 gen_rtx_SET (VOIDmode, target, source));
4008 return 1;
4009 }
4010 can_invert = 1;
4011 break;
4012
4013 case XOR:
4014 if (remainder == 0)
4015 {
4016 if (reload_completed && rtx_equal_p (target, source))
4017 return 0;
4018 if (generate)
4019 emit_constant_insn (cond,
4020 gen_rtx_SET (VOIDmode, target, source));
4021 return 1;
4022 }
4023
4024 if (remainder == 0xffffffff)
4025 {
4026 if (generate)
4027 emit_constant_insn (cond,
4028 gen_rtx_SET (VOIDmode, target,
4029 gen_rtx_NOT (mode, source)));
4030 return 1;
4031 }
4032 final_invert = 1;
4033 break;
4034
4035 case MINUS:
4036 /* We treat MINUS as (val - source), since (source - val) is always
4037 passed as (source + (-val)). */
4038 if (remainder == 0)
4039 {
4040 if (generate)
4041 emit_constant_insn (cond,
4042 gen_rtx_SET (VOIDmode, target,
4043 gen_rtx_NEG (mode, source)));
4044 return 1;
4045 }
4046 if (const_ok_for_arm (val))
4047 {
4048 if (generate)
4049 emit_constant_insn (cond,
4050 gen_rtx_SET (VOIDmode, target,
4051 gen_rtx_MINUS (mode, GEN_INT (val),
4052 source)));
4053 return 1;
4054 }
4055
4056 break;
4057
4058 default:
4059 gcc_unreachable ();
4060 }
4061
4062 /* If we can do it in one insn get out quickly. */
4063 if (const_ok_for_op (val, code))
4064 {
4065 if (generate)
4066 emit_constant_insn (cond,
4067 gen_rtx_SET (VOIDmode, target,
4068 (source
4069 ? gen_rtx_fmt_ee (code, mode, source,
4070 GEN_INT (val))
4071 : GEN_INT (val))));
4072 return 1;
4073 }
4074
4075 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4076 insn. */
4077 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4078 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4079 {
4080 if (generate)
4081 {
4082 if (mode == SImode && i == 16)
4083 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4084 smaller insn. */
4085 emit_constant_insn (cond,
4086 gen_zero_extendhisi2
4087 (target, gen_lowpart (HImode, source)));
4088 else
4089 /* Extz only supports SImode, but we can coerce the operands
4090 into that mode. */
4091 emit_constant_insn (cond,
4092 gen_extzv_t2 (gen_lowpart (SImode, target),
4093 gen_lowpart (SImode, source),
4094 GEN_INT (i), const0_rtx));
4095 }
4096
4097 return 1;
4098 }
4099
4100 /* Calculate a few attributes that may be useful for specific
4101 optimizations. */
4102 /* Count number of leading zeros. */
4103 for (i = 31; i >= 0; i--)
4104 {
4105 if ((remainder & (1 << i)) == 0)
4106 clear_sign_bit_copies++;
4107 else
4108 break;
4109 }
4110
4111 /* Count number of leading 1's. */
4112 for (i = 31; i >= 0; i--)
4113 {
4114 if ((remainder & (1 << i)) != 0)
4115 set_sign_bit_copies++;
4116 else
4117 break;
4118 }
4119
4120 /* Count number of trailing zeros. */
4121 for (i = 0; i <= 31; i++)
4122 {
4123 if ((remainder & (1 << i)) == 0)
4124 clear_zero_bit_copies++;
4125 else
4126 break;
4127 }
4128
4129 /* Count number of trailing 1's. */
4130 for (i = 0; i <= 31; i++)
4131 {
4132 if ((remainder & (1 << i)) != 0)
4133 set_zero_bit_copies++;
4134 else
4135 break;
4136 }
4137
4138 switch (code)
4139 {
4140 case SET:
4141 /* See if we can do this by sign_extending a constant that is known
4142 to be negative. This is a good way of doing it, since the shift
4143 may well merge into a subsequent insn. */
4144 if (set_sign_bit_copies > 1)
4145 {
4146 if (const_ok_for_arm
4147 (temp1 = ARM_SIGN_EXTEND (remainder
4148 << (set_sign_bit_copies - 1))))
4149 {
4150 if (generate)
4151 {
4152 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4153 emit_constant_insn (cond,
4154 gen_rtx_SET (VOIDmode, new_src,
4155 GEN_INT (temp1)));
4156 emit_constant_insn (cond,
4157 gen_ashrsi3 (target, new_src,
4158 GEN_INT (set_sign_bit_copies - 1)));
4159 }
4160 return 2;
4161 }
4162 /* For an inverted constant, we will need to set the low bits,
4163 these will be shifted out of harm's way. */
4164 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4165 if (const_ok_for_arm (~temp1))
4166 {
4167 if (generate)
4168 {
4169 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4170 emit_constant_insn (cond,
4171 gen_rtx_SET (VOIDmode, new_src,
4172 GEN_INT (temp1)));
4173 emit_constant_insn (cond,
4174 gen_ashrsi3 (target, new_src,
4175 GEN_INT (set_sign_bit_copies - 1)));
4176 }
4177 return 2;
4178 }
4179 }
4180
4181 /* See if we can calculate the value as the difference between two
4182 valid immediates. */
4183 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4184 {
4185 int topshift = clear_sign_bit_copies & ~1;
4186
4187 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4188 & (0xff000000 >> topshift));
4189
4190 /* If temp1 is zero, then that means the 9 most significant
4191 bits of remainder were 1 and we've caused it to overflow.
4192 When topshift is 0 we don't need to do anything since we
4193 can borrow from 'bit 32'. */
4194 if (temp1 == 0 && topshift != 0)
4195 temp1 = 0x80000000 >> (topshift - 1);
4196
4197 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4198
4199 if (const_ok_for_arm (temp2))
4200 {
4201 if (generate)
4202 {
4203 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4204 emit_constant_insn (cond,
4205 gen_rtx_SET (VOIDmode, new_src,
4206 GEN_INT (temp1)));
4207 emit_constant_insn (cond,
4208 gen_addsi3 (target, new_src,
4209 GEN_INT (-temp2)));
4210 }
4211
4212 return 2;
4213 }
4214 }
4215
4216 /* See if we can generate this by setting the bottom (or the top)
4217 16 bits, and then shifting these into the other half of the
4218 word. We only look for the simplest cases; to do more would cost
4219 too much. Be careful, however, not to generate this when the
4220 alternative would take fewer insns. */
4221 if (val & 0xffff0000)
4222 {
4223 temp1 = remainder & 0xffff0000;
4224 temp2 = remainder & 0x0000ffff;
4225
4226 /* Overlaps outside this range are best done using other methods. */
4227 for (i = 9; i < 24; i++)
4228 {
4229 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4230 && !const_ok_for_arm (temp2))
4231 {
4232 rtx new_src = (subtargets
4233 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4234 : target);
4235 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4236 source, subtargets, generate);
4237 source = new_src;
4238 if (generate)
4239 emit_constant_insn
4240 (cond,
4241 gen_rtx_SET
4242 (VOIDmode, target,
4243 gen_rtx_IOR (mode,
4244 gen_rtx_ASHIFT (mode, source,
4245 GEN_INT (i)),
4246 source)));
4247 return insns + 1;
4248 }
4249 }
4250
4251 /* Don't duplicate cases already considered. */
4252 for (i = 17; i < 24; i++)
4253 {
4254 if (((temp1 | (temp1 >> i)) == remainder)
4255 && !const_ok_for_arm (temp1))
4256 {
4257 rtx new_src = (subtargets
4258 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4259 : target);
4260 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4261 source, subtargets, generate);
4262 source = new_src;
4263 if (generate)
4264 emit_constant_insn
4265 (cond,
4266 gen_rtx_SET (VOIDmode, target,
4267 gen_rtx_IOR
4268 (mode,
4269 gen_rtx_LSHIFTRT (mode, source,
4270 GEN_INT (i)),
4271 source)));
4272 return insns + 1;
4273 }
4274 }
4275 }
4276 break;
4277
4278 case IOR:
4279 case XOR:
4280 /* If we have IOR or XOR, and the constant can be loaded in a
4281 single instruction, and we can find a temporary to put it in,
4282 then this can be done in two instructions instead of 3-4. */
4283 if (subtargets
4284 /* TARGET can't be NULL if SUBTARGETS is 0 */
4285 || (reload_completed && !reg_mentioned_p (target, source)))
4286 {
4287 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4288 {
4289 if (generate)
4290 {
4291 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4292
4293 emit_constant_insn (cond,
4294 gen_rtx_SET (VOIDmode, sub,
4295 GEN_INT (val)));
4296 emit_constant_insn (cond,
4297 gen_rtx_SET (VOIDmode, target,
4298 gen_rtx_fmt_ee (code, mode,
4299 source, sub)));
4300 }
4301 return 2;
4302 }
4303 }
4304
4305 if (code == XOR)
4306 break;
4307
4308 /* Convert
4309 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4310 followed by 0s, e.g. 0xfff00000) to
4311 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
4312
4313 This can be done in 2 instructions by using shifts with mov or mvn.
4314 E.g. for
4315 x = x | 0xfff00000;
4316 we generate:
4317 mvn r0, r0, asl #12
4318 mvn r0, r0, lsr #12 */
4319 if (set_sign_bit_copies > 8
4320 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4321 {
4322 if (generate)
4323 {
4324 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4325 rtx shift = GEN_INT (set_sign_bit_copies);
4326
4327 emit_constant_insn
4328 (cond,
4329 gen_rtx_SET (VOIDmode, sub,
4330 gen_rtx_NOT (mode,
4331 gen_rtx_ASHIFT (mode,
4332 source,
4333 shift))));
4334 emit_constant_insn
4335 (cond,
4336 gen_rtx_SET (VOIDmode, target,
4337 gen_rtx_NOT (mode,
4338 gen_rtx_LSHIFTRT (mode, sub,
4339 shift))));
4340 }
4341 return 2;
4342 }
4343
4344 /* Convert
4345 x = y | constant (which has set_zero_bit_copies trailing 1s)
4346 to
4347 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4348
4349 E.g. for
4350 r0 = r0 | 0xfff
4351 we generate:
4352 mvn r0, r0, lsr #12
4353 mvn r0, r0, asl #12  */
4354 if (set_zero_bit_copies > 8
4355 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4356 {
4357 if (generate)
4358 {
4359 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4360 rtx shift = GEN_INT (set_zero_bit_copies);
4361
4362 emit_constant_insn
4363 (cond,
4364 gen_rtx_SET (VOIDmode, sub,
4365 gen_rtx_NOT (mode,
4366 gen_rtx_LSHIFTRT (mode,
4367 source,
4368 shift))));
4369 emit_constant_insn
4370 (cond,
4371 gen_rtx_SET (VOIDmode, target,
4372 gen_rtx_NOT (mode,
4373 gen_rtx_ASHIFT (mode, sub,
4374 shift))));
4375 }
4376 return 2;
4377 }
4378
4379 /* This will never be reached for Thumb2 because orn is a valid
4380 instruction. This is for Thumb1 and the ARM 32 bit cases.
4381
4382 x = y | constant (such that ~constant is a valid constant)
4383 Transform this to
4384 x = ~(~y & ~constant).
4385 */
4386 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4387 {
4388 if (generate)
4389 {
4390 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4391 emit_constant_insn (cond,
4392 gen_rtx_SET (VOIDmode, sub,
4393 gen_rtx_NOT (mode, source)));
4394 source = sub;
4395 if (subtargets)
4396 sub = gen_reg_rtx (mode);
4397 emit_constant_insn (cond,
4398 gen_rtx_SET (VOIDmode, sub,
4399 gen_rtx_AND (mode, source,
4400 GEN_INT (temp1))));
4401 emit_constant_insn (cond,
4402 gen_rtx_SET (VOIDmode, target,
4403 gen_rtx_NOT (mode, sub)));
4404 }
4405 return 3;
4406 }
4407 break;
4408
4409 case AND:
4410 /* See if two shifts will do 2 or more insns' worth of work. */
4411 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4412 {
4413 HOST_WIDE_INT shift_mask = ((0xffffffff
4414 << (32 - clear_sign_bit_copies))
4415 & 0xffffffff);
4416
4417 if ((remainder | shift_mask) != 0xffffffff)
4418 {
4419 if (generate)
4420 {
4421 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4422 insns = arm_gen_constant (AND, mode, cond,
4423 remainder | shift_mask,
4424 new_src, source, subtargets, 1);
4425 source = new_src;
4426 }
4427 else
4428 {
4429 rtx targ = subtargets ? NULL_RTX : target;
4430 insns = arm_gen_constant (AND, mode, cond,
4431 remainder | shift_mask,
4432 targ, source, subtargets, 0);
4433 }
4434 }
4435
4436 if (generate)
4437 {
4438 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4439 rtx shift = GEN_INT (clear_sign_bit_copies);
4440
4441 emit_insn (gen_ashlsi3 (new_src, source, shift));
4442 emit_insn (gen_lshrsi3 (target, new_src, shift));
4443 }
4444
4445 return insns + 2;
4446 }
4447
4448 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4449 {
4450 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4451
4452 if ((remainder | shift_mask) != 0xffffffff)
4453 {
4454 if (generate)
4455 {
4456 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4457
4458 insns = arm_gen_constant (AND, mode, cond,
4459 remainder | shift_mask,
4460 new_src, source, subtargets, 1);
4461 source = new_src;
4462 }
4463 else
4464 {
4465 rtx targ = subtargets ? NULL_RTX : target;
4466
4467 insns = arm_gen_constant (AND, mode, cond,
4468 remainder | shift_mask,
4469 targ, source, subtargets, 0);
4470 }
4471 }
4472
4473 if (generate)
4474 {
4475 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4476 rtx shift = GEN_INT (clear_zero_bit_copies);
4477
4478 emit_insn (gen_lshrsi3 (new_src, source, shift));
4479 emit_insn (gen_ashlsi3 (target, new_src, shift));
4480 }
4481
4482 return insns + 2;
4483 }
4484
4485 break;
4486
4487 default:
4488 break;
4489 }
4490
4491 /* Calculate what the instruction sequences would be if we generated it
4492 normally, negated, or inverted. */
4493 if (code == AND)
4494 /* AND cannot be split into multiple insns, so invert and use BIC. */
4495 insns = 99;
4496 else
4497 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4498
4499 if (can_negate)
4500 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4501 &neg_immediates);
4502 else
4503 neg_insns = 99;
4504
4505 if (can_invert || final_invert)
4506 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4507 &inv_immediates);
4508 else
4509 inv_insns = 99;
4510
4511 immediates = &pos_immediates;
4512
4513 /* Is the negated immediate sequence more efficient? */
4514 if (neg_insns < insns && neg_insns <= inv_insns)
4515 {
4516 insns = neg_insns;
4517 immediates = &neg_immediates;
4518 }
4519 else
4520 can_negate = 0;
4521
4522 /* Is the inverted immediate sequence more efficient?
4523 We must allow for an extra NOT instruction for XOR operations, although
4524 there is some chance that the final 'mvn' will get optimized later. */
4525 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4526 {
4527 insns = inv_insns;
4528 immediates = &inv_immediates;
4529 }
4530 else
4531 {
4532 can_invert = 0;
4533 final_invert = 0;
4534 }
4535
4536 /* Now output the chosen sequence as instructions. */
4537 if (generate)
4538 {
4539 for (i = 0; i < insns; i++)
4540 {
4541 rtx new_src, temp1_rtx;
4542
4543 temp1 = immediates->i[i];
4544
4545 if (code == SET || code == MINUS)
4546 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4547 else if ((final_invert || i < (insns - 1)) && subtargets)
4548 new_src = gen_reg_rtx (mode);
4549 else
4550 new_src = target;
4551
4552 if (can_invert)
4553 temp1 = ~temp1;
4554 else if (can_negate)
4555 temp1 = -temp1;
4556
4557 temp1 = trunc_int_for_mode (temp1, mode);
4558 temp1_rtx = GEN_INT (temp1);
4559
4560 if (code == SET)
4561 ;
4562 else if (code == MINUS)
4563 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4564 else
4565 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4566
4567 emit_constant_insn (cond,
4568 gen_rtx_SET (VOIDmode, new_src,
4569 temp1_rtx));
4570 source = new_src;
4571
4572 if (code == SET)
4573 {
4574 can_negate = can_invert;
4575 can_invert = 0;
4576 code = PLUS;
4577 }
4578 else if (code == MINUS)
4579 code = PLUS;
4580 }
4581 }
4582
4583 if (final_invert)
4584 {
4585 if (generate)
4586 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
4587 gen_rtx_NOT (mode, source)));
4588 insns++;
4589 }
4590
4591 return insns;
4592 }
4593
4594 /* Canonicalize a comparison so that we are more likely to recognize it.
4595 This can be done for a few constant compares, where we can make the
4596 immediate value easier to load. */
4597
4598 static void
4599 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4600 bool op0_preserve_value)
4601 {
4602 machine_mode mode;
4603 unsigned HOST_WIDE_INT i, maxval;
4604
4605 mode = GET_MODE (*op0);
4606 if (mode == VOIDmode)
4607 mode = GET_MODE (*op1);
4608
4609 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4610
4611 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4612 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4613 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4614 for GTU/LEU in Thumb mode. */
4615 if (mode == DImode)
4616 {
4617
4618 if (*code == GT || *code == LE
4619 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4620 {
4621 /* Missing comparison. First try to use an available
4622 comparison. */
4623 if (CONST_INT_P (*op1))
4624 {
4625 i = INTVAL (*op1);
4626 switch (*code)
4627 {
4628 case GT:
4629 case LE:
4630 if (i != maxval
4631 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4632 {
4633 *op1 = GEN_INT (i + 1);
4634 *code = *code == GT ? GE : LT;
4635 return;
4636 }
4637 break;
4638 case GTU:
4639 case LEU:
4640 if (i != ~((unsigned HOST_WIDE_INT) 0)
4641 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4642 {
4643 *op1 = GEN_INT (i + 1);
4644 *code = *code == GTU ? GEU : LTU;
4645 return;
4646 }
4647 break;
4648 default:
4649 gcc_unreachable ();
4650 }
4651 }
4652
4653 /* If that did not work, reverse the condition. */
4654 if (!op0_preserve_value)
4655 {
4656 std::swap (*op0, *op1);
4657 *code = (int)swap_condition ((enum rtx_code)*code);
4658 }
4659 }
4660 return;
4661 }
4662
4663 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4664 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4665 to facilitate possible combining with a cmp into 'ands'. */
4666 if (mode == SImode
4667 && GET_CODE (*op0) == ZERO_EXTEND
4668 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4669 && GET_MODE (XEXP (*op0, 0)) == QImode
4670 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4671 && subreg_lowpart_p (XEXP (*op0, 0))
4672 && *op1 == const0_rtx)
4673 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4674 GEN_INT (255));
4675
4676 /* Comparisons smaller than DImode. Only adjust comparisons against
4677 an out-of-range constant. */
4678 if (!CONST_INT_P (*op1)
4679 || const_ok_for_arm (INTVAL (*op1))
4680 || const_ok_for_arm (- INTVAL (*op1)))
4681 return;
4682
4683 i = INTVAL (*op1);
4684
4685 switch (*code)
4686 {
4687 case EQ:
4688 case NE:
4689 return;
4690
4691 case GT:
4692 case LE:
4693 if (i != maxval
4694 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4695 {
4696 *op1 = GEN_INT (i + 1);
4697 *code = *code == GT ? GE : LT;
4698 return;
4699 }
4700 break;
4701
4702 case GE:
4703 case LT:
4704 if (i != ~maxval
4705 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4706 {
4707 *op1 = GEN_INT (i - 1);
4708 *code = *code == GE ? GT : LE;
4709 return;
4710 }
4711 break;
4712
4713 case GTU:
4714 case LEU:
4715 if (i != ~((unsigned HOST_WIDE_INT) 0)
4716 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4717 {
4718 *op1 = GEN_INT (i + 1);
4719 *code = *code == GTU ? GEU : LTU;
4720 return;
4721 }
4722 break;
4723
4724 case GEU:
4725 case LTU:
4726 if (i != 0
4727 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4728 {
4729 *op1 = GEN_INT (i - 1);
4730 *code = *code == GEU ? GTU : LEU;
4731 return;
4732 }
4733 break;
4734
4735 default:
4736 gcc_unreachable ();
4737 }
4738 }
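
/* A worked example of the adjustment above: for an SImode test
   (x > 0x00ffffff), neither 0x00ffffff nor its negation is a valid
   immediate, but 0x01000000 is, so the GT case rewrites the test as
   (x >= 0x01000000); the comparison is equivalent and the new constant
   can be encoded directly in the cmp.  */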
4739
4740
4741 /* Define how to find the value returned by a function. */
4742
4743 static rtx
4744 arm_function_value(const_tree type, const_tree func,
4745 bool outgoing ATTRIBUTE_UNUSED)
4746 {
4747 machine_mode mode;
4748 int unsignedp ATTRIBUTE_UNUSED;
4749 rtx r ATTRIBUTE_UNUSED;
4750
4751 mode = TYPE_MODE (type);
4752
4753 if (TARGET_AAPCS_BASED)
4754 return aapcs_allocate_return_reg (mode, type, func);
4755
4756 /* Promote integer types. */
4757 if (INTEGRAL_TYPE_P (type))
4758 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4759
4760 /* Promote small structs returned in a register to full-word size
4761 for big-endian AAPCS. */
4762 if (arm_return_in_msb (type))
4763 {
4764 HOST_WIDE_INT size = int_size_in_bytes (type);
4765 if (size % UNITS_PER_WORD != 0)
4766 {
4767 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4768 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4769 }
4770 }
4771
4772 return arm_libcall_value_1 (mode);
4773 }
4774
4775 /* libcall hashtable helpers. */
4776
4777 struct libcall_hasher : typed_noop_remove <rtx_def>
4778 {
4779 typedef rtx_def value_type;
4780 typedef rtx_def compare_type;
4781 static inline hashval_t hash (const value_type *);
4782 static inline bool equal (const value_type *, const compare_type *);
4783 static inline void remove (value_type *);
4784 };
4785
4786 inline bool
4787 libcall_hasher::equal (const value_type *p1, const compare_type *p2)
4788 {
4789 return rtx_equal_p (p1, p2);
4790 }
4791
4792 inline hashval_t
4793 libcall_hasher::hash (const value_type *p1)
4794 {
4795 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
4796 }
4797
4798 typedef hash_table<libcall_hasher> libcall_table_type;
4799
4800 static void
4801 add_libcall (libcall_table_type *htab, rtx libcall)
4802 {
4803 *htab->find_slot (libcall, INSERT) = libcall;
4804 }
4805
4806 static bool
4807 arm_libcall_uses_aapcs_base (const_rtx libcall)
4808 {
4809 static bool init_done = false;
4810 static libcall_table_type *libcall_htab = NULL;
4811
4812 if (!init_done)
4813 {
4814 init_done = true;
4815
4816 libcall_htab = new libcall_table_type (31);
4817 add_libcall (libcall_htab,
4818 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
4819 add_libcall (libcall_htab,
4820 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
4821 add_libcall (libcall_htab,
4822 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
4823 add_libcall (libcall_htab,
4824 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
4825
4826 add_libcall (libcall_htab,
4827 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
4828 add_libcall (libcall_htab,
4829 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
4830 add_libcall (libcall_htab,
4831 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
4832 add_libcall (libcall_htab,
4833 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
4834
4835 add_libcall (libcall_htab,
4836 convert_optab_libfunc (sext_optab, SFmode, HFmode));
4837 add_libcall (libcall_htab,
4838 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
4839 add_libcall (libcall_htab,
4840 convert_optab_libfunc (sfix_optab, SImode, DFmode));
4841 add_libcall (libcall_htab,
4842 convert_optab_libfunc (ufix_optab, SImode, DFmode));
4843 add_libcall (libcall_htab,
4844 convert_optab_libfunc (sfix_optab, DImode, DFmode));
4845 add_libcall (libcall_htab,
4846 convert_optab_libfunc (ufix_optab, DImode, DFmode));
4847 add_libcall (libcall_htab,
4848 convert_optab_libfunc (sfix_optab, DImode, SFmode));
4849 add_libcall (libcall_htab,
4850 convert_optab_libfunc (ufix_optab, DImode, SFmode));
4851
4852 /* Values from double-precision helper functions are returned in core
4853 registers if the selected core only supports single-precision
4854 arithmetic, even if we are using the hard-float ABI. The same is
4855 true for single-precision helpers, but we will never be using the
4856 hard-float ABI on a CPU which doesn't support single-precision
4857 operations in hardware. */
4858 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
4859 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
4860 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
4861 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
4862 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
4863 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
4864 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
4865 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
4866 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
4867 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
4868 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
4869 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
4870 SFmode));
4871 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
4872 DFmode));
4873 }
4874
4875 return libcall && libcall_htab->find (libcall) != NULL;
4876 }
4877
4878 static rtx
4879 arm_libcall_value_1 (machine_mode mode)
4880 {
4881 if (TARGET_AAPCS_BASED)
4882 return aapcs_libcall_value (mode);
4883 else if (TARGET_IWMMXT_ABI
4884 && arm_vector_mode_supported_p (mode))
4885 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
4886 else
4887 return gen_rtx_REG (mode, ARG_REGISTER (1));
4888 }
4889
4890 /* Define how to find the value returned by a library function
4891 assuming the value has mode MODE. */
4892
4893 static rtx
4894 arm_libcall_value (machine_mode mode, const_rtx libcall)
4895 {
4896 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
4897 && GET_MODE_CLASS (mode) == MODE_FLOAT)
4898 {
4899 /* The following libcalls return their result in integer registers,
4900 even though they return a floating point value. */
4901 if (arm_libcall_uses_aapcs_base (libcall))
4902 return gen_rtx_REG (mode, ARG_REGISTER(1));
4903
4904 }
4905
4906 return arm_libcall_value_1 (mode);
4907 }
4908
4909 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
4910
4911 static bool
4912 arm_function_value_regno_p (const unsigned int regno)
4913 {
4914 if (regno == ARG_REGISTER (1)
4915 || (TARGET_32BIT
4916 && TARGET_AAPCS_BASED
4917 && TARGET_VFP
4918 && TARGET_HARD_FLOAT
4919 && regno == FIRST_VFP_REGNUM)
4920 || (TARGET_IWMMXT_ABI
4921 && regno == FIRST_IWMMXT_REGNUM))
4922 return true;
4923
4924 return false;
4925 }
4926
4927 /* Determine the amount of memory needed to store the possible return
4928 registers of an untyped call. */
4929 int
4930 arm_apply_result_size (void)
4931 {
4932 int size = 16;
4933
4934 if (TARGET_32BIT)
4935 {
4936 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
4937 size += 32;
4938 if (TARGET_IWMMXT_ABI)
4939 size += 8;
4940 }
4941
4942 return size;
4943 }
4944
4945 /* Decide whether TYPE should be returned in memory (true)
4946 or in a register (false). FNTYPE is the type of the function making
4947 the call. */
4948 static bool
4949 arm_return_in_memory (const_tree type, const_tree fntype)
4950 {
4951 HOST_WIDE_INT size;
4952
4953 size = int_size_in_bytes (type); /* Negative if not fixed size. */
4954
4955 if (TARGET_AAPCS_BASED)
4956 {
4957 /* Simple, non-aggregate types (i.e. not including vectors and
4958 complex) are always returned in a register (or registers).
4959 We don't care about which register here, so we can short-cut
4960 some of the detail. */
4961 if (!AGGREGATE_TYPE_P (type)
4962 && TREE_CODE (type) != VECTOR_TYPE
4963 && TREE_CODE (type) != COMPLEX_TYPE)
4964 return false;
4965
4966 /* Any return value that is no larger than one word can be
4967 returned in r0. */
4968 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
4969 return false;
4970
4971 /* Check any available co-processors to see if they accept the
4972 type as a register candidate (VFP, for example, can return
4973 some aggregates in consecutive registers). These aren't
4974 available if the call is variadic. */
4975 if (aapcs_select_return_coproc (type, fntype) >= 0)
4976 return false;
4977
4978 /* Vector values should be returned using ARM registers, not
4979 memory (unless they're over 16 bytes, which will break since
4980 we only have four call-clobbered registers to play with). */
4981 if (TREE_CODE (type) == VECTOR_TYPE)
4982 return (size < 0 || size > (4 * UNITS_PER_WORD));
4983
4984 /* The rest go in memory. */
4985 return true;
4986 }
4987
4988 if (TREE_CODE (type) == VECTOR_TYPE)
4989 return (size < 0 || size > (4 * UNITS_PER_WORD));
4990
4991 if (!AGGREGATE_TYPE_P (type) &&
4992 (TREE_CODE (type) != VECTOR_TYPE))
4993 /* All simple types are returned in registers. */
4994 return false;
4995
4996 if (arm_abi != ARM_ABI_APCS)
4997 {
4998 /* ATPCS and later return aggregate types in memory only if they are
4999 larger than a word (or are variable size). */
5000 return (size < 0 || size > UNITS_PER_WORD);
5001 }
5002
5003 /* For the arm-wince targets we choose to be compatible with Microsoft's
5004 ARM and Thumb compilers, which always return aggregates in memory. */
5005 #ifndef ARM_WINCE
5006 /* All structures/unions bigger than one word are returned in memory.
5007 Also catch the case where int_size_in_bytes returns -1. In this case
5008 the aggregate is either huge or of variable size, and in either case
5009 we will want to return it via memory and not in a register. */
5010 if (size < 0 || size > UNITS_PER_WORD)
5011 return true;
5012
5013 if (TREE_CODE (type) == RECORD_TYPE)
5014 {
5015 tree field;
5016
5017 /* For a struct the APCS says that we only return in a register
5018 if the type is 'integer like' and every addressable element
5019 has an offset of zero. For practical purposes this means
5020 that the structure can have at most one non bit-field element
5021 and that this element must be the first one in the structure. */
5022
5023 /* Find the first field, ignoring non FIELD_DECL things which will
5024 have been created by C++. */
5025 for (field = TYPE_FIELDS (type);
5026 field && TREE_CODE (field) != FIELD_DECL;
5027 field = DECL_CHAIN (field))
5028 continue;
5029
5030 if (field == NULL)
5031 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5032
5033 /* Check that the first field is valid for returning in a register. */
5034
5035 /* ... Floats are not allowed */
5036 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5037 return true;
5038
5039 /* ... Aggregates that are not themselves valid for returning in
5040 a register are not allowed. */
5041 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5042 return true;
5043
5044 /* Now check the remaining fields, if any. Only bitfields are allowed,
5045 since they are not addressable. */
5046 for (field = DECL_CHAIN (field);
5047 field;
5048 field = DECL_CHAIN (field))
5049 {
5050 if (TREE_CODE (field) != FIELD_DECL)
5051 continue;
5052
5053 if (!DECL_BIT_FIELD_TYPE (field))
5054 return true;
5055 }
5056
5057 return false;
5058 }
5059
5060 if (TREE_CODE (type) == UNION_TYPE)
5061 {
5062 tree field;
5063
5064 /* Unions can be returned in registers if every element is
5065 integral, or can be returned in an integer register. */
5066 for (field = TYPE_FIELDS (type);
5067 field;
5068 field = DECL_CHAIN (field))
5069 {
5070 if (TREE_CODE (field) != FIELD_DECL)
5071 continue;
5072
5073 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5074 return true;
5075
5076 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5077 return true;
5078 }
5079
5080 return false;
5081 }
5082 #endif /* not ARM_WINCE */
5083
5084 /* Return all other types in memory. */
5085 return true;
5086 }
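
/* Some illustrative outcomes of the AAPCS branch above (hypothetical
   struct tags):

     struct s1 { char c[4]; };    returned in r0 (no larger than one word)
     struct s2 { int a, b; };     returned in memory (two words, and not a
                                  co-processor candidate)
     struct s3 { double x, y; };  returned in VFP registers when the
                                  hard-float VFP PCS accepts it as a
                                  homogeneous aggregate, otherwise in
                                  memory.  */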
5087
5088 const struct pcs_attribute_arg
5089 {
5090 const char *arg;
5091 enum arm_pcs value;
5092 } pcs_attribute_args[] =
5093 {
5094 {"aapcs", ARM_PCS_AAPCS},
5095 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5096 #if 0
5097 /* We could recognize these, but changes would be needed elsewhere
5098 * to implement them. */
5099 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5100 {"atpcs", ARM_PCS_ATPCS},
5101 {"apcs", ARM_PCS_APCS},
5102 #endif
5103 {NULL, ARM_PCS_UNKNOWN}
5104 };
5105
5106 static enum arm_pcs
5107 arm_pcs_from_attribute (tree attr)
5108 {
5109 const struct pcs_attribute_arg *ptr;
5110 const char *arg;
5111
5112 /* Get the value of the argument. */
5113 if (TREE_VALUE (attr) == NULL_TREE
5114 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5115 return ARM_PCS_UNKNOWN;
5116
5117 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5118
5119 /* Check it against the list of known arguments. */
5120 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5121 if (streq (arg, ptr->arg))
5122 return ptr->value;
5123
5124 /* An unrecognized PCS name. */
5125 return ARM_PCS_UNKNOWN;
5126 }
5127
5128 /* Get the PCS variant to use for this call. TYPE is the function's type
5129 specification, DECL is the specific declaration. DECL may be null if
5130 the call could be indirect or if this is a library call. */
5131 static enum arm_pcs
5132 arm_get_pcs_model (const_tree type, const_tree decl)
5133 {
5134 bool user_convention = false;
5135 enum arm_pcs user_pcs = arm_pcs_default;
5136 tree attr;
5137
5138 gcc_assert (type);
5139
5140 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5141 if (attr)
5142 {
5143 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5144 user_convention = true;
5145 }
5146
5147 if (TARGET_AAPCS_BASED)
5148 {
5149 /* Detect varargs functions. These always use the base rules
5150 (no argument is ever a candidate for a co-processor
5151 register). */
5152 bool base_rules = stdarg_p (type);
5153
5154 if (user_convention)
5155 {
5156 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5157 sorry ("non-AAPCS derived PCS variant");
5158 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5159 error ("variadic functions must use the base AAPCS variant");
5160 }
5161
5162 if (base_rules)
5163 return ARM_PCS_AAPCS;
5164 else if (user_convention)
5165 return user_pcs;
5166 else if (decl && flag_unit_at_a_time)
5167 {
5168 /* Local functions never leak outside this compilation unit,
5169 so we are free to use whatever conventions are
5170 appropriate. */
5171 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5172 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5173 if (i && i->local)
5174 return ARM_PCS_AAPCS_LOCAL;
5175 }
5176 }
5177 else if (user_convention && user_pcs != arm_pcs_default)
5178 sorry ("PCS variant");
5179
5180 /* For everything else we use the target's default. */
5181 return arm_pcs_default;
5182 }
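
/* An illustrative use of the "pcs" attribute handled above (hypothetical
   function name and signature):

     double dot (const double *a, const double *b, int n)
       __attribute__ ((pcs ("aapcs-vfp")));

   selects ARM_PCS_AAPCS_VFP for calls to this function even when the
   translation unit's default is the base AAPCS variant.  */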
5183
5184
5185 static void
5186 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5187 const_tree fntype ATTRIBUTE_UNUSED,
5188 rtx libcall ATTRIBUTE_UNUSED,
5189 const_tree fndecl ATTRIBUTE_UNUSED)
5190 {
5191 /* Record the unallocated VFP registers. */
5192 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5193 pcum->aapcs_vfp_reg_alloc = 0;
5194 }
5195
5196 /* Walk down the type tree of TYPE counting consecutive base elements.
5197 If *MODEP is VOIDmode, then set it to the first valid floating point
5198 type. If a non-floating point type is found, or if a floating point
5199 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5200 otherwise return the count in the sub-tree. */
5201 static int
5202 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5203 {
5204 machine_mode mode;
5205 HOST_WIDE_INT size;
5206
5207 switch (TREE_CODE (type))
5208 {
5209 case REAL_TYPE:
5210 mode = TYPE_MODE (type);
5211 if (mode != DFmode && mode != SFmode)
5212 return -1;
5213
5214 if (*modep == VOIDmode)
5215 *modep = mode;
5216
5217 if (*modep == mode)
5218 return 1;
5219
5220 break;
5221
5222 case COMPLEX_TYPE:
5223 mode = TYPE_MODE (TREE_TYPE (type));
5224 if (mode != DFmode && mode != SFmode)
5225 return -1;
5226
5227 if (*modep == VOIDmode)
5228 *modep = mode;
5229
5230 if (*modep == mode)
5231 return 2;
5232
5233 break;
5234
5235 case VECTOR_TYPE:
5236 /* Use V2SImode and V4SImode as representatives of all 64-bit
5237 and 128-bit vector types, whether or not those modes are
5238 supported with the present options. */
5239 size = int_size_in_bytes (type);
5240 switch (size)
5241 {
5242 case 8:
5243 mode = V2SImode;
5244 break;
5245 case 16:
5246 mode = V4SImode;
5247 break;
5248 default:
5249 return -1;
5250 }
5251
5252 if (*modep == VOIDmode)
5253 *modep = mode;
5254
5255 /* Vector modes are considered to be opaque: two vectors are
5256 equivalent for the purposes of being homogeneous aggregates
5257 if they are the same size. */
5258 if (*modep == mode)
5259 return 1;
5260
5261 break;
5262
5263 case ARRAY_TYPE:
5264 {
5265 int count;
5266 tree index = TYPE_DOMAIN (type);
5267
5268 /* Can't handle incomplete types nor sizes that are not
5269 fixed. */
5270 if (!COMPLETE_TYPE_P (type)
5271 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5272 return -1;
5273
5274 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5275 if (count == -1
5276 || !index
5277 || !TYPE_MAX_VALUE (index)
5278 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5279 || !TYPE_MIN_VALUE (index)
5280 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5281 || count < 0)
5282 return -1;
5283
5284 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5285 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5286
5287 /* There must be no padding. */
5288 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5289 return -1;
5290
5291 return count;
5292 }
5293
5294 case RECORD_TYPE:
5295 {
5296 int count = 0;
5297 int sub_count;
5298 tree field;
5299
5300 /* Can't handle incomplete types nor sizes that are not
5301 fixed. */
5302 if (!COMPLETE_TYPE_P (type)
5303 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5304 return -1;
5305
5306 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5307 {
5308 if (TREE_CODE (field) != FIELD_DECL)
5309 continue;
5310
5311 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5312 if (sub_count < 0)
5313 return -1;
5314 count += sub_count;
5315 }
5316
5317 /* There must be no padding. */
5318 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5319 return -1;
5320
5321 return count;
5322 }
5323
5324 case UNION_TYPE:
5325 case QUAL_UNION_TYPE:
5326 {
5327 /* These aren't very interesting except in a degenerate case. */
5328 int count = 0;
5329 int sub_count;
5330 tree field;
5331
5332 /* Can't handle incomplete types nor sizes that are not
5333 fixed. */
5334 if (!COMPLETE_TYPE_P (type)
5335 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5336 return -1;
5337
5338 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5339 {
5340 if (TREE_CODE (field) != FIELD_DECL)
5341 continue;
5342
5343 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5344 if (sub_count < 0)
5345 return -1;
5346 count = count > sub_count ? count : sub_count;
5347 }
5348
5349 /* There must be no padding. */
5350 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5351 return -1;
5352
5353 return count;
5354 }
5355
5356 default:
5357 break;
5358 }
5359
5360 return -1;
5361 }
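
/* Illustrative examples of the walk above (hypothetical types):

       struct hfa2f { float a, b; };          two elements, SFmode
       struct hfa3d { double d[3]; };         three elements, DFmode
       struct mixed { float a; double b; };   mismatched modes, returns -1

   The caller (aapcs_vfp_is_call_or_return_candidate) additionally
   rejects candidates with more than four elements.  */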
5362
5363 /* Return true if PCS_VARIANT should use VFP registers. */
5364 static bool
5365 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5366 {
5367 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5368 {
5369 static bool seen_thumb1_vfp = false;
5370
5371 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5372 {
5373 sorry ("Thumb-1 hard-float VFP ABI");
5374 /* sorry() is not immediately fatal, so only display this once. */
5375 seen_thumb1_vfp = true;
5376 }
5377
5378 return true;
5379 }
5380
5381 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5382 return false;
5383
5384 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5385 (TARGET_VFP_DOUBLE || !is_double));
5386 }
5387
5388 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5389 suitable for passing or returning in VFP registers for the PCS
5390 variant selected. If it is, then *BASE_MODE is updated to contain
5391 a machine mode describing each element of the argument's type and
5392 *COUNT to hold the number of such elements. */
5393 static bool
5394 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5395 machine_mode mode, const_tree type,
5396 machine_mode *base_mode, int *count)
5397 {
5398 machine_mode new_mode = VOIDmode;
5399
5400 /* If we have the type information, prefer that to working things
5401 out from the mode. */
5402 if (type)
5403 {
5404 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5405
5406 if (ag_count > 0 && ag_count <= 4)
5407 *count = ag_count;
5408 else
5409 return false;
5410 }
5411 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5412 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5413 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5414 {
5415 *count = 1;
5416 new_mode = mode;
5417 }
5418 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5419 {
5420 *count = 2;
5421 new_mode = (mode == DCmode ? DFmode : SFmode);
5422 }
5423 else
5424 return false;
5425
5426
5427 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5428 return false;
5429
5430 *base_mode = new_mode;
5431 return true;
5432 }
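
/* For example (illustrative): a _Complex double argument yields
   *BASE_MODE == DFmode and *COUNT == 2, while a 16-byte vector type is
   treated as a single V4SImode element.  Anything that is not a short
   homogeneous floating-point or vector aggregate returns false.  */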
5433
5434 static bool
5435 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5436 machine_mode mode, const_tree type)
5437 {
5438 int count ATTRIBUTE_UNUSED;
5439 machine_mode ag_mode ATTRIBUTE_UNUSED;
5440
5441 if (!use_vfp_abi (pcs_variant, false))
5442 return false;
5443 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5444 &ag_mode, &count);
5445 }
5446
5447 static bool
5448 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5449 const_tree type)
5450 {
5451 if (!use_vfp_abi (pcum->pcs_variant, false))
5452 return false;
5453
5454 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5455 &pcum->aapcs_vfp_rmode,
5456 &pcum->aapcs_vfp_rcount);
5457 }
5458
5459 static bool
5460 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5461 const_tree type ATTRIBUTE_UNUSED)
5462 {
5463 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5464 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5465 int regno;
5466
5467 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5468 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5469 {
5470 pcum->aapcs_vfp_reg_alloc = mask << regno;
5471 if (mode == BLKmode
5472 || (mode == TImode && ! TARGET_NEON)
5473 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5474 {
5475 int i;
5476 int rcount = pcum->aapcs_vfp_rcount;
5477 int rshift = shift;
5478 machine_mode rmode = pcum->aapcs_vfp_rmode;
5479 rtx par;
5480 if (!TARGET_NEON)
5481 {
5482 /* Avoid using unsupported vector modes. */
5483 if (rmode == V2SImode)
5484 rmode = DImode;
5485 else if (rmode == V4SImode)
5486 {
5487 rmode = DImode;
5488 rcount *= 2;
5489 rshift /= 2;
5490 }
5491 }
5492 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5493 for (i = 0; i < rcount; i++)
5494 {
5495 rtx tmp = gen_rtx_REG (rmode,
5496 FIRST_VFP_REGNUM + regno + i * rshift);
5497 tmp = gen_rtx_EXPR_LIST
5498 (VOIDmode, tmp,
5499 GEN_INT (i * GET_MODE_SIZE (rmode)));
5500 XVECEXP (par, 0, i) = tmp;
5501 }
5502
5503 pcum->aapcs_reg = par;
5504 }
5505 else
5506 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5507 return true;
5508 }
5509 return false;
5510 }
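
/* A worked example of the allocation above (illustrative): for a
   candidate of three DFmode elements, SHIFT is 2 (one double covers two
   S registers) and MASK is 0x3f, so the loop searches for six
   consecutive free S registers starting at even boundaries; if s0-s5
   are free the argument lands in d0-d2 and aapcs_vfp_reg_alloc records
   the mask 0x3f.  */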
5511
5512 static rtx
5513 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5514 machine_mode mode,
5515 const_tree type ATTRIBUTE_UNUSED)
5516 {
5517 if (!use_vfp_abi (pcs_variant, false))
5518 return NULL;
5519
5520 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5521 {
5522 int count;
5523 machine_mode ag_mode;
5524 int i;
5525 rtx par;
5526 int shift;
5527
5528 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5529 &ag_mode, &count);
5530
5531 if (!TARGET_NEON)
5532 {
5533 if (ag_mode == V2SImode)
5534 ag_mode = DImode;
5535 else if (ag_mode == V4SImode)
5536 {
5537 ag_mode = DImode;
5538 count *= 2;
5539 }
5540 }
5541 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5542 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5543 for (i = 0; i < count; i++)
5544 {
5545 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5546 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5547 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5548 XVECEXP (par, 0, i) = tmp;
5549 }
5550
5551 return par;
5552 }
5553
5554 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5555 }
5556
5557 static void
5558 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5559 machine_mode mode ATTRIBUTE_UNUSED,
5560 const_tree type ATTRIBUTE_UNUSED)
5561 {
5562 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5563 pcum->aapcs_vfp_reg_alloc = 0;
5564 return;
5565 }
5566
5567 #define AAPCS_CP(X) \
5568 { \
5569 aapcs_ ## X ## _cum_init, \
5570 aapcs_ ## X ## _is_call_candidate, \
5571 aapcs_ ## X ## _allocate, \
5572 aapcs_ ## X ## _is_return_candidate, \
5573 aapcs_ ## X ## _allocate_return_reg, \
5574 aapcs_ ## X ## _advance \
5575 }
5576
5577 /* Table of co-processors that can be used to pass arguments in
5578 registers. Ideally no argument should be a candidate for more than
5579 one co-processor table entry, but the table is processed in order
5580 and stops after the first match. If that entry then fails to put
5581 the argument into a co-processor register, the argument will go on
5582 the stack. */
5583 static struct
5584 {
5585 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5586 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5587
5588 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5589 BLKmode) is a candidate for this co-processor's registers; this
5590 function should ignore any position-dependent state in
5591 CUMULATIVE_ARGS and only use call-type dependent information. */
5592 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5593
5594 /* Return true if the argument does get a co-processor register; it
5595 should set aapcs_reg to an RTX of the register allocated as is
5596 required for a return from FUNCTION_ARG. */
5597 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5598
5599 /* Return true if a result of mode MODE (or type TYPE if MODE is
5600 BLKmode) can be returned in this co-processor's registers. */
5601 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5602
5603 /* Allocate and return an RTX element to hold the return type of a
5604 call, this routine must not fail and will only be called if
5605 is_return_candidate returned true with the same parameters. */
5606 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5607
5608 /* Finish processing this argument and prepare to start processing
5609 the next one. */
5610 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5611 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5612 {
5613 AAPCS_CP(vfp)
5614 };
5615
5616 #undef AAPCS_CP
5617
5618 static int
5619 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5620 const_tree type)
5621 {
5622 int i;
5623
5624 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5625 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5626 return i;
5627
5628 return -1;
5629 }
5630
5631 static int
5632 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5633 {
5634 /* We aren't passed a decl, so we can't check that a call is local.
5635 However, it isn't clear that that would be a win anyway, since it
5636 might limit some tail-calling opportunities. */
5637 enum arm_pcs pcs_variant;
5638
5639 if (fntype)
5640 {
5641 const_tree fndecl = NULL_TREE;
5642
5643 if (TREE_CODE (fntype) == FUNCTION_DECL)
5644 {
5645 fndecl = fntype;
5646 fntype = TREE_TYPE (fntype);
5647 }
5648
5649 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5650 }
5651 else
5652 pcs_variant = arm_pcs_default;
5653
5654 if (pcs_variant != ARM_PCS_AAPCS)
5655 {
5656 int i;
5657
5658 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5659 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5660 TYPE_MODE (type),
5661 type))
5662 return i;
5663 }
5664 return -1;
5665 }
5666
5667 static rtx
5668 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5669 const_tree fntype)
5670 {
5671 /* We aren't passed a decl, so we can't check that a call is local.
5672 However, it isn't clear that that would be a win anyway, since it
5673 might limit some tail-calling opportunities. */
5674 enum arm_pcs pcs_variant;
5675 int unsignedp ATTRIBUTE_UNUSED;
5676
5677 if (fntype)
5678 {
5679 const_tree fndecl = NULL_TREE;
5680
5681 if (TREE_CODE (fntype) == FUNCTION_DECL)
5682 {
5683 fndecl = fntype;
5684 fntype = TREE_TYPE (fntype);
5685 }
5686
5687 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5688 }
5689 else
5690 pcs_variant = arm_pcs_default;
5691
5692 /* Promote integer types. */
5693 if (type && INTEGRAL_TYPE_P (type))
5694 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5695
5696 if (pcs_variant != ARM_PCS_AAPCS)
5697 {
5698 int i;
5699
5700 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5701 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5702 type))
5703 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5704 mode, type);
5705 }
5706
5707 /* Promote small structs returned in a register to full-word size
5708 for big-endian AAPCS. */
5709 if (type && arm_return_in_msb (type))
5710 {
5711 HOST_WIDE_INT size = int_size_in_bytes (type);
5712 if (size % UNITS_PER_WORD != 0)
5713 {
5714 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5715 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5716 }
5717 }
5718
5719 return gen_rtx_REG (mode, R0_REGNUM);
5720 }
5721
5722 static rtx
5723 aapcs_libcall_value (machine_mode mode)
5724 {
5725 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5726 && GET_MODE_SIZE (mode) <= 4)
5727 mode = SImode;
5728
5729 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5730 }
5731
5732 /* Lay out a function argument using the AAPCS rules. The rule
5733 numbers referred to here are those in the AAPCS. */
5734 static void
5735 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
5736 const_tree type, bool named)
5737 {
5738 int nregs, nregs2;
5739 int ncrn;
5740
5741 /* We only need to do this once per argument. */
5742 if (pcum->aapcs_arg_processed)
5743 return;
5744
5745 pcum->aapcs_arg_processed = true;
5746
5747 /* Special case: if named is false then we are handling an incoming
5748 anonymous argument which is on the stack. */
5749 if (!named)
5750 return;
5751
5752 /* Is this a potential co-processor register candidate? */
5753 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5754 {
5755 int slot = aapcs_select_call_coproc (pcum, mode, type);
5756 pcum->aapcs_cprc_slot = slot;
5757
5758 /* We don't have to apply any of the rules from part B of the
5759 preparation phase, these are handled elsewhere in the
5760 compiler. */
5761
5762 if (slot >= 0)
5763 {
5764 /* A Co-processor register candidate goes either in its own
5765 class of registers or on the stack. */
5766 if (!pcum->aapcs_cprc_failed[slot])
5767 {
5768 /* C1.cp - Try to allocate the argument to co-processor
5769 registers. */
5770 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5771 return;
5772
5773 /* C2.cp - Put the argument on the stack and note that we
5774 can't assign any more candidates in this slot. We also
5775 need to note that we have allocated stack space, so that
5776 we won't later try to split a non-cprc candidate between
5777 core registers and the stack. */
5778 pcum->aapcs_cprc_failed[slot] = true;
5779 pcum->can_split = false;
5780 }
5781
5782 /* We didn't get a register, so this argument goes on the
5783 stack. */
5784 gcc_assert (pcum->can_split == false);
5785 return;
5786 }
5787 }
5788
5789 /* C3 - For double-word aligned arguments, round the NCRN up to the
5790 next even number. */
5791 ncrn = pcum->aapcs_ncrn;
5792 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
5793 ncrn++;
5794
5795 nregs = ARM_NUM_REGS2(mode, type);
5796
5797 /* Sigh, this test should really assert that nregs > 0, but a GCC
5798 extension allows empty structs and then gives them empty size; it
5799 then allows such a structure to be passed by value. For some of
5800 the code below we have to pretend that such an argument has
5801 non-zero size so that we 'locate' it correctly either in
5802 registers or on the stack. */
5803 gcc_assert (nregs >= 0);
5804
5805 nregs2 = nregs ? nregs : 1;
5806
5807 /* C4 - Argument fits entirely in core registers. */
5808 if (ncrn + nregs2 <= NUM_ARG_REGS)
5809 {
5810 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5811 pcum->aapcs_next_ncrn = ncrn + nregs;
5812 return;
5813 }
5814
5815 /* C5 - Some core registers left and there are no arguments already
5816 on the stack: split this argument between the remaining core
5817 registers and the stack. */
5818 if (ncrn < NUM_ARG_REGS && pcum->can_split)
5819 {
5820 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
5821 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5822 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
5823 return;
5824 }
5825
5826 /* C6 - NCRN is set to 4. */
5827 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
5828
5829 /* C7,C8 - the argument goes on the stack. We have nothing to do here. */
5830 return;
5831 }
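
/* A short example of rules C3-C8 above (illustrative): for
   f (int a, long long b, int c), A takes r0 (C4); B needs doubleword
   alignment, so the NCRN is rounded up from 1 to 2 (C3) and B occupies
   r2/r3 (C4); C then finds no core registers left and goes on the
   stack (C6-C8).  */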
5832
5833 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5834 for a call to a function whose data type is FNTYPE.
5835 For a library call, FNTYPE is NULL. */
5836 void
5837 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
5838 rtx libname,
5839 tree fndecl ATTRIBUTE_UNUSED)
5840 {
5841 /* Long call handling. */
5842 if (fntype)
5843 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
5844 else
5845 pcum->pcs_variant = arm_pcs_default;
5846
5847 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5848 {
5849 if (arm_libcall_uses_aapcs_base (libname))
5850 pcum->pcs_variant = ARM_PCS_AAPCS;
5851
5852 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
5853 pcum->aapcs_reg = NULL_RTX;
5854 pcum->aapcs_partial = 0;
5855 pcum->aapcs_arg_processed = false;
5856 pcum->aapcs_cprc_slot = -1;
5857 pcum->can_split = true;
5858
5859 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5860 {
5861 int i;
5862
5863 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5864 {
5865 pcum->aapcs_cprc_failed[i] = false;
5866 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
5867 }
5868 }
5869 return;
5870 }
5871
5872 /* Legacy ABIs */
5873
5874 /* On the ARM, the offset starts at 0. */
5875 pcum->nregs = 0;
5876 pcum->iwmmxt_nregs = 0;
5877 pcum->can_split = true;
5878
5879 /* Varargs vectors are treated the same as long long.
5880 named_count avoids having to change the way arm handles 'named'. */
5881 pcum->named_count = 0;
5882 pcum->nargs = 0;
5883
5884 if (TARGET_REALLY_IWMMXT && fntype)
5885 {
5886 tree fn_arg;
5887
5888 for (fn_arg = TYPE_ARG_TYPES (fntype);
5889 fn_arg;
5890 fn_arg = TREE_CHAIN (fn_arg))
5891 pcum->named_count += 1;
5892
5893 if (! pcum->named_count)
5894 pcum->named_count = INT_MAX;
5895 }
5896 }
5897
5898 /* Return true if we use LRA instead of reload pass. */
5899 static bool
5900 arm_lra_p (void)
5901 {
5902 return arm_lra_flag;
5903 }
5904
5905 /* Return true if mode/type need doubleword alignment. */
5906 static bool
5907 arm_needs_doubleword_align (machine_mode mode, const_tree type)
5908 {
5909 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
5910 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
5911 }
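
/* For instance (illustrative): DImode and DFmode arguments, and any
   type declared with 8-byte alignment, answer true here and are
   therefore placed in an even-numbered core register pair or at an
   8-byte aligned stack slot; SImode and plain int answer false.  */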
5912
5913
5914 /* Determine where to put an argument to a function.
5915 Value is zero to push the argument on the stack,
5916 or a hard register in which to store the argument.
5917
5918 MODE is the argument's machine mode.
5919 TYPE is the data type of the argument (as a tree).
5920 This is null for libcalls where that information may
5921 not be available.
5922 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5923 the preceding args and about the function being called.
5924 NAMED is nonzero if this argument is a named parameter
5925 (otherwise it is an extra parameter matching an ellipsis).
5926
5927 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
5928 other arguments are passed on the stack. If (NAMED == 0) (which happens
5929 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
5930 defined), say it is passed in the stack (function_prologue will
5931 indeed make it pass in the stack if necessary). */
5932
5933 static rtx
5934 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
5935 const_tree type, bool named)
5936 {
5937 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5938 int nregs;
5939
5940 /* Handle the special case quickly. Pick an arbitrary value for op2 of
5941 a call insn (op3 of a call_value insn). */
5942 if (mode == VOIDmode)
5943 return const0_rtx;
5944
5945 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
5946 {
5947 aapcs_layout_arg (pcum, mode, type, named);
5948 return pcum->aapcs_reg;
5949 }
5950
5951 /* Varargs vectors are treated the same as long long.
5952 named_count avoids having to change the way arm handles 'named'. */
5953 if (TARGET_IWMMXT_ABI
5954 && arm_vector_mode_supported_p (mode)
5955 && pcum->named_count > pcum->nargs + 1)
5956 {
5957 if (pcum->iwmmxt_nregs <= 9)
5958 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
5959 else
5960 {
5961 pcum->can_split = false;
5962 return NULL_RTX;
5963 }
5964 }
5965
5966 /* Put doubleword aligned quantities in even register pairs. */
5967 if (pcum->nregs & 1
5968 && ARM_DOUBLEWORD_ALIGN
5969 && arm_needs_doubleword_align (mode, type))
5970 pcum->nregs++;
5971
5972 /* Only allow splitting an arg between regs and memory if all preceding
5973 args were allocated to regs. For args passed by reference we only count
5974 the reference pointer. */
5975 if (pcum->can_split)
5976 nregs = 1;
5977 else
5978 nregs = ARM_NUM_REGS2 (mode, type);
5979
5980 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
5981 return NULL_RTX;
5982
5983 return gen_rtx_REG (mode, pcum->nregs);
5984 }
5985
5986 static unsigned int
5987 arm_function_arg_boundary (machine_mode mode, const_tree type)
5988 {
5989 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
5990 ? DOUBLEWORD_ALIGNMENT
5991 : PARM_BOUNDARY);
5992 }
5993
5994 static int
5995 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
5996 tree type, bool named)
5997 {
5998 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
5999 int nregs = pcum->nregs;
6000
6001 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6002 {
6003 aapcs_layout_arg (pcum, mode, type, named);
6004 return pcum->aapcs_partial;
6005 }
6006
6007 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6008 return 0;
6009
6010 if (NUM_ARG_REGS > nregs
6011 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6012 && pcum->can_split)
6013 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6014
6015 return 0;
6016 }
6017
6018 /* Update the data in PCUM to advance over an argument
6019 of mode MODE and data type TYPE.
6020 (TYPE is null for libcalls where that information may not be available.) */
6021
6022 static void
6023 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6024 const_tree type, bool named)
6025 {
6026 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6027
6028 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6029 {
6030 aapcs_layout_arg (pcum, mode, type, named);
6031
6032 if (pcum->aapcs_cprc_slot >= 0)
6033 {
6034 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6035 type);
6036 pcum->aapcs_cprc_slot = -1;
6037 }
6038
6039 /* Generic stuff. */
6040 pcum->aapcs_arg_processed = false;
6041 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6042 pcum->aapcs_reg = NULL_RTX;
6043 pcum->aapcs_partial = 0;
6044 }
6045 else
6046 {
6047 pcum->nargs += 1;
6048 if (arm_vector_mode_supported_p (mode)
6049 && pcum->named_count > pcum->nargs
6050 && TARGET_IWMMXT_ABI)
6051 pcum->iwmmxt_nregs += 1;
6052 else
6053 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6054 }
6055 }
6056
6057 /* Variable sized types are passed by reference. This is a GCC
6058 extension to the ARM ABI. */
6059
6060 static bool
6061 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6062 machine_mode mode ATTRIBUTE_UNUSED,
6063 const_tree type, bool named ATTRIBUTE_UNUSED)
6064 {
6065 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6066 }
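
/* For example (illustrative): an argument whose type has a size that is
   not a compile-time constant, such as a variably modified type (a GNU
   extension), is passed by reference here; ordinary fixed-size structs
   are still passed by value under the normal AAPCS rules.  */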
6067 \f
6068 /* Encode the current state of the #pragma [no_]long_calls. */
6069 typedef enum
6070 {
6071 OFF, /* No #pragma [no_]long_calls is in effect. */
6072 LONG, /* #pragma long_calls is in effect. */
6073 SHORT /* #pragma no_long_calls is in effect. */
6074 } arm_pragma_enum;
6075
6076 static arm_pragma_enum arm_pragma_long_calls = OFF;
6077
6078 void
6079 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6080 {
6081 arm_pragma_long_calls = LONG;
6082 }
6083
6084 void
6085 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6086 {
6087 arm_pragma_long_calls = SHORT;
6088 }
6089
6090 void
6091 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6092 {
6093 arm_pragma_long_calls = OFF;
6094 }
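
/* Typical use of these pragmas in user code (illustrative):

       #pragma long_calls
       void far_away (void);        becomes an implicit long_call
       #pragma long_calls_off

   The pragma state recorded here is consumed by
   arm_set_default_type_attributes below when function types are laid
   out.  */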
6095 \f
6096 /* Handle an attribute requiring a FUNCTION_DECL;
6097 arguments as in struct attribute_spec.handler. */
6098 static tree
6099 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6100 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6101 {
6102 if (TREE_CODE (*node) != FUNCTION_DECL)
6103 {
6104 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6105 name);
6106 *no_add_attrs = true;
6107 }
6108
6109 return NULL_TREE;
6110 }
6111
6112 /* Handle an "interrupt" or "isr" attribute;
6113 arguments as in struct attribute_spec.handler. */
6114 static tree
6115 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6116 bool *no_add_attrs)
6117 {
6118 if (DECL_P (*node))
6119 {
6120 if (TREE_CODE (*node) != FUNCTION_DECL)
6121 {
6122 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6123 name);
6124 *no_add_attrs = true;
6125 }
6126 /* FIXME: the argument if any is checked for type attributes;
6127 should it be checked for decl ones? */
6128 }
6129 else
6130 {
6131 if (TREE_CODE (*node) == FUNCTION_TYPE
6132 || TREE_CODE (*node) == METHOD_TYPE)
6133 {
6134 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6135 {
6136 warning (OPT_Wattributes, "%qE attribute ignored",
6137 name);
6138 *no_add_attrs = true;
6139 }
6140 }
6141 else if (TREE_CODE (*node) == POINTER_TYPE
6142 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6143 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6144 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6145 {
6146 *node = build_variant_type_copy (*node);
6147 TREE_TYPE (*node) = build_type_attribute_variant
6148 (TREE_TYPE (*node),
6149 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6150 *no_add_attrs = true;
6151 }
6152 else
6153 {
6154 /* Possibly pass this attribute on from the type to a decl. */
6155 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6156 | (int) ATTR_FLAG_FUNCTION_NEXT
6157 | (int) ATTR_FLAG_ARRAY_NEXT))
6158 {
6159 *no_add_attrs = true;
6160 return tree_cons (name, args, NULL_TREE);
6161 }
6162 else
6163 {
6164 warning (OPT_Wattributes, "%qE attribute ignored",
6165 name);
6166 }
6167 }
6168 }
6169
6170 return NULL_TREE;
6171 }
6172
6173 /* Handle a "pcs" attribute; arguments as in struct
6174 attribute_spec.handler. */
6175 static tree
6176 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6177 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6178 {
6179 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6180 {
6181 warning (OPT_Wattributes, "%qE attribute ignored", name);
6182 *no_add_attrs = true;
6183 }
6184 return NULL_TREE;
6185 }
6186
6187 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6188 /* Handle the "notshared" attribute. This attribute is another way of
6189 requesting hidden visibility. ARM's compiler supports
6190 "__declspec(notshared)"; we support the same thing via an
6191 attribute. */
6192
6193 static tree
6194 arm_handle_notshared_attribute (tree *node,
6195 tree name ATTRIBUTE_UNUSED,
6196 tree args ATTRIBUTE_UNUSED,
6197 int flags ATTRIBUTE_UNUSED,
6198 bool *no_add_attrs)
6199 {
6200 tree decl = TYPE_NAME (*node);
6201
6202 if (decl)
6203 {
6204 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6205 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6206 *no_add_attrs = false;
6207 }
6208 return NULL_TREE;
6209 }
6210 #endif
6211
6212 /* Return 0 if the attributes for two types are incompatible, 1 if they
6213 are compatible, and 2 if they are nearly compatible (which causes a
6214 warning to be generated). */
6215 static int
6216 arm_comp_type_attributes (const_tree type1, const_tree type2)
6217 {
6218 int l1, l2, s1, s2;
6219
6220 /* Check for mismatch of non-default calling convention. */
6221 if (TREE_CODE (type1) != FUNCTION_TYPE)
6222 return 1;
6223
6224 /* Check for mismatched call attributes. */
6225 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6226 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6227 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6228 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6229
6230 /* Only bother to check if an attribute is defined. */
6231 if (l1 | l2 | s1 | s2)
6232 {
6233 /* If one type has an attribute, the other must have the same attribute. */
6234 if ((l1 != l2) || (s1 != s2))
6235 return 0;
6236
6237 /* Disallow mixed attributes. */
6238 if ((l1 & s2) || (l2 & s1))
6239 return 0;
6240 }
6241
6242 /* Check for mismatched ISR attribute. */
6243 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6244 if (! l1)
6245 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6246 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6247 if (! l2)
6248 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6249 if (l1 != l2)
6250 return 0;
6251
6252 return 1;
6253 }
6254
6255 /* Assigns default attributes to newly defined type. This is used to
6256 set short_call/long_call attributes for function types of
6257 functions defined inside corresponding #pragma scopes. */
6258 static void
6259 arm_set_default_type_attributes (tree type)
6260 {
6261 /* Add __attribute__ ((long_call)) to all functions, when
6262 inside #pragma long_calls or __attribute__ ((short_call)),
6263 when inside #pragma no_long_calls. */
6264 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6265 {
6266 tree type_attr_list, attr_name;
6267 type_attr_list = TYPE_ATTRIBUTES (type);
6268
6269 if (arm_pragma_long_calls == LONG)
6270 attr_name = get_identifier ("long_call");
6271 else if (arm_pragma_long_calls == SHORT)
6272 attr_name = get_identifier ("short_call");
6273 else
6274 return;
6275
6276 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6277 TYPE_ATTRIBUTES (type) = type_attr_list;
6278 }
6279 }
6280 \f
6281 /* Return true if DECL is known to be linked into section SECTION. */
6282
6283 static bool
6284 arm_function_in_section_p (tree decl, section *section)
6285 {
6286 /* We can only be certain about functions defined in the same
6287 compilation unit. */
6288 if (!TREE_STATIC (decl))
6289 return false;
6290
6291 /* Make sure that SYMBOL always binds to the definition in this
6292 compilation unit. */
6293 if (!targetm.binds_local_p (decl))
6294 return false;
6295
6296 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6297 if (!DECL_SECTION_NAME (decl))
6298 {
6299 /* Make sure that we will not create a unique section for DECL. */
6300 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6301 return false;
6302 }
6303
6304 return function_section (decl) == section;
6305 }
6306
6307 /* Return nonzero if a 32-bit "long_call" should be generated for
6308 a call from the current function to DECL. We generate a long_call
6309 if the function:
6310
6311 a. has an __attribute__((long_call))
6312 or b. is within the scope of a #pragma long_calls
6313 or c. the -mlong-calls command line switch has been specified
6314
6315 However we do not generate a long call if the function:
6316
6317 d. has an __attribute__ ((short_call))
6318 or e. is inside the scope of a #pragma no_long_calls
6319 or f. is defined in the same section as the current function. */
6320
6321 bool
6322 arm_is_long_call_p (tree decl)
6323 {
6324 tree attrs;
6325
6326 if (!decl)
6327 return TARGET_LONG_CALLS;
6328
6329 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6330 if (lookup_attribute ("short_call", attrs))
6331 return false;
6332
6333 /* For "f", be conservative, and only cater for cases in which the
6334 whole of the current function is placed in the same section. */
6335 if (!flag_reorder_blocks_and_partition
6336 && TREE_CODE (decl) == FUNCTION_DECL
6337 && arm_function_in_section_p (decl, current_function_section ()))
6338 return false;
6339
6340 if (lookup_attribute ("long_call", attrs))
6341 return true;
6342
6343 return TARGET_LONG_CALLS;
6344 }
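
/* The attributes tested above are written as, for example (hypothetical
   declarations):

       void far_fn (void)  __attribute__ ((long_call));
       void near_fn (void) __attribute__ ((short_call));

   far_fn is always treated as a long call (typically the address is
   loaded into a register and called indirectly); near_fn always uses a
   direct BL, even under -mlong-calls.  */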
6345
6346 /* Return nonzero if it is ok to make a tail-call to DECL. */
6347 static bool
6348 arm_function_ok_for_sibcall (tree decl, tree exp)
6349 {
6350 unsigned long func_type;
6351
6352 if (cfun->machine->sibcall_blocked)
6353 return false;
6354
6355 /* Never tailcall something if we are generating code for Thumb-1. */
6356 if (TARGET_THUMB1)
6357 return false;
6358
6359 /* The PIC register is live on entry to VxWorks PLT entries, so we
6360 must make the call before restoring the PIC register. */
6361 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6362 return false;
6363
6364 /* If we are interworking and the function is not declared static
6365 then we can't tail-call it unless we know that it exists in this
6366 compilation unit (since it might be a Thumb routine). */
6367 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6368 && !TREE_ASM_WRITTEN (decl))
6369 return false;
6370
6371 func_type = arm_current_func_type ();
6372 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6373 if (IS_INTERRUPT (func_type))
6374 return false;
6375
6376 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6377 {
6378 /* Check that the return value locations are the same. For
6379 example that we aren't returning a value from the sibling in
6380 a VFP register but then need to transfer it to a core
6381 register. */
6382 rtx a, b;
6383
6384 a = arm_function_value (TREE_TYPE (exp), decl, false);
6385 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6386 cfun->decl, false);
6387 if (!rtx_equal_p (a, b))
6388 return false;
6389 }
6390
6391 /* Never tailcall if function may be called with a misaligned SP. */
6392 if (IS_STACKALIGN (func_type))
6393 return false;
6394
6395 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6396 references should become a NOP. Don't convert such calls into
6397 sibling calls. */
6398 if (TARGET_AAPCS_BASED
6399 && arm_abi == ARM_ABI_AAPCS
6400 && decl
6401 && DECL_WEAK (decl))
6402 return false;
6403
6404 /* Everything else is ok. */
6405 return true;
6406 }
6407
6408 \f
6409 /* Addressing mode support functions. */
6410
6411 /* Return nonzero if X is a legitimate immediate operand when compiling
6412 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6413 int
6414 legitimate_pic_operand_p (rtx x)
6415 {
6416 if (GET_CODE (x) == SYMBOL_REF
6417 || (GET_CODE (x) == CONST
6418 && GET_CODE (XEXP (x, 0)) == PLUS
6419 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6420 return 0;
6421
6422 return 1;
6423 }
6424
6425 /* Record that the current function needs a PIC register. Initialize
6426 cfun->machine->pic_reg if we have not already done so. */
6427
6428 static void
6429 require_pic_register (void)
6430 {
6431 /* A lot of the logic here is made obscure by the fact that this
6432 routine gets called as part of the rtx cost estimation process.
6433 We don't want those calls to affect any assumptions about the real
6434 function; and further, we can't call entry_of_function() until we
6435 start the real expansion process. */
6436 if (!crtl->uses_pic_offset_table)
6437 {
6438 gcc_assert (can_create_pseudo_p ());
6439 if (arm_pic_register != INVALID_REGNUM
6440 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6441 {
6442 if (!cfun->machine->pic_reg)
6443 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6444
6445 /* Play games to avoid marking the function as needing pic
6446 if we are being called as part of the cost-estimation
6447 process. */
6448 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6449 crtl->uses_pic_offset_table = 1;
6450 }
6451 else
6452 {
6453 rtx_insn *seq, *insn;
6454
6455 if (!cfun->machine->pic_reg)
6456 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6457
6458 /* Play games to avoid marking the function as needing pic
6459 if we are being called as part of the cost-estimation
6460 process. */
6461 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6462 {
6463 crtl->uses_pic_offset_table = 1;
6464 start_sequence ();
6465
6466 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6467 && arm_pic_register > LAST_LO_REGNUM)
6468 emit_move_insn (cfun->machine->pic_reg,
6469 gen_rtx_REG (Pmode, arm_pic_register));
6470 else
6471 arm_load_pic_register (0UL);
6472
6473 seq = get_insns ();
6474 end_sequence ();
6475
6476 for (insn = seq; insn; insn = NEXT_INSN (insn))
6477 if (INSN_P (insn))
6478 INSN_LOCATION (insn) = prologue_location;
6479
6480 /* We can be called during expansion of PHI nodes, where
6481 we can't yet emit instructions directly in the final
6482 insn stream. Queue the insns on the entry edge, they will
6483 be committed after everything else is expanded. */
6484 insert_insn_on_edge (seq,
6485 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6486 }
6487 }
6488 }
6489 }
6490
6491 rtx
6492 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6493 {
6494 if (GET_CODE (orig) == SYMBOL_REF
6495 || GET_CODE (orig) == LABEL_REF)
6496 {
6497 rtx insn;
6498
6499 if (reg == 0)
6500 {
6501 gcc_assert (can_create_pseudo_p ());
6502 reg = gen_reg_rtx (Pmode);
6503 }
6504
6505 /* VxWorks does not impose a fixed gap between segments; the run-time
6506 gap can be different from the object-file gap. We therefore can't
6507 use GOTOFF unless we are absolutely sure that the symbol is in the
6508 same segment as the GOT. Unfortunately, the flexibility of linker
6509 scripts means that we can't be sure of that in general, so assume
6510 that GOTOFF is never valid on VxWorks. */
6511 if ((GET_CODE (orig) == LABEL_REF
6512 || (GET_CODE (orig) == SYMBOL_REF &&
6513 SYMBOL_REF_LOCAL_P (orig)))
6514 && NEED_GOT_RELOC
6515 && arm_pic_data_is_text_relative)
6516 insn = arm_pic_static_addr (orig, reg);
6517 else
6518 {
6519 rtx pat;
6520 rtx mem;
6521
6522 /* If this function doesn't have a pic register, create one now. */
6523 require_pic_register ();
6524
6525 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6526
6527 /* Make the MEM as close to a constant as possible. */
6528 mem = SET_SRC (pat);
6529 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6530 MEM_READONLY_P (mem) = 1;
6531 MEM_NOTRAP_P (mem) = 1;
6532
6533 insn = emit_insn (pat);
6534 }
6535
6536 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6537 by loop. */
6538 set_unique_reg_note (insn, REG_EQUAL, orig);
6539
6540 return reg;
6541 }
6542 else if (GET_CODE (orig) == CONST)
6543 {
6544 rtx base, offset;
6545
6546 if (GET_CODE (XEXP (orig, 0)) == PLUS
6547 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6548 return orig;
6549
6550 /* Handle the case where we have: const (UNSPEC_TLS). */
6551 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6552 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6553 return orig;
6554
6555 /* Handle the case where we have:
6556 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6557 CONST_INT. */
6558 if (GET_CODE (XEXP (orig, 0)) == PLUS
6559 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6560 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6561 {
6562 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6563 return orig;
6564 }
6565
6566 if (reg == 0)
6567 {
6568 gcc_assert (can_create_pseudo_p ());
6569 reg = gen_reg_rtx (Pmode);
6570 }
6571
6572 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6573
6574 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6575 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6576 base == reg ? 0 : reg);
6577
6578 if (CONST_INT_P (offset))
6579 {
6580 /* The base register doesn't really matter, we only want to
6581 test the index for the appropriate mode. */
6582 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6583 {
6584 gcc_assert (can_create_pseudo_p ());
6585 offset = force_reg (Pmode, offset);
6586 }
6587
6588 if (CONST_INT_P (offset))
6589 return plus_constant (Pmode, base, INTVAL (offset));
6590 }
6591
6592 if (GET_MODE_SIZE (mode) > 4
6593 && (GET_MODE_CLASS (mode) == MODE_INT
6594 || TARGET_SOFT_FLOAT))
6595 {
6596 emit_insn (gen_addsi3 (reg, base, offset));
6597 return reg;
6598 }
6599
6600 return gen_rtx_PLUS (Pmode, base, offset);
6601 }
6602
6603 return orig;
6604 }
6605
6606
6607 /* Find a spare register to use during the prolog of a function. */
6608
6609 static int
6610 thumb_find_work_register (unsigned long pushed_regs_mask)
6611 {
6612 int reg;
6613
6614 /* Check the argument registers first as these are call-used. The
6615 register allocation order means that sometimes r3 might be used
6616 but earlier argument registers might not, so check them all. */
6617 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6618 if (!df_regs_ever_live_p (reg))
6619 return reg;
6620
6621 /* Before going on to check the call-saved registers we can try a couple
6622 more ways of deducing that r3 is available. The first is when we are
6623 pushing anonymous arguments onto the stack and we have less than 4
6624 registers worth of fixed arguments(*). In this case r3 will be part of
6625 the variable argument list and so we can be sure that it will be
6626 pushed right at the start of the function. Hence it will be available
6627 for the rest of the prologue.
6628 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6629 if (cfun->machine->uses_anonymous_args
6630 && crtl->args.pretend_args_size > 0)
6631 return LAST_ARG_REGNUM;
6632
6633 /* The other case is when we have fixed arguments but less than 4 registers
6634 worth. In this case r3 might be used in the body of the function, but
6635 it is not being used to convey an argument into the function. In theory
6636 we could just check crtl->args.size to see how many bytes are
6637 being passed in argument registers, but it seems that it is unreliable.
6638 Sometimes it will have the value 0 when in fact arguments are being
6639 passed. (See testcase execute/20021111-1.c for an example). So we also
6640 check the args_info.nregs field as well. The problem with this field is
6641 that it makes no allowances for arguments that are passed to the
6642 function but which are not used. Hence we could miss an opportunity
6643 when a function has an unused argument in r3. But it is better to be
6644 safe than to be sorry. */
6645 if (! cfun->machine->uses_anonymous_args
6646 && crtl->args.size >= 0
6647 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6648 && (TARGET_AAPCS_BASED
6649 ? crtl->args.info.aapcs_ncrn < 4
6650 : crtl->args.info.nregs < 4))
6651 return LAST_ARG_REGNUM;
6652
6653 /* Otherwise look for a call-saved register that is going to be pushed. */
6654 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6655 if (pushed_regs_mask & (1 << reg))
6656 return reg;
6657
6658 if (TARGET_THUMB2)
6659 {
6660 /* Thumb-2 can use high regs. */
6661 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6662 if (pushed_regs_mask & (1 << reg))
6663 return reg;
6664 }
6665 /* Something went wrong - thumb_compute_save_reg_mask()
6666 should have arranged for a suitable register to be pushed. */
6667 gcc_unreachable ();
6668 }
6669
6670 static GTY(()) int pic_labelno;
6671
6672 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6673 low register. */
6674
6675 void
6676 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6677 {
6678 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6679
6680 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6681 return;
6682
6683 gcc_assert (flag_pic);
6684
6685 pic_reg = cfun->machine->pic_reg;
6686 if (TARGET_VXWORKS_RTP)
6687 {
6688 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6689 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6690 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6691
6692 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6693
6694 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6695 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6696 }
6697 else
6698 {
6699 /* We use an UNSPEC rather than a LABEL_REF because this label
6700 never appears in the code stream. */
6701
6702 labelno = GEN_INT (pic_labelno++);
6703 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6704 l1 = gen_rtx_CONST (VOIDmode, l1);
6705
6706 /* On the ARM the PC register contains 'dot + 8' at the time of the
6707 addition, on the Thumb it is 'dot + 4'. */
6708 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6709 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6710 UNSPEC_GOTSYM_OFF);
6711 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6712
6713 if (TARGET_32BIT)
6714 {
6715 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6716 }
6717 else /* TARGET_THUMB1 */
6718 {
6719 if (arm_pic_register != INVALID_REGNUM
6720 && REGNO (pic_reg) > LAST_LO_REGNUM)
6721 {
6722 /* We will have pushed the pic register, so we should always be
6723 able to find a work register. */
6724 pic_tmp = gen_rtx_REG (SImode,
6725 thumb_find_work_register (saved_regs));
6726 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6727 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6728 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6729 }
6730 else if (arm_pic_register != INVALID_REGNUM
6731 && arm_pic_register > LAST_LO_REGNUM
6732 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6733 {
6734 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6735 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6736 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6737 }
6738 else
6739 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6740 }
6741 }
6742
6743 /* Need to emit this whether or not we obey regdecls,
6744 since setjmp/longjmp can cause life info to screw up. */
6745 emit_use (pic_reg);
6746 }
6747
6748 /* Generate code to load the address of a static var when flag_pic is set. */
6749 static rtx
6750 arm_pic_static_addr (rtx orig, rtx reg)
6751 {
6752 rtx l1, labelno, offset_rtx, insn;
6753
6754 gcc_assert (flag_pic);
6755
6756 /* We use an UNSPEC rather than a LABEL_REF because this label
6757 never appears in the code stream. */
6758 labelno = GEN_INT (pic_labelno++);
6759 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6760 l1 = gen_rtx_CONST (VOIDmode, l1);
6761
6762 /* On the ARM the PC register contains 'dot + 8' at the time of the
6763 addition, on the Thumb it is 'dot + 4'. */
6764 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6765 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6766 UNSPEC_SYMBOL_OFFSET);
6767 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6768
6769 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6770 return insn;
6771 }
6772
6773 /* Return nonzero if X is valid as an ARM state addressing register. */
6774 static int
6775 arm_address_register_rtx_p (rtx x, int strict_p)
6776 {
6777 int regno;
6778
6779 if (!REG_P (x))
6780 return 0;
6781
6782 regno = REGNO (x);
6783
6784 if (strict_p)
6785 return ARM_REGNO_OK_FOR_BASE_P (regno);
6786
6787 return (regno <= LAST_ARM_REGNUM
6788 || regno >= FIRST_PSEUDO_REGISTER
6789 || regno == FRAME_POINTER_REGNUM
6790 || regno == ARG_POINTER_REGNUM);
6791 }
6792
6793 /* Return TRUE if this rtx is the difference of a symbol and a label,
6794 and will reduce to a PC-relative relocation in the object file.
6795 Expressions like this can be left alone when generating PIC, rather
6796 than forced through the GOT. */
6797 static int
6798 pcrel_constant_p (rtx x)
6799 {
6800 if (GET_CODE (x) == MINUS)
6801 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
6802
6803 return FALSE;
6804 }
6805
6806 /* Return true if X will surely end up in an index register after next
6807 splitting pass. */
6808 static bool
6809 will_be_in_index_register (const_rtx x)
6810 {
6811 /* arm.md: calculate_pic_address will split this into a register. */
6812 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
6813 }
6814
6815 /* Return nonzero if X is a valid ARM state address operand. */
6816 int
6817 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
6818 int strict_p)
6819 {
6820 bool use_ldrd;
6821 enum rtx_code code = GET_CODE (x);
6822
6823 if (arm_address_register_rtx_p (x, strict_p))
6824 return 1;
6825
6826 use_ldrd = (TARGET_LDRD
6827 && (mode == DImode
6828 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6829
6830 if (code == POST_INC || code == PRE_DEC
6831 || ((code == PRE_INC || code == POST_DEC)
6832 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6833 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6834
6835 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6836 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6837 && GET_CODE (XEXP (x, 1)) == PLUS
6838 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6839 {
6840 rtx addend = XEXP (XEXP (x, 1), 1);
6841
6842 /* Don't allow ldrd post increment by register because it's hard
6843 to fixup invalid register choices. */
6844 if (use_ldrd
6845 && GET_CODE (x) == POST_MODIFY
6846 && REG_P (addend))
6847 return 0;
6848
6849 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
6850 && arm_legitimate_index_p (mode, addend, outer, strict_p));
6851 }
6852
6853 /* After reload constants split into minipools will have addresses
6854 from a LABEL_REF. */
6855 else if (reload_completed
6856 && (code == LABEL_REF
6857 || (code == CONST
6858 && GET_CODE (XEXP (x, 0)) == PLUS
6859 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6860 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6861 return 1;
6862
6863 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6864 return 0;
6865
6866 else if (code == PLUS)
6867 {
6868 rtx xop0 = XEXP (x, 0);
6869 rtx xop1 = XEXP (x, 1);
6870
6871 return ((arm_address_register_rtx_p (xop0, strict_p)
6872 && ((CONST_INT_P (xop1)
6873 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
6874 || (!strict_p && will_be_in_index_register (xop1))))
6875 || (arm_address_register_rtx_p (xop1, strict_p)
6876 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
6877 }
6878
6879 #if 0
6880 /* Reload currently can't handle MINUS, so disable this for now. */
6881 else if (GET_CODE (x) == MINUS)
6882 {
6883 rtx xop0 = XEXP (x, 0);
6884 rtx xop1 = XEXP (x, 1);
6885
6886 return (arm_address_register_rtx_p (xop0, strict_p)
6887 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
6888 }
6889 #endif
6890
6891 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6892 && code == SYMBOL_REF
6893 && CONSTANT_POOL_ADDRESS_P (x)
6894 && ! (flag_pic
6895 && symbol_mentioned_p (get_pool_constant (x))
6896 && ! pcrel_constant_p (get_pool_constant (x))))
6897 return 1;
6898
6899 return 0;
6900 }
6901
6902 /* Return nonzero if X is a valid Thumb-2 address operand. */
6903 static int
6904 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
6905 {
6906 bool use_ldrd;
6907 enum rtx_code code = GET_CODE (x);
6908
6909 if (arm_address_register_rtx_p (x, strict_p))
6910 return 1;
6911
6912 use_ldrd = (TARGET_LDRD
6913 && (mode == DImode
6914 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
6915
6916 if (code == POST_INC || code == PRE_DEC
6917 || ((code == PRE_INC || code == POST_DEC)
6918 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
6919 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
6920
6921 else if ((code == POST_MODIFY || code == PRE_MODIFY)
6922 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
6923 && GET_CODE (XEXP (x, 1)) == PLUS
6924 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
6925 {
6926 /* Thumb-2 only has autoincrement by constant. */
6927 rtx addend = XEXP (XEXP (x, 1), 1);
6928 HOST_WIDE_INT offset;
6929
6930 if (!CONST_INT_P (addend))
6931 return 0;
6932
6933 offset = INTVAL(addend);
6934 if (GET_MODE_SIZE (mode) <= 4)
6935 return (offset > -256 && offset < 256);
6936
6937 return (use_ldrd && offset > -1024 && offset < 1024
6938 && (offset & 3) == 0);
6939 }
6940
6941 /* After reload constants split into minipools will have addresses
6942 from a LABEL_REF. */
6943 else if (reload_completed
6944 && (code == LABEL_REF
6945 || (code == CONST
6946 && GET_CODE (XEXP (x, 0)) == PLUS
6947 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
6948 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
6949 return 1;
6950
6951 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
6952 return 0;
6953
6954 else if (code == PLUS)
6955 {
6956 rtx xop0 = XEXP (x, 0);
6957 rtx xop1 = XEXP (x, 1);
6958
6959 return ((arm_address_register_rtx_p (xop0, strict_p)
6960 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
6961 || (!strict_p && will_be_in_index_register (xop1))))
6962 || (arm_address_register_rtx_p (xop1, strict_p)
6963 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
6964 }
6965
6966 /* Normally we can assign constant values to target registers without
6967 the help of a constant pool. But there are cases where we have to use
6968 the constant pool, for example:
6969 1) assigning a label to a register.
6970 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
6971
6972 Constant pool access in format:
6973 (set (reg r0) (mem (symbol_ref (".LC0"))))
6974 will cause the use of literal pool (later in function arm_reorg).
6975 So here we mark such format as an invalid format, then the compiler
6976 will adjust it into:
6977 (set (reg r0) (symbol_ref (".LC0")))
6978 (set (reg r0) (mem (reg r0))).
6979 No extra register is required, and (mem (reg r0)) won't cause the use
6980 of literal pools. */
6981 else if (arm_disable_literal_pool && code == SYMBOL_REF
6982 && CONSTANT_POOL_ADDRESS_P (x))
6983 return 0;
6984
6985 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
6986 && code == SYMBOL_REF
6987 && CONSTANT_POOL_ADDRESS_P (x)
6988 && ! (flag_pic
6989 && symbol_mentioned_p (get_pool_constant (x))
6990 && ! pcrel_constant_p (get_pool_constant (x))))
6991 return 1;
6992
6993 return 0;
6994 }
6995
6996 /* Return nonzero if INDEX is valid for an address index operand in
6997 ARM state. */
6998 static int
6999 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7000 int strict_p)
7001 {
7002 HOST_WIDE_INT range;
7003 enum rtx_code code = GET_CODE (index);
7004
7005 /* Standard coprocessor addressing modes. */
7006 if (TARGET_HARD_FLOAT
7007 && TARGET_VFP
7008 && (mode == SFmode || mode == DFmode))
7009 return (code == CONST_INT && INTVAL (index) < 1024
7010 && INTVAL (index) > -1024
7011 && (INTVAL (index) & 3) == 0);
7012
7013 /* For quad modes, we restrict the constant offset to be slightly less
7014 than what the instruction format permits. We do this because for
7015 quad mode moves, we will actually decompose them into two separate
7016 double-mode reads or writes. INDEX must therefore be a valid
7017 (double-mode) offset and so should INDEX+8. */
7018 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7019 return (code == CONST_INT
7020 && INTVAL (index) < 1016
7021 && INTVAL (index) > -1024
7022 && (INTVAL (index) & 3) == 0);
7023
7024 /* We have no such constraint on double mode offsets, so we permit the
7025 full range of the instruction format. */
7026 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7027 return (code == CONST_INT
7028 && INTVAL (index) < 1024
7029 && INTVAL (index) > -1024
7030 && (INTVAL (index) & 3) == 0);
7031
7032 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7033 return (code == CONST_INT
7034 && INTVAL (index) < 1024
7035 && INTVAL (index) > -1024
7036 && (INTVAL (index) & 3) == 0);
7037
7038 if (arm_address_register_rtx_p (index, strict_p)
7039 && (GET_MODE_SIZE (mode) <= 4))
7040 return 1;
7041
7042 if (mode == DImode || mode == DFmode)
7043 {
7044 if (code == CONST_INT)
7045 {
7046 HOST_WIDE_INT val = INTVAL (index);
7047
7048 if (TARGET_LDRD)
7049 return val > -256 && val < 256;
7050 else
7051 return val > -4096 && val < 4092;
7052 }
7053
7054 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7055 }
7056
7057 if (GET_MODE_SIZE (mode) <= 4
7058 && ! (arm_arch4
7059 && (mode == HImode
7060 || mode == HFmode
7061 || (mode == QImode && outer == SIGN_EXTEND))))
7062 {
7063 if (code == MULT)
7064 {
7065 rtx xiop0 = XEXP (index, 0);
7066 rtx xiop1 = XEXP (index, 1);
7067
7068 return ((arm_address_register_rtx_p (xiop0, strict_p)
7069 && power_of_two_operand (xiop1, SImode))
7070 || (arm_address_register_rtx_p (xiop1, strict_p)
7071 && power_of_two_operand (xiop0, SImode)));
7072 }
7073 else if (code == LSHIFTRT || code == ASHIFTRT
7074 || code == ASHIFT || code == ROTATERT)
7075 {
7076 rtx op = XEXP (index, 1);
7077
7078 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7079 && CONST_INT_P (op)
7080 && INTVAL (op) > 0
7081 && INTVAL (op) <= 31);
7082 }
7083 }
7084
7085 /* For ARM v4 we may be doing a sign-extend operation during the
7086 load. */
7087 if (arm_arch4)
7088 {
7089 if (mode == HImode
7090 || mode == HFmode
7091 || (outer == SIGN_EXTEND && mode == QImode))
7092 range = 256;
7093 else
7094 range = 4096;
7095 }
7096 else
7097 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7098
7099 return (code == CONST_INT
7100 && INTVAL (index) < range
7101 && INTVAL (index) > -range);
7102 }
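
/* Purely as an illustration of the checks above (not used by the compiler;
   the register numbers are hypothetical), some index operands this function
   would accept or reject for an SImode access in ARM state:

       (reg:SI 1)                             accepted (register index)
       (const_int 4095)                       accepted (within +/-4096)
       (const_int -4100)                      rejected (out of range)
       (mult:SI (reg:SI 1) (const_int 4))     accepted (power-of-two scale)
       (ashift:SI (reg:SI 1) (const_int 32))  rejected (shift count > 31)  */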
7103
7104 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
7105 index operand, i.e. 1, 2, 4 or 8. */
7106 static bool
7107 thumb2_index_mul_operand (rtx op)
7108 {
7109 HOST_WIDE_INT val;
7110
7111 if (!CONST_INT_P (op))
7112 return false;
7113
7114 val = INTVAL (op);
7115 return (val == 1 || val == 2 || val == 4 || val == 8);
7116 }
7117
7118 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7119 static int
7120 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7121 {
7122 enum rtx_code code = GET_CODE (index);
7123
7124 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7125 /* Standard coprocessor addressing modes. */
7126 if (TARGET_HARD_FLOAT
7127 && TARGET_VFP
7128 && (mode == SFmode || mode == DFmode))
7129 return (code == CONST_INT && INTVAL (index) < 1024
7130 /* Thumb-2 only allows an index range greater than -256 for its core
7131 register loads/stores. Since we allow SF/DF in core registers, we
7132 have to use the intersection of -256~4096 (core) and -1024~1024
7133 (coprocessor). */
7134 && INTVAL (index) > -256
7135 && (INTVAL (index) & 3) == 0);
7136
7137 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7138 {
7139 /* For DImode assume values will usually live in core regs
7140 and only allow LDRD addressing modes. */
7141 if (!TARGET_LDRD || mode != DImode)
7142 return (code == CONST_INT
7143 && INTVAL (index) < 1024
7144 && INTVAL (index) > -1024
7145 && (INTVAL (index) & 3) == 0);
7146 }
7147
7148 /* For quad modes, we restrict the constant offset to be slightly less
7149 than what the instruction format permits. We do this because for
7150 quad mode moves, we will actually decompose them into two separate
7151 double-mode reads or writes. INDEX must therefore be a valid
7152 (double-mode) offset and so should INDEX+8. */
7153 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7154 return (code == CONST_INT
7155 && INTVAL (index) < 1016
7156 && INTVAL (index) > -1024
7157 && (INTVAL (index) & 3) == 0);
7158
7159 /* We have no such constraint on double mode offsets, so we permit the
7160 full range of the instruction format. */
7161 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7162 return (code == CONST_INT
7163 && INTVAL (index) < 1024
7164 && INTVAL (index) > -1024
7165 && (INTVAL (index) & 3) == 0);
7166
7167 if (arm_address_register_rtx_p (index, strict_p)
7168 && (GET_MODE_SIZE (mode) <= 4))
7169 return 1;
7170
7171 if (mode == DImode || mode == DFmode)
7172 {
7173 if (code == CONST_INT)
7174 {
7175 HOST_WIDE_INT val = INTVAL (index);
7176 /* ??? Can we assume ldrd for thumb2? */
7177 /* Thumb-2 ldrd only has reg+const addressing modes. */
7178 /* ldrd supports offsets of +-1020.
7179 However the ldr fallback does not. */
7180 return val > -256 && val < 256 && (val & 3) == 0;
7181 }
7182 else
7183 return 0;
7184 }
7185
7186 if (code == MULT)
7187 {
7188 rtx xiop0 = XEXP (index, 0);
7189 rtx xiop1 = XEXP (index, 1);
7190
7191 return ((arm_address_register_rtx_p (xiop0, strict_p)
7192 && thumb2_index_mul_operand (xiop1))
7193 || (arm_address_register_rtx_p (xiop1, strict_p)
7194 && thumb2_index_mul_operand (xiop0)));
7195 }
7196 else if (code == ASHIFT)
7197 {
7198 rtx op = XEXP (index, 1);
7199
7200 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7201 && CONST_INT_P (op)
7202 && INTVAL (op) > 0
7203 && INTVAL (op) <= 3);
7204 }
7205
7206 return (code == CONST_INT
7207 && INTVAL (index) < 4096
7208 && INTVAL (index) > -256);
7209 }
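
/* For illustration only (not used by the compiler; the register numbers
   are hypothetical), some index operands the Thumb-2 checks above would
   accept or reject for an SImode access:

       (const_int 4095)                       accepted (< 4096)
       (const_int -255)                       accepted (> -256)
       (const_int -300)                       rejected (below -256)
       (mult:SI (reg:SI 1) (const_int 8))     accepted (scale 1, 2, 4 or 8)
       (ashift:SI (reg:SI 1) (const_int 3))   accepted (shift count 1..3)
       (ashift:SI (reg:SI 1) (const_int 4))   rejected (shift too large)  */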
7210
7211 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7212 static int
7213 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7214 {
7215 int regno;
7216
7217 if (!REG_P (x))
7218 return 0;
7219
7220 regno = REGNO (x);
7221
7222 if (strict_p)
7223 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7224
7225 return (regno <= LAST_LO_REGNUM
7226 || regno > LAST_VIRTUAL_REGISTER
7227 || regno == FRAME_POINTER_REGNUM
7228 || (GET_MODE_SIZE (mode) >= 4
7229 && (regno == STACK_POINTER_REGNUM
7230 || regno >= FIRST_PSEUDO_REGISTER
7231 || x == hard_frame_pointer_rtx
7232 || x == arg_pointer_rtx)));
7233 }
7234
7235 /* Return nonzero if x is a legitimate index register. This is the case
7236 for any base register that can access a QImode object. */
7237 inline static int
7238 thumb1_index_register_rtx_p (rtx x, int strict_p)
7239 {
7240 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7241 }
7242
7243 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7244
7245 The AP may be eliminated to either the SP or the FP, so we use the
7246 least common denominator, e.g. SImode, and offsets from 0 to 64.
7247
7248 ??? Verify whether the above is the right approach.
7249
7250 ??? Also, the FP may be eliminated to the SP, so perhaps that
7251 needs special handling also.
7252
7253 ??? Look at how the mips16 port solves this problem. It probably uses
7254 better ways to solve some of these problems.
7255
7256 Although it is not incorrect, we don't accept QImode and HImode
7257 addresses based on the frame pointer or arg pointer until the
7258 reload pass starts. This is so that eliminating such addresses
7259 into stack based ones won't produce impossible code. */
7260 int
7261 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7262 {
7263 /* ??? Not clear if this is right. Experiment. */
7264 if (GET_MODE_SIZE (mode) < 4
7265 && !(reload_in_progress || reload_completed)
7266 && (reg_mentioned_p (frame_pointer_rtx, x)
7267 || reg_mentioned_p (arg_pointer_rtx, x)
7268 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7269 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7270 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7271 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7272 return 0;
7273
7274 /* Accept any base register. SP only in SImode or larger. */
7275 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7276 return 1;
7277
7278 /* This is PC relative data before arm_reorg runs. */
7279 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7280 && GET_CODE (x) == SYMBOL_REF
7281 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7282 return 1;
7283
7284 /* This is PC relative data after arm_reorg runs. */
7285 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7286 && reload_completed
7287 && (GET_CODE (x) == LABEL_REF
7288 || (GET_CODE (x) == CONST
7289 && GET_CODE (XEXP (x, 0)) == PLUS
7290 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7291 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7292 return 1;
7293
7294 /* Post-inc indexing only supported for SImode and larger. */
7295 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7296 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7297 return 1;
7298
7299 else if (GET_CODE (x) == PLUS)
7300 {
7301 /* REG+REG address can be any two index registers. */
7302 /* We disallow FRAME+REG addressing since we know that FRAME
7303 will be replaced with STACK, and SP relative addressing only
7304 permits SP+OFFSET. */
7305 if (GET_MODE_SIZE (mode) <= 4
7306 && XEXP (x, 0) != frame_pointer_rtx
7307 && XEXP (x, 1) != frame_pointer_rtx
7308 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7309 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7310 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7311 return 1;
7312
7313 /* REG+const has 5-7 bit offset for non-SP registers. */
7314 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7315 || XEXP (x, 0) == arg_pointer_rtx)
7316 && CONST_INT_P (XEXP (x, 1))
7317 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7318 return 1;
7319
7320 /* REG+const has a 10-bit offset for SP, but only SImode and
7321 larger are supported. */
7322 /* ??? Should probably check for DI/DFmode overflow here
7323 just like GO_IF_LEGITIMATE_OFFSET does. */
7324 else if (REG_P (XEXP (x, 0))
7325 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7326 && GET_MODE_SIZE (mode) >= 4
7327 && CONST_INT_P (XEXP (x, 1))
7328 && INTVAL (XEXP (x, 1)) >= 0
7329 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7330 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7331 return 1;
7332
7333 else if (REG_P (XEXP (x, 0))
7334 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7335 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7336 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7337 && REGNO (XEXP (x, 0))
7338 <= LAST_VIRTUAL_POINTER_REGISTER))
7339 && GET_MODE_SIZE (mode) >= 4
7340 && CONST_INT_P (XEXP (x, 1))
7341 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7342 return 1;
7343 }
7344
7345 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7346 && GET_MODE_SIZE (mode) == 4
7347 && GET_CODE (x) == SYMBOL_REF
7348 && CONSTANT_POOL_ADDRESS_P (x)
7349 && ! (flag_pic
7350 && symbol_mentioned_p (get_pool_constant (x))
7351 && ! pcrel_constant_p (get_pool_constant (x))))
7352 return 1;
7353
7354 return 0;
7355 }
7356
7357 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7358 instruction of mode MODE. */
7359 int
7360 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7361 {
7362 switch (GET_MODE_SIZE (mode))
7363 {
7364 case 1:
7365 return val >= 0 && val < 32;
7366
7367 case 2:
7368 return val >= 0 && val < 64 && (val & 1) == 0;
7369
7370 default:
7371 return (val >= 0
7372 && (val + GET_MODE_SIZE (mode)) <= 128
7373 && (val & 3) == 0);
7374 }
7375 }
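
/* As a hypothetical illustration of the ranges checked above (not used by
   the compiler): for QImode the legal offsets are 0..31; for HImode they
   are the even values 0..62; and for SImode and larger they are the
   multiples of 4 from 0 up to 128 - GET_MODE_SIZE (mode), e.g. 0..124
   for SImode.  */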
7376
7377 bool
7378 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7379 {
7380 if (TARGET_ARM)
7381 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7382 else if (TARGET_THUMB2)
7383 return thumb2_legitimate_address_p (mode, x, strict_p);
7384 else /* if (TARGET_THUMB1) */
7385 return thumb1_legitimate_address_p (mode, x, strict_p);
7386 }
7387
7388 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7389
7390 Given an rtx X being reloaded into a reg required to be
7391 in class CLASS, return the class of reg to actually use.
7392 In general this is just CLASS, but for the Thumb core registers and
7393 immediate constants we prefer a LO_REGS class or a subset. */
7394
7395 static reg_class_t
7396 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7397 {
7398 if (TARGET_32BIT)
7399 return rclass;
7400 else
7401 {
7402 if (rclass == GENERAL_REGS)
7403 return LO_REGS;
7404 else
7405 return rclass;
7406 }
7407 }
7408
7409 /* Build the SYMBOL_REF for __tls_get_addr. */
7410
7411 static GTY(()) rtx tls_get_addr_libfunc;
7412
7413 static rtx
7414 get_tls_get_addr (void)
7415 {
7416 if (!tls_get_addr_libfunc)
7417 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7418 return tls_get_addr_libfunc;
7419 }
7420
7421 rtx
7422 arm_load_tp (rtx target)
7423 {
7424 if (!target)
7425 target = gen_reg_rtx (SImode);
7426
7427 if (TARGET_HARD_TP)
7428 {
7429 /* Can return in any reg. */
7430 emit_insn (gen_load_tp_hard (target));
7431 }
7432 else
7433 {
7434 /* Always returned in r0. Immediately copy the result into a pseudo,
7435 otherwise other uses of r0 (e.g. setting up function arguments) may
7436 clobber the value. */
7437
7438 rtx tmp;
7439
7440 emit_insn (gen_load_tp_soft ());
7441
7442 tmp = gen_rtx_REG (SImode, 0);
7443 emit_move_insn (target, tmp);
7444 }
7445 return target;
7446 }
7447
7448 static rtx
7449 load_tls_operand (rtx x, rtx reg)
7450 {
7451 rtx tmp;
7452
7453 if (reg == NULL_RTX)
7454 reg = gen_reg_rtx (SImode);
7455
7456 tmp = gen_rtx_CONST (SImode, x);
7457
7458 emit_move_insn (reg, tmp);
7459
7460 return reg;
7461 }
7462
7463 static rtx
7464 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7465 {
7466 rtx insns, label, labelno, sum;
7467
7468 gcc_assert (reloc != TLS_DESCSEQ);
7469 start_sequence ();
7470
7471 labelno = GEN_INT (pic_labelno++);
7472 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7473 label = gen_rtx_CONST (VOIDmode, label);
7474
7475 sum = gen_rtx_UNSPEC (Pmode,
7476 gen_rtvec (4, x, GEN_INT (reloc), label,
7477 GEN_INT (TARGET_ARM ? 8 : 4)),
7478 UNSPEC_TLS);
7479 reg = load_tls_operand (sum, reg);
7480
7481 if (TARGET_ARM)
7482 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7483 else
7484 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7485
7486 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7487 LCT_PURE, /* LCT_CONST? */
7488 Pmode, 1, reg, Pmode);
7489
7490 insns = get_insns ();
7491 end_sequence ();
7492
7493 return insns;
7494 }
7495
7496 static rtx
7497 arm_tls_descseq_addr (rtx x, rtx reg)
7498 {
7499 rtx labelno = GEN_INT (pic_labelno++);
7500 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7501 rtx sum = gen_rtx_UNSPEC (Pmode,
7502 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7503 gen_rtx_CONST (VOIDmode, label),
7504 GEN_INT (!TARGET_ARM)),
7505 UNSPEC_TLS);
7506 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, 0));
7507
7508 emit_insn (gen_tlscall (x, labelno));
7509 if (!reg)
7510 reg = gen_reg_rtx (SImode);
7511 else
7512 gcc_assert (REGNO (reg) != 0);
7513
7514 emit_move_insn (reg, reg0);
7515
7516 return reg;
7517 }
7518
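/* Legitimize X, a TLS SYMBOL_REF, according to the TLS access model
   recorded on the symbol: the dynamic models go through __tls_get_addr
   (or a GNU2 TLS descriptor sequence), while the exec models add a TLS
   offset to the thread pointer.  REG, if non-null, is a register that
   may be used while building the address.  Returns an rtx holding or
   computing the symbol's address.  */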
7519 rtx
7520 legitimize_tls_address (rtx x, rtx reg)
7521 {
7522 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7523 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7524
7525 switch (model)
7526 {
7527 case TLS_MODEL_GLOBAL_DYNAMIC:
7528 if (TARGET_GNU2_TLS)
7529 {
7530 reg = arm_tls_descseq_addr (x, reg);
7531
7532 tp = arm_load_tp (NULL_RTX);
7533
7534 dest = gen_rtx_PLUS (Pmode, tp, reg);
7535 }
7536 else
7537 {
7538 /* Original scheme */
7539 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7540 dest = gen_reg_rtx (Pmode);
7541 emit_libcall_block (insns, dest, ret, x);
7542 }
7543 return dest;
7544
7545 case TLS_MODEL_LOCAL_DYNAMIC:
7546 if (TARGET_GNU2_TLS)
7547 {
7548 reg = arm_tls_descseq_addr (x, reg);
7549
7550 tp = arm_load_tp (NULL_RTX);
7551
7552 dest = gen_rtx_PLUS (Pmode, tp, reg);
7553 }
7554 else
7555 {
7556 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7557
7558 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7559 share the LDM result with other LD model accesses. */
7560 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7561 UNSPEC_TLS);
7562 dest = gen_reg_rtx (Pmode);
7563 emit_libcall_block (insns, dest, ret, eqv);
7564
7565 /* Load the addend. */
7566 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7567 GEN_INT (TLS_LDO32)),
7568 UNSPEC_TLS);
7569 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7570 dest = gen_rtx_PLUS (Pmode, dest, addend);
7571 }
7572 return dest;
7573
7574 case TLS_MODEL_INITIAL_EXEC:
7575 labelno = GEN_INT (pic_labelno++);
7576 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7577 label = gen_rtx_CONST (VOIDmode, label);
7578 sum = gen_rtx_UNSPEC (Pmode,
7579 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7580 GEN_INT (TARGET_ARM ? 8 : 4)),
7581 UNSPEC_TLS);
7582 reg = load_tls_operand (sum, reg);
7583
7584 if (TARGET_ARM)
7585 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7586 else if (TARGET_THUMB2)
7587 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7588 else
7589 {
7590 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7591 emit_move_insn (reg, gen_const_mem (SImode, reg));
7592 }
7593
7594 tp = arm_load_tp (NULL_RTX);
7595
7596 return gen_rtx_PLUS (Pmode, tp, reg);
7597
7598 case TLS_MODEL_LOCAL_EXEC:
7599 tp = arm_load_tp (NULL_RTX);
7600
7601 reg = gen_rtx_UNSPEC (Pmode,
7602 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7603 UNSPEC_TLS);
7604 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7605
7606 return gen_rtx_PLUS (Pmode, tp, reg);
7607
7608 default:
7609 abort ();
7610 }
7611 }
7612
7613 /* Try machine-dependent ways of modifying an illegitimate address
7614 to be legitimate. If we find one, return the new, valid address. */
7615 rtx
7616 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7617 {
7618 if (arm_tls_referenced_p (x))
7619 {
7620 rtx addend = NULL;
7621
7622 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7623 {
7624 addend = XEXP (XEXP (x, 0), 1);
7625 x = XEXP (XEXP (x, 0), 0);
7626 }
7627
7628 if (GET_CODE (x) != SYMBOL_REF)
7629 return x;
7630
7631 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7632
7633 x = legitimize_tls_address (x, NULL_RTX);
7634
7635 if (addend)
7636 {
7637 x = gen_rtx_PLUS (SImode, x, addend);
7638 orig_x = x;
7639 }
7640 else
7641 return x;
7642 }
7643
7644 if (!TARGET_ARM)
7645 {
7646 /* TODO: legitimize_address for Thumb2. */
7647 if (TARGET_THUMB2)
7648 return x;
7649 return thumb_legitimize_address (x, orig_x, mode);
7650 }
7651
7652 if (GET_CODE (x) == PLUS)
7653 {
7654 rtx xop0 = XEXP (x, 0);
7655 rtx xop1 = XEXP (x, 1);
7656
7657 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7658 xop0 = force_reg (SImode, xop0);
7659
7660 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7661 && !symbol_mentioned_p (xop1))
7662 xop1 = force_reg (SImode, xop1);
7663
7664 if (ARM_BASE_REGISTER_RTX_P (xop0)
7665 && CONST_INT_P (xop1))
7666 {
7667 HOST_WIDE_INT n, low_n;
7668 rtx base_reg, val;
7669 n = INTVAL (xop1);
7670
7671 /* VFP addressing modes actually allow greater offsets, but for
7672 now we just stick with the lowest common denominator. */
7673 if (mode == DImode
7674 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7675 {
7676 low_n = n & 0x0f;
7677 n &= ~0x0f;
7678 if (low_n > 4)
7679 {
7680 n += 16;
7681 low_n -= 16;
7682 }
7683 }
7684 else
7685 {
7686 low_n = ((mode) == TImode ? 0
7687 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7688 n -= low_n;
7689 }
7690
7691 base_reg = gen_reg_rtx (SImode);
7692 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7693 emit_move_insn (base_reg, val);
7694 x = plus_constant (Pmode, base_reg, low_n);
7695 }
7696 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7697 x = gen_rtx_PLUS (SImode, xop0, xop1);
7698 }
7699
7700 /* XXX We don't allow MINUS any more -- see comment in
7701 arm_legitimate_address_outer_p (). */
7702 else if (GET_CODE (x) == MINUS)
7703 {
7704 rtx xop0 = XEXP (x, 0);
7705 rtx xop1 = XEXP (x, 1);
7706
7707 if (CONSTANT_P (xop0))
7708 xop0 = force_reg (SImode, xop0);
7709
7710 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7711 xop1 = force_reg (SImode, xop1);
7712
7713 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7714 x = gen_rtx_MINUS (SImode, xop0, xop1);
7715 }
7716
7717 /* Make sure to take full advantage of the pre-indexed addressing mode
7718 with absolute addresses, which often allows the base register to
7719 be factorized across multiple adjacent memory references, and might
7720 even allow the minipool to be avoided entirely. */
7721 else if (CONST_INT_P (x) && optimize > 0)
7722 {
7723 unsigned int bits;
7724 HOST_WIDE_INT mask, base, index;
7725 rtx base_reg;
7726
7727 /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
7728 use an 8-bit index. So let's use a 12-bit index for SImode only and
7729 hope that arm_gen_constant will enable ldrb to use more bits. */
7730 bits = (mode == SImode) ? 12 : 8;
7731 mask = (1 << bits) - 1;
7732 base = INTVAL (x) & ~mask;
7733 index = INTVAL (x) & mask;
7734 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7735 {
7736 /* It'll most probably be more efficient to generate the base
7737 with more bits set and use a negative index instead. */
7738 base |= mask;
7739 index -= mask;
7740 }
7741 base_reg = force_reg (SImode, GEN_INT (base));
7742 x = plus_constant (Pmode, base_reg, index);
7743 }
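
/* As a hypothetical example of the transformation above: with
   optimization enabled, SImode loads from the absolute addresses 0x3004
   and 0x3008 are both rewritten as the base 0x3000 (forced into a
   register) plus the 12-bit indexes 4 and 8, so the two memory
   references can share a single base register.  */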
7744
7745 if (flag_pic)
7746 {
7747 /* We need to find and carefully transform any SYMBOL and LABEL
7748 references; so go back to the original address expression. */
7749 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7750
7751 if (new_x != orig_x)
7752 x = new_x;
7753 }
7754
7755 return x;
7756 }
7757
7758
7759 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7760 to be legitimate. If we find one, return the new, valid address. */
7761 rtx
7762 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7763 {
7764 if (GET_CODE (x) == PLUS
7765 && CONST_INT_P (XEXP (x, 1))
7766 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7767 || INTVAL (XEXP (x, 1)) < 0))
7768 {
7769 rtx xop0 = XEXP (x, 0);
7770 rtx xop1 = XEXP (x, 1);
7771 HOST_WIDE_INT offset = INTVAL (xop1);
7772
7773 /* Try and fold the offset into a biasing of the base register and
7774 then offsetting that. Don't do this when optimizing for space
7775 since it can cause too many CSEs. */
7776 if (optimize_size && offset >= 0
7777 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7778 {
7779 HOST_WIDE_INT delta;
7780
7781 if (offset >= 256)
7782 delta = offset - (256 - GET_MODE_SIZE (mode));
7783 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7784 delta = 31 * GET_MODE_SIZE (mode);
7785 else
7786 delta = offset & (~31 * GET_MODE_SIZE (mode));
7787
7788 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7789 NULL_RTX);
7790 x = plus_constant (Pmode, xop0, delta);
7791 }
7792 else if (offset < 0 && offset > -256)
7793 /* Small negative offsets are best done with a subtract before the
7794 dereference, since forcing them into a register normally takes two
7795 instructions. */
7796 x = force_operand (x, NULL_RTX);
7797 else
7798 {
7799 /* For the remaining cases, force the constant into a register. */
7800 xop1 = force_reg (SImode, xop1);
7801 x = gen_rtx_PLUS (SImode, xop0, xop1);
7802 }
7803 }
7804 else if (GET_CODE (x) == PLUS
7805 && s_register_operand (XEXP (x, 1), SImode)
7806 && !s_register_operand (XEXP (x, 0), SImode))
7807 {
7808 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
7809
7810 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
7811 }
7812
7813 if (flag_pic)
7814 {
7815 /* We need to find and carefully transform any SYMBOL and LABEL
7816 references; so go back to the original address expression. */
7817 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7818
7819 if (new_x != orig_x)
7820 x = new_x;
7821 }
7822
7823 return x;
7824 }
7825
7826 bool
7827 arm_legitimize_reload_address (rtx *p,
7828 machine_mode mode,
7829 int opnum, int type,
7830 int ind_levels ATTRIBUTE_UNUSED)
7831 {
7832 /* We must recognize output that we have already generated ourselves. */
7833 if (GET_CODE (*p) == PLUS
7834 && GET_CODE (XEXP (*p, 0)) == PLUS
7835 && REG_P (XEXP (XEXP (*p, 0), 0))
7836 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
7837 && CONST_INT_P (XEXP (*p, 1)))
7838 {
7839 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
7840 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
7841 VOIDmode, 0, 0, opnum, (enum reload_type) type);
7842 return true;
7843 }
7844
7845 if (GET_CODE (*p) == PLUS
7846 && REG_P (XEXP (*p, 0))
7847 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
7848 /* If the base register is equivalent to a constant, let the generic
7849 code handle it. Otherwise we will run into problems if a future
7850 reload pass decides to rematerialize the constant. */
7851 && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0)))
7852 && CONST_INT_P (XEXP (*p, 1)))
7853 {
7854 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
7855 HOST_WIDE_INT low, high;
7856
7857 /* Detect coprocessor load/stores. */
7858 bool coproc_p = ((TARGET_HARD_FLOAT
7859 && TARGET_VFP
7860 && (mode == SFmode || mode == DFmode))
7861 || (TARGET_REALLY_IWMMXT
7862 && VALID_IWMMXT_REG_MODE (mode))
7863 || (TARGET_NEON
7864 && (VALID_NEON_DREG_MODE (mode)
7865 || VALID_NEON_QREG_MODE (mode))));
7866
7867 /* For some cases, bail out when the lower two bits are nonzero (the access is unaligned). */
7868 if ((val & 0x3) != 0
7869 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
7870 && (coproc_p
7871 /* For DI, and DF under soft-float: */
7872 || ((mode == DImode || mode == DFmode)
7873 /* Without ldrd, we use stm/ldm, which does not
7874 fare well with unaligned offsets. */
7875 && (! TARGET_LDRD
7876 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
7877 || TARGET_THUMB2))))
7878 return false;
7879
7880 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
7881 of which the (reg+high) gets turned into a reload add insn,
7882 we try to decompose the index into high/low values that can often
7883 also lead to better reload CSE.
7884 For example:
7885 ldr r0, [r2, #4100] // Offset too large
7886 ldr r1, [r2, #4104] // Offset too large
7887
7888 is best reloaded as:
7889 add t1, r2, #4096
7890 ldr r0, [t1, #4]
7891 add t2, r2, #4096
7892 ldr r1, [t2, #8]
7893
7894 which post-reload CSE can simplify in most cases to eliminate the
7895 second add instruction:
7896 add t1, r2, #4096
7897 ldr r0, [t1, #4]
7898 ldr r1, [t1, #8]
7899
7900 The idea here is that we want to split out the bits of the constant
7901 as a mask, rather than by subtracting the maximum offset that the
7902 respective type of load/store can handle.
7903
7904 A negative low part can still be useful even if the overall offset
7905 is positive; sometimes this leads to an immediate that can be
7906 constructed with fewer instructions.
7907 For example:
7908 ldr r0, [r2, #0x3FFFFC]
7909
7910 This is best reloaded as:
7911 add t1, r2, #0x400000
7912 ldr r0, [t1, #-4]
7913
7914 The trick for spotting this for a load insn with N bits of offset
7915 (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
7916 negative offset that is going to make bit N and all the bits below
7917 it become zero in the remainder part.
7918
7919 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
7920 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
7921 used in most cases of ARM load/store instructions. */
7922
7923 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
7924 (((VAL) & ((1 << (N)) - 1)) \
7925 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
7926 : 0)
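
/* As an illustration (not used by the compiler), the 0x3FFFFC example
   from the comment above works out as follows for a 12-bit offset field:

       SIGN_MAG_LOW_ADDR_BITS (0x3FFFFC, 12)
	 = ((0x3FFFFC & 0x1FFF) ^ 0x1000) - 0x1000
	 = (0x1FFC ^ 0x1000) - 0x1000
	 = 0x0FFC - 0x1000
	 = -4

   so the low part is -4 and the high part becomes
   0x3FFFFC - (-4) = 0x400000, matching the
   "add t1, r2, #0x400000; ldr r0, [t1, #-4]" sequence shown above.  */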
7927
7928 if (coproc_p)
7929 {
7930 low = SIGN_MAG_LOW_ADDR_BITS (val, 10);
7931
7932 /* NEON quad-word load/stores are made of two double-word accesses,
7933 so the valid index range is reduced by 8. Treat as 9-bit range if
7934 we go over it. */
7935 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016)
7936 low = SIGN_MAG_LOW_ADDR_BITS (val, 9);
7937 }
7938 else if (GET_MODE_SIZE (mode) == 8)
7939 {
7940 if (TARGET_LDRD)
7941 low = (TARGET_THUMB2
7942 ? SIGN_MAG_LOW_ADDR_BITS (val, 10)
7943 : SIGN_MAG_LOW_ADDR_BITS (val, 8));
7944 else
7945 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
7946 to access doublewords. The supported load/store offsets are
7947 -8, -4, and 4, which we try to produce here. */
7948 low = ((val & 0xf) ^ 0x8) - 0x8;
7949 }
7950 else if (GET_MODE_SIZE (mode) < 8)
7951 {
7952 /* NEON element load/stores do not have an offset. */
7953 if (TARGET_NEON_FP16 && mode == HFmode)
7954 return false;
7955
7956 if (TARGET_THUMB2)
7957 {
7958 /* Thumb-2 has an asymmetrical index range of (-256,4096).
7959 Try the wider 12-bit range first, and re-try if the result
7960 is out of range. */
7961 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7962 if (low < -255)
7963 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7964 }
7965 else
7966 {
7967 if (mode == HImode || mode == HFmode)
7968 {
7969 if (arm_arch4)
7970 low = SIGN_MAG_LOW_ADDR_BITS (val, 8);
7971 else
7972 {
7973 /* The storehi/movhi_bytes fallbacks can use only
7974 [-4094,+4094] of the full ldrb/strb index range. */
7975 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7976 if (low == 4095 || low == -4095)
7977 return false;
7978 }
7979 }
7980 else
7981 low = SIGN_MAG_LOW_ADDR_BITS (val, 12);
7982 }
7983 }
7984 else
7985 return false;
7986
7987 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
7988 ^ (unsigned HOST_WIDE_INT) 0x80000000)
7989 - (unsigned HOST_WIDE_INT) 0x80000000);
7990 /* Check for overflow or zero */
7991 if (low == 0 || high == 0 || (high + low != val))
7992 return false;
7993
7994 /* Reload the high part into a base reg; leave the low part
7995 in the mem.
7996 Note that replacing this gen_rtx_PLUS with plus_constant is
7997 wrong in this case because we rely on the
7998 (plus (plus reg c1) c2) structure being preserved so that
7999 XEXP (*p, 0) in push_reload below uses the correct term. */
8000 *p = gen_rtx_PLUS (GET_MODE (*p),
8001 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
8002 GEN_INT (high)),
8003 GEN_INT (low));
8004 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
8005 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
8006 VOIDmode, 0, 0, opnum, (enum reload_type) type);
8007 return true;
8008 }
8009
8010 return false;
8011 }
8012
8013 rtx
8014 thumb_legitimize_reload_address (rtx *x_p,
8015 machine_mode mode,
8016 int opnum, int type,
8017 int ind_levels ATTRIBUTE_UNUSED)
8018 {
8019 rtx x = *x_p;
8020
8021 if (GET_CODE (x) == PLUS
8022 && GET_MODE_SIZE (mode) < 4
8023 && REG_P (XEXP (x, 0))
8024 && XEXP (x, 0) == stack_pointer_rtx
8025 && CONST_INT_P (XEXP (x, 1))
8026 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8027 {
8028 rtx orig_x = x;
8029
8030 x = copy_rtx (x);
8031 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8032 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8033 return x;
8034 }
8035
8036 /* If both registers are hi-regs, then it's better to reload the
8037 entire expression rather than each register individually. That
8038 only requires one reload register rather than two. */
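/* For instance (hypothetical register numbers), an address such as
   (plus (reg:SI 8) (reg:SI 9)) is pushed back for reload as a whole,
   so only one reload register is needed instead of two.  */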
8039 if (GET_CODE (x) == PLUS
8040 && REG_P (XEXP (x, 0))
8041 && REG_P (XEXP (x, 1))
8042 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
8043 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
8044 {
8045 rtx orig_x = x;
8046
8047 x = copy_rtx (x);
8048 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
8049 Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
8050 return x;
8051 }
8052
8053 return NULL;
8054 }
8055
8056 /* Return TRUE if X contains any TLS symbol references. */
8057
8058 bool
8059 arm_tls_referenced_p (rtx x)
8060 {
8061 if (! TARGET_HAVE_TLS)
8062 return false;
8063
8064 subrtx_iterator::array_type array;
8065 FOR_EACH_SUBRTX (iter, array, x, ALL)
8066 {
8067 const_rtx x = *iter;
8068 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8069 return true;
8070
8071 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8072 TLS offsets, not real symbol references. */
8073 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8074 iter.skip_subrtxes ();
8075 }
8076 return false;
8077 }
8078
8079 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8080
8081 On the ARM, allow any integer (invalid ones are removed later by insn
8082 patterns), nice doubles and symbol_refs which refer to the function's
8083 constant pool XXX.
8084
8085 When generating pic allow anything. */
8086
8087 static bool
8088 arm_legitimate_constant_p_1 (machine_mode mode, rtx x)
8089 {
8090 /* At present, we have no support for Neon structure constants, so forbid
8091 them here. It might be possible to handle simple cases like 0 and -1
8092 in future. */
8093 if (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))
8094 return false;
8095
8096 return flag_pic || !label_mentioned_p (x);
8097 }
8098
8099 static bool
8100 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8101 {
8102 return (CONST_INT_P (x)
8103 || CONST_DOUBLE_P (x)
8104 || CONSTANT_ADDRESS_P (x)
8105 || flag_pic);
8106 }
8107
8108 static bool
8109 arm_legitimate_constant_p (machine_mode mode, rtx x)
8110 {
8111 return (!arm_cannot_force_const_mem (mode, x)
8112 && (TARGET_32BIT
8113 ? arm_legitimate_constant_p_1 (mode, x)
8114 : thumb_legitimate_constant_p (mode, x)));
8115 }
8116
8117 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8118
8119 static bool
8120 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8121 {
8122 rtx base, offset;
8123
8124 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8125 {
8126 split_const (x, &base, &offset);
8127 if (GET_CODE (base) == SYMBOL_REF
8128 && !offset_within_block_p (base, INTVAL (offset)))
8129 return true;
8130 }
8131 return arm_tls_referenced_p (x);
8132 }
8133 \f
8134 #define REG_OR_SUBREG_REG(X) \
8135 (REG_P (X) \
8136 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8137
8138 #define REG_OR_SUBREG_RTX(X) \
8139 (REG_P (X) ? (X) : SUBREG_REG (X))
8140
8141 static inline int
8142 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8143 {
8144 machine_mode mode = GET_MODE (x);
8145 int total, words;
8146
8147 switch (code)
8148 {
8149 case ASHIFT:
8150 case ASHIFTRT:
8151 case LSHIFTRT:
8152 case ROTATERT:
8153 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8154
8155 case PLUS:
8156 case MINUS:
8157 case COMPARE:
8158 case NEG:
8159 case NOT:
8160 return COSTS_N_INSNS (1);
8161
8162 case MULT:
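/* As a worked example of the CONST_INT case below: multiplying by 85
   (0x55) shifts i right two bits per iteration (85 -> 21 -> 5 -> 1 -> 0),
   so cycles == 4 and the cost is COSTS_N_INSNS (2) + 4.  */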
8163 if (CONST_INT_P (XEXP (x, 1)))
8164 {
8165 int cycles = 0;
8166 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8167
8168 while (i)
8169 {
8170 i >>= 2;
8171 cycles++;
8172 }
8173 return COSTS_N_INSNS (2) + cycles;
8174 }
8175 return COSTS_N_INSNS (1) + 16;
8176
8177 case SET:
8178 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8179 the mode. */
8180 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8181 return (COSTS_N_INSNS (words)
8182 + 4 * ((MEM_P (SET_SRC (x)))
8183 + MEM_P (SET_DEST (x))));
8184
8185 case CONST_INT:
8186 if (outer == SET)
8187 {
8188 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8189 return 0;
8190 if (thumb_shiftable_const (INTVAL (x)))
8191 return COSTS_N_INSNS (2);
8192 return COSTS_N_INSNS (3);
8193 }
8194 else if ((outer == PLUS || outer == COMPARE)
8195 && INTVAL (x) < 256 && INTVAL (x) > -256)
8196 return 0;
8197 else if ((outer == IOR || outer == XOR || outer == AND)
8198 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8199 return COSTS_N_INSNS (1);
8200 else if (outer == AND)
8201 {
8202 int i;
8203 /* This duplicates the tests in the andsi3 expander. */
8204 for (i = 9; i <= 31; i++)
8205 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8206 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8207 return COSTS_N_INSNS (2);
8208 }
8209 else if (outer == ASHIFT || outer == ASHIFTRT
8210 || outer == LSHIFTRT)
8211 return 0;
8212 return COSTS_N_INSNS (2);
8213
8214 case CONST:
8215 case CONST_DOUBLE:
8216 case LABEL_REF:
8217 case SYMBOL_REF:
8218 return COSTS_N_INSNS (3);
8219
8220 case UDIV:
8221 case UMOD:
8222 case DIV:
8223 case MOD:
8224 return 100;
8225
8226 case TRUNCATE:
8227 return 99;
8228
8229 case AND:
8230 case XOR:
8231 case IOR:
8232 /* XXX guess. */
8233 return 8;
8234
8235 case MEM:
8236 /* XXX another guess. */
8237 /* Memory costs quite a lot for the first word, but subsequent words
8238 load at the equivalent of a single insn each. */
8239 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8240 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8241 ? 4 : 0));
8242
8243 case IF_THEN_ELSE:
8244 /* XXX a guess. */
8245 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8246 return 14;
8247 return 2;
8248
8249 case SIGN_EXTEND:
8250 case ZERO_EXTEND:
8251 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8252 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8253
8254 if (mode == SImode)
8255 return total;
8256
8257 if (arm_arch6)
8258 return total + COSTS_N_INSNS (1);
8259
8260 /* Assume a two-shift sequence. Increase the cost slightly so
8261 we prefer actual shifts over an extend operation. */
8262 return total + 1 + COSTS_N_INSNS (2);
8263
8264 default:
8265 return 99;
8266 }
8267 }
8268
8269 static inline bool
8270 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8271 {
8272 machine_mode mode = GET_MODE (x);
8273 enum rtx_code subcode;
8274 rtx operand;
8275 enum rtx_code code = GET_CODE (x);
8276 *total = 0;
8277
8278 switch (code)
8279 {
8280 case MEM:
8281 /* Memory costs quite a lot for the first word, but subsequent words
8282 load at the equivalent of a single insn each. */
8283 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8284 return true;
8285
8286 case DIV:
8287 case MOD:
8288 case UDIV:
8289 case UMOD:
8290 if (TARGET_HARD_FLOAT && mode == SFmode)
8291 *total = COSTS_N_INSNS (2);
8292 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8293 *total = COSTS_N_INSNS (4);
8294 else
8295 *total = COSTS_N_INSNS (20);
8296 return false;
8297
8298 case ROTATE:
8299 if (REG_P (XEXP (x, 1)))
8300 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8301 else if (!CONST_INT_P (XEXP (x, 1)))
8302 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8303
8304 /* Fall through */
8305 case ROTATERT:
8306 if (mode != SImode)
8307 {
8308 *total += COSTS_N_INSNS (4);
8309 return true;
8310 }
8311
8312 /* Fall through */
8313 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8314 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8315 if (mode == DImode)
8316 {
8317 *total += COSTS_N_INSNS (3);
8318 return true;
8319 }
8320
8321 *total += COSTS_N_INSNS (1);
8322 /* Increase the cost of complex shifts because they aren't any faster,
8323 and reduce dual issue opportunities. */
8324 if (arm_tune_cortex_a9
8325 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8326 ++*total;
8327
8328 return true;
8329
8330 case MINUS:
8331 if (mode == DImode)
8332 {
8333 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8334 if (CONST_INT_P (XEXP (x, 0))
8335 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8336 {
8337 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8338 return true;
8339 }
8340
8341 if (CONST_INT_P (XEXP (x, 1))
8342 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8343 {
8344 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8345 return true;
8346 }
8347
8348 return false;
8349 }
8350
8351 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8352 {
8353 if (TARGET_HARD_FLOAT
8354 && (mode == SFmode
8355 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8356 {
8357 *total = COSTS_N_INSNS (1);
8358 if (CONST_DOUBLE_P (XEXP (x, 0))
8359 && arm_const_double_rtx (XEXP (x, 0)))
8360 {
8361 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8362 return true;
8363 }
8364
8365 if (CONST_DOUBLE_P (XEXP (x, 1))
8366 && arm_const_double_rtx (XEXP (x, 1)))
8367 {
8368 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8369 return true;
8370 }
8371
8372 return false;
8373 }
8374 *total = COSTS_N_INSNS (20);
8375 return false;
8376 }
8377
8378 *total = COSTS_N_INSNS (1);
8379 if (CONST_INT_P (XEXP (x, 0))
8380 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8381 {
8382 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8383 return true;
8384 }
8385
8386 subcode = GET_CODE (XEXP (x, 1));
8387 if (subcode == ASHIFT || subcode == ASHIFTRT
8388 || subcode == LSHIFTRT
8389 || subcode == ROTATE || subcode == ROTATERT)
8390 {
8391 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8392 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8393 return true;
8394 }
8395
8396 /* A shift as a part of RSB costs no more than RSB itself. */
8397 if (GET_CODE (XEXP (x, 0)) == MULT
8398 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8399 {
8400 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8401 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8402 return true;
8403 }
8404
8405 if (subcode == MULT
8406 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8407 {
8408 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8409 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8410 return true;
8411 }
8412
8413 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8414 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8415 {
8416 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8417 if (REG_P (XEXP (XEXP (x, 1), 0))
8418 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8419 *total += COSTS_N_INSNS (1);
8420
8421 return true;
8422 }
8423
8424 /* Fall through */
8425
8426 case PLUS:
8427 if (code == PLUS && arm_arch6 && mode == SImode
8428 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8429 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8430 {
8431 *total = COSTS_N_INSNS (1);
8432 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8433 0, speed);
8434 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8435 return true;
8436 }
8437
8438 /* MLA: All arguments must be registers. We filter out
8439 multiplication by a power of two, so that we fall through to
8440 the code below. */
8441 if (GET_CODE (XEXP (x, 0)) == MULT
8442 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8443 {
8444 /* The cost comes from the cost of the multiply. */
8445 return false;
8446 }
8447
8448 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8449 {
8450 if (TARGET_HARD_FLOAT
8451 && (mode == SFmode
8452 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8453 {
8454 *total = COSTS_N_INSNS (1);
8455 if (CONST_DOUBLE_P (XEXP (x, 1))
8456 && arm_const_double_rtx (XEXP (x, 1)))
8457 {
8458 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8459 return true;
8460 }
8461
8462 return false;
8463 }
8464
8465 *total = COSTS_N_INSNS (20);
8466 return false;
8467 }
8468
8469 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8470 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8471 {
8472 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8473 if (REG_P (XEXP (XEXP (x, 0), 0))
8474 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8475 *total += COSTS_N_INSNS (1);
8476 return true;
8477 }
8478
8479 /* Fall through */
8480
8481 case AND: case XOR: case IOR:
8482
8483 /* Normally the frame registers will be split into reg+const during
8484 reload, so it is a bad idea to combine them with other instructions,
8485 since then they might not be moved outside of loops. As a compromise
8486 we allow integration with ops that have a constant as their second
8487 operand. */
8488 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8489 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8490 && !CONST_INT_P (XEXP (x, 1)))
8491 *total = COSTS_N_INSNS (1);
8492
8493 if (mode == DImode)
8494 {
8495 *total += COSTS_N_INSNS (2);
8496 if (CONST_INT_P (XEXP (x, 1))
8497 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8498 {
8499 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8500 return true;
8501 }
8502
8503 return false;
8504 }
8505
8506 *total += COSTS_N_INSNS (1);
8507 if (CONST_INT_P (XEXP (x, 1))
8508 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8509 {
8510 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8511 return true;
8512 }
8513 subcode = GET_CODE (XEXP (x, 0));
8514 if (subcode == ASHIFT || subcode == ASHIFTRT
8515 || subcode == LSHIFTRT
8516 || subcode == ROTATE || subcode == ROTATERT)
8517 {
8518 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8519 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8520 return true;
8521 }
8522
8523 if (subcode == MULT
8524 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8525 {
8526 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8527 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8528 return true;
8529 }
8530
8531 if (subcode == UMIN || subcode == UMAX
8532 || subcode == SMIN || subcode == SMAX)
8533 {
8534 *total = COSTS_N_INSNS (3);
8535 return true;
8536 }
8537
8538 return false;
8539
8540 case MULT:
8541 /* This should have been handled by the CPU specific routines. */
8542 gcc_unreachable ();
8543
8544 case TRUNCATE:
8545 if (arm_arch3m && mode == SImode
8546 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8547 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8548 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8549 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8550 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8551 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8552 {
8553 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8554 return true;
8555 }
8556 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8557 return false;
8558
8559 case NEG:
8560 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8561 {
8562 if (TARGET_HARD_FLOAT
8563 && (mode == SFmode
8564 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8565 {
8566 *total = COSTS_N_INSNS (1);
8567 return false;
8568 }
8569 *total = COSTS_N_INSNS (2);
8570 return false;
8571 }
8572
8573 /* Fall through */
8574 case NOT:
8575 *total = COSTS_N_INSNS (ARM_NUM_REGS(mode));
8576 if (mode == SImode && code == NOT)
8577 {
8578 subcode = GET_CODE (XEXP (x, 0));
8579 if (subcode == ASHIFT || subcode == ASHIFTRT
8580 || subcode == LSHIFTRT
8581 || subcode == ROTATE || subcode == ROTATERT
8582 || (subcode == MULT
8583 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8584 {
8585 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8586 /* Register shifts cost an extra cycle. */
8587 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8588 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8589 subcode, 1, speed);
8590 return true;
8591 }
8592 }
8593
8594 return false;
8595
8596 case IF_THEN_ELSE:
8597 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8598 {
8599 *total = COSTS_N_INSNS (4);
8600 return true;
8601 }
8602
8603 operand = XEXP (x, 0);
8604
8605 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8606 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8607 && REG_P (XEXP (operand, 0))
8608 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8609 *total += COSTS_N_INSNS (1);
8610 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8611 + rtx_cost (XEXP (x, 2), code, 2, speed));
8612 return true;
8613
8614 case NE:
8615 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8616 {
8617 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8618 return true;
8619 }
8620 goto scc_insn;
8621
8622 case GE:
8623 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8624 && mode == SImode && XEXP (x, 1) == const0_rtx)
8625 {
8626 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8627 return true;
8628 }
8629 goto scc_insn;
8630
8631 case LT:
8632 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8633 && mode == SImode && XEXP (x, 1) == const0_rtx)
8634 {
8635 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8636 return true;
8637 }
8638 goto scc_insn;
8639
8640 case EQ:
8641 case GT:
8642 case LE:
8643 case GEU:
8644 case LTU:
8645 case GTU:
8646 case LEU:
8647 case UNORDERED:
8648 case ORDERED:
8649 case UNEQ:
8650 case UNGE:
8651 case UNLT:
8652 case UNGT:
8653 case UNLE:
8654 scc_insn:
8655 /* SCC insns. In the case where the comparison has already been
8656 performed, they cost 2 instructions. Otherwise they need
8657 an additional comparison before them. */
8658 *total = COSTS_N_INSNS (2);
8659 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8660 {
8661 return true;
8662 }
8663
8664 /* Fall through */
8665 case COMPARE:
8666 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8667 {
8668 *total = 0;
8669 return true;
8670 }
8671
8672 *total += COSTS_N_INSNS (1);
8673 if (CONST_INT_P (XEXP (x, 1))
8674 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8675 {
8676 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8677 return true;
8678 }
8679
8680 subcode = GET_CODE (XEXP (x, 0));
8681 if (subcode == ASHIFT || subcode == ASHIFTRT
8682 || subcode == LSHIFTRT
8683 || subcode == ROTATE || subcode == ROTATERT)
8684 {
8685 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8686 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8687 return true;
8688 }
8689
8690 if (subcode == MULT
8691 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8692 {
8693 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8694 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8695 return true;
8696 }
8697
8698 return false;
8699
8700 case UMIN:
8701 case UMAX:
8702 case SMIN:
8703 case SMAX:
8704 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8705 if (!CONST_INT_P (XEXP (x, 1))
8706 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8707 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8708 return true;
8709
8710 case ABS:
8711 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8712 {
8713 if (TARGET_HARD_FLOAT
8714 && (mode == SFmode
8715 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8716 {
8717 *total = COSTS_N_INSNS (1);
8718 return false;
8719 }
8720 *total = COSTS_N_INSNS (20);
8721 return false;
8722 }
8723 *total = COSTS_N_INSNS (1);
8724 if (mode == DImode)
8725 *total += COSTS_N_INSNS (3);
8726 return false;
8727
8728 case SIGN_EXTEND:
8729 case ZERO_EXTEND:
8730 *total = 0;
8731 if (GET_MODE_CLASS (mode) == MODE_INT)
8732 {
8733 rtx op = XEXP (x, 0);
8734 machine_mode opmode = GET_MODE (op);
8735
8736 if (mode == DImode)
8737 *total += COSTS_N_INSNS (1);
8738
8739 if (opmode != SImode)
8740 {
8741 if (MEM_P (op))
8742 {
8743 /* If !arm_arch4, we use one of the extendhisi2_mem
8744 or movhi_bytes patterns for HImode. For a QImode
8745 sign extension, we first zero-extend from memory
8746 and then perform a shift sequence. */
8747 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8748 *total += COSTS_N_INSNS (2);
8749 }
8750 else if (arm_arch6)
8751 *total += COSTS_N_INSNS (1);
8752
8753 /* We don't have the necessary insn, so we need to perform some
8754 other operation. */
8755 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8756 /* An and with constant 255. */
8757 *total += COSTS_N_INSNS (1);
8758 else
8759 /* A shift sequence. Increase costs slightly to avoid
8760 combining two shifts into an extend operation. */
8761 *total += COSTS_N_INSNS (2) + 1;
8762 }
8763
8764 return false;
8765 }
8766
8767 switch (GET_MODE (XEXP (x, 0)))
8768 {
8769 case V8QImode:
8770 case V4HImode:
8771 case V2SImode:
8772 case V4QImode:
8773 case V2HImode:
8774 *total = COSTS_N_INSNS (1);
8775 return false;
8776
8777 default:
8778 gcc_unreachable ();
8779 }
8780 gcc_unreachable ();
8781
8782 case ZERO_EXTRACT:
8783 case SIGN_EXTRACT:
8784 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8785 return true;
8786
8787 case CONST_INT:
8788 if (const_ok_for_arm (INTVAL (x))
8789 || const_ok_for_arm (~INTVAL (x)))
8790 *total = COSTS_N_INSNS (1);
8791 else
8792 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8793 INTVAL (x), NULL_RTX,
8794 NULL_RTX, 0, 0));
8795 return true;
8796
8797 case CONST:
8798 case LABEL_REF:
8799 case SYMBOL_REF:
8800 *total = COSTS_N_INSNS (3);
8801 return true;
8802
8803 case HIGH:
8804 *total = COSTS_N_INSNS (1);
8805 return true;
8806
8807 case LO_SUM:
8808 *total = COSTS_N_INSNS (1);
8809 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8810 return true;
8811
8812 case CONST_DOUBLE:
8813 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8814 && (mode == SFmode || !TARGET_VFP_SINGLE))
8815 *total = COSTS_N_INSNS (1);
8816 else
8817 *total = COSTS_N_INSNS (4);
8818 return true;
8819
8820 case SET:
8821 /* The vec_extract patterns accept memory operands that require an
8822 address reload. Account for the cost of that reload to give the
8823 auto-inc-dec pass an incentive to try to replace them. */
8824 if (TARGET_NEON && MEM_P (SET_DEST (x))
8825 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8826 {
8827 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8828 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8829 *total += COSTS_N_INSNS (1);
8830 return true;
8831 }
8832 /* Likewise for the vec_set patterns. */
8833 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8834 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8835 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8836 {
8837 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8838 *total = rtx_cost (mem, code, 0, speed);
8839 if (!neon_vector_mem_operand (mem, 2, true))
8840 *total += COSTS_N_INSNS (1);
8841 return true;
8842 }
8843 return false;
8844
8845 case UNSPEC:
8846 /* We cost this as high as our memory costs to allow this to
8847 be hoisted from loops. */
8848 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8849 {
8850 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8851 }
8852 return true;
8853
8854 case CONST_VECTOR:
8855 if (TARGET_NEON
8856 && TARGET_HARD_FLOAT
8857 && outer == SET
8858 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8859 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8860 *total = COSTS_N_INSNS (1);
8861 else
8862 *total = COSTS_N_INSNS (4);
8863 return true;
8864
8865 default:
8866 *total = COSTS_N_INSNS (4);
8867 return false;
8868 }
8869 }
8870
8871 /* Estimate the size cost of Thumb-1 instructions.
8872 For now most of the code is copied from thumb1_rtx_costs. We need more
8873 fine-grained tuning when we have more related test cases. */
8874 static inline int
8875 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8876 {
8877 machine_mode mode = GET_MODE (x);
8878 int words;
8879
8880 switch (code)
8881 {
8882 case ASHIFT:
8883 case ASHIFTRT:
8884 case LSHIFTRT:
8885 case ROTATERT:
8886 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8887
8888 case PLUS:
8889 case MINUS:
8890 /* Thumb-1 needs two instructions to implement the
8891 shiftadd/shiftsub0/shiftsub1 patterns generated by RTL expansion,
8892 especially for the expansion of multiplication. */
8893 if ((GET_CODE (XEXP (x, 0)) == MULT
8894 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8895 || (GET_CODE (XEXP (x, 1)) == MULT
8896 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8897 return COSTS_N_INSNS (2);
8898 /* Deliberately fall through for normal RTXs. */
8899 case COMPARE:
8900 case NEG:
8901 case NOT:
8902 return COSTS_N_INSNS (1);
8903
8904 case MULT:
8905 if (CONST_INT_P (XEXP (x, 1)))
8906 {
8907 /* The Thumb-1 mul instruction can't operate on a constant; we must
8908 load it into a register first. */
8909 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8910 /* For targets that have a very small and high-latency multiply
8911 unit, we prefer to synthesize the mult with up to 5 instructions,
8912 giving a good balance between size and performance. */
8913 if (arm_arch6m && arm_m_profile_small_mul)
8914 return COSTS_N_INSNS (5);
8915 else
8916 return COSTS_N_INSNS (1) + const_size;
8917 }
8918 return COSTS_N_INSNS (1);
8919
8920 case SET:
8921 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8922 the mode. */
8923 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8924 return COSTS_N_INSNS (words)
8925 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
8926 || satisfies_constraint_K (SET_SRC (x))
8927 /* thumb1_movdi_insn. */
8928 || ((words > 1) && MEM_P (SET_SRC (x))));
8929
8930 case CONST_INT:
8931 if (outer == SET)
8932 {
8933 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8934 return COSTS_N_INSNS (1);
8935 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8936 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8937 return COSTS_N_INSNS (2);
8938 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8939 if (thumb_shiftable_const (INTVAL (x)))
8940 return COSTS_N_INSNS (2);
8941 return COSTS_N_INSNS (3);
8942 }
8943 else if ((outer == PLUS || outer == COMPARE)
8944 && INTVAL (x) < 256 && INTVAL (x) > -256)
8945 return 0;
8946 else if ((outer == IOR || outer == XOR || outer == AND)
8947 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8948 return COSTS_N_INSNS (1);
8949 else if (outer == AND)
8950 {
8951 int i;
8952 /* This duplicates the tests in the andsi3 expander. */
8953 for (i = 9; i <= 31; i++)
8954 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8955 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8956 return COSTS_N_INSNS (2);
8957 }
8958 else if (outer == ASHIFT || outer == ASHIFTRT
8959 || outer == LSHIFTRT)
8960 return 0;
8961 return COSTS_N_INSNS (2);
8962
8963 case CONST:
8964 case CONST_DOUBLE:
8965 case LABEL_REF:
8966 case SYMBOL_REF:
8967 return COSTS_N_INSNS (3);
8968
8969 case UDIV:
8970 case UMOD:
8971 case DIV:
8972 case MOD:
8973 return 100;
8974
8975 case TRUNCATE:
8976 return 99;
8977
8978 case AND:
8979 case XOR:
8980 case IOR:
8981 return COSTS_N_INSNS (1);
8982
8983 case MEM:
8984 return (COSTS_N_INSNS (1)
8985 + COSTS_N_INSNS (1)
8986 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8987 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8988 ? COSTS_N_INSNS (1) : 0));
8989
8990 case IF_THEN_ELSE:
8991 /* XXX a guess. */
8992 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8993 return 14;
8994 return 2;
8995
8996 case ZERO_EXTEND:
8997 /* XXX still guessing. */
8998 switch (GET_MODE (XEXP (x, 0)))
8999 {
9000 case QImode:
9001 return (1 + (mode == DImode ? 4 : 0)
9002 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9003
9004 case HImode:
9005 return (4 + (mode == DImode ? 4 : 0)
9006 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9007
9008 case SImode:
9009 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9010
9011 default:
9012 return 99;
9013 }
9014
9015 default:
9016 return 99;
9017 }
9018 }
9019
9020 /* RTX costs when optimizing for size. */
9021 static bool
9022 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9023 int *total)
9024 {
9025 machine_mode mode = GET_MODE (x);
9026 if (TARGET_THUMB1)
9027 {
9028 *total = thumb1_size_rtx_costs (x, code, outer_code);
9029 return true;
9030 }
9031
9032 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9033 switch (code)
9034 {
9035 case MEM:
9036 /* A memory access costs 1 insn if the mode is small, or the address is
9037 a single register, otherwise it costs one insn per word. */
9038 if (REG_P (XEXP (x, 0)))
9039 *total = COSTS_N_INSNS (1);
9040 else if (flag_pic
9041 && GET_CODE (XEXP (x, 0)) == PLUS
9042 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9043 /* This will be split into two instructions.
9044 See arm.md:calculate_pic_address. */
9045 *total = COSTS_N_INSNS (2);
9046 else
9047 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9048 return true;
9049
9050 case DIV:
9051 case MOD:
9052 case UDIV:
9053 case UMOD:
9054 /* Needs a libcall; assume a cost of roughly two instructions. */
9055 *total = COSTS_N_INSNS (2);
9056 return false;
9057
9058 case ROTATE:
9059 if (mode == SImode && REG_P (XEXP (x, 1)))
9060 {
9061 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
9062 return true;
9063 }
9064 /* Fall through */
9065 case ROTATERT:
9066 case ASHIFT:
9067 case LSHIFTRT:
9068 case ASHIFTRT:
9069 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9070 {
9071 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
9072 return true;
9073 }
9074 else if (mode == SImode)
9075 {
9076 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
9077 /* Slightly disparage register shifts, but not by much. */
9078 if (!CONST_INT_P (XEXP (x, 1)))
9079 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
9080 return true;
9081 }
9082
9083 /* Needs a libcall. */
9084 *total = COSTS_N_INSNS (2);
9085 return false;
9086
9087 case MINUS:
9088 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9089 && (mode == SFmode || !TARGET_VFP_SINGLE))
9090 {
9091 *total = COSTS_N_INSNS (1);
9092 return false;
9093 }
9094
9095 if (mode == SImode)
9096 {
9097 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9098 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9099
9100 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9101 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9102 || subcode1 == ROTATE || subcode1 == ROTATERT
9103 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9104 || subcode1 == ASHIFTRT)
9105 {
9106 /* It's just the cost of the two operands. */
9107 *total = 0;
9108 return false;
9109 }
9110
9111 *total = COSTS_N_INSNS (1);
9112 return false;
9113 }
9114
9115 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9116 return false;
9117
9118 case PLUS:
9119 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9120 && (mode == SFmode || !TARGET_VFP_SINGLE))
9121 {
9122 *total = COSTS_N_INSNS (1);
9123 return false;
9124 }
9125
9126 /* A shift as a part of ADD costs nothing. */
9127 if (GET_CODE (XEXP (x, 0)) == MULT
9128 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9129 {
9130 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9131 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9132 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9133 return true;
9134 }
9135
9136 /* Fall through */
9137 case AND: case XOR: case IOR:
9138 if (mode == SImode)
9139 {
9140 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9141
9142 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9143 || subcode == LSHIFTRT || subcode == ASHIFTRT
9144 || (code == AND && subcode == NOT))
9145 {
9146 /* It's just the cost of the two operands. */
9147 *total = 0;
9148 return false;
9149 }
9150 }
9151
9152 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9153 return false;
9154
9155 case MULT:
9156 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9157 return false;
9158
9159 case NEG:
9160 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9161 && (mode == SFmode || !TARGET_VFP_SINGLE))
9162 {
9163 *total = COSTS_N_INSNS (1);
9164 return false;
9165 }
9166
9167 /* Fall through */
9168 case NOT:
9169 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9170
9171 return false;
9172
9173 case IF_THEN_ELSE:
9174 *total = 0;
9175 return false;
9176
9177 case COMPARE:
9178 if (cc_register (XEXP (x, 0), VOIDmode))
9179 *total = 0;
9180 else
9181 *total = COSTS_N_INSNS (1);
9182 return false;
9183
9184 case ABS:
9185 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9186 && (mode == SFmode || !TARGET_VFP_SINGLE))
9187 *total = COSTS_N_INSNS (1);
9188 else
9189 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9190 return false;
9191
9192 case SIGN_EXTEND:
9193 case ZERO_EXTEND:
9194 return arm_rtx_costs_1 (x, outer_code, total, 0);
9195
9196 case CONST_INT:
9197 if (const_ok_for_arm (INTVAL (x)))
9198 /* A multiplication by a constant requires another instruction
9199 to load the constant to a register. */
9200 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9201 ? 1 : 0);
9202 else if (const_ok_for_arm (~INTVAL (x)))
9203 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9204 else if (const_ok_for_arm (-INTVAL (x)))
9205 {
9206 if (outer_code == COMPARE || outer_code == PLUS
9207 || outer_code == MINUS)
9208 *total = 0;
9209 else
9210 *total = COSTS_N_INSNS (1);
9211 }
9212 else
9213 *total = COSTS_N_INSNS (2);
9214 return true;
9215
9216 case CONST:
9217 case LABEL_REF:
9218 case SYMBOL_REF:
9219 *total = COSTS_N_INSNS (2);
9220 return true;
9221
9222 case CONST_DOUBLE:
9223 *total = COSTS_N_INSNS (4);
9224 return true;
9225
9226 case CONST_VECTOR:
9227 if (TARGET_NEON
9228 && TARGET_HARD_FLOAT
9229 && outer_code == SET
9230 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9231 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9232 *total = COSTS_N_INSNS (1);
9233 else
9234 *total = COSTS_N_INSNS (4);
9235 return true;
9236
9237 case HIGH:
9238 case LO_SUM:
9239 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9240 cost of these slightly. */
9241 *total = COSTS_N_INSNS (1) + 1;
9242 return true;
9243
9244 case SET:
9245 return false;
9246
9247 default:
9248 if (mode != VOIDmode)
9249 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9250 else
9251 *total = COSTS_N_INSNS (4); /* Who knows? */
9252 return false;
9253 }
9254 }
9255
9256 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9257 operand, then return the operand that is being shifted. If the shift
9258 is not by a constant, then set SHIFT_REG to point to the operand.
9259 Return NULL if OP is not a shifter operand. */
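/* For example, (mult X (const_int 4)) is treated as a left shift by 2 and
   X is returned, while for (lshiftrt X (reg R)) X is returned and
   *SHIFT_REG is set to R.  */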
9260 static rtx
9261 shifter_op_p (rtx op, rtx *shift_reg)
9262 {
9263 enum rtx_code code = GET_CODE (op);
9264
9265 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9266 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9267 return XEXP (op, 0);
9268 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9269 return XEXP (op, 0);
9270 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9271 || code == ASHIFTRT)
9272 {
9273 if (!CONST_INT_P (XEXP (op, 1)))
9274 *shift_reg = XEXP (op, 1);
9275 return XEXP (op, 0);
9276 }
9277
9278 return NULL;
9279 }
9280
9281 static bool
9282 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9283 {
9284 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9285 gcc_assert (GET_CODE (x) == UNSPEC);
9286
9287 switch (XINT (x, 1))
9288 {
9289 case UNSPEC_UNALIGNED_LOAD:
9290 /* We can only do unaligned loads into the integer unit, and we can't
9291 use LDM or LDRD. */
9292 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9293 if (speed_p)
9294 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9295 + extra_cost->ldst.load_unaligned);
9296
9297 #ifdef NOT_YET
9298 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9299 ADDR_SPACE_GENERIC, speed_p);
9300 #endif
9301 return true;
9302
9303 case UNSPEC_UNALIGNED_STORE:
9304 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9305 if (speed_p)
9306 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9307 + extra_cost->ldst.store_unaligned);
9308
9309 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9310 #ifdef NOT_YET
9311 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9312 ADDR_SPACE_GENERIC, speed_p);
9313 #endif
9314 return true;
9315
9316 case UNSPEC_VRINTZ:
9317 case UNSPEC_VRINTP:
9318 case UNSPEC_VRINTM:
9319 case UNSPEC_VRINTR:
9320 case UNSPEC_VRINTX:
9321 case UNSPEC_VRINTA:
9322 *cost = COSTS_N_INSNS (1);
9323 if (speed_p)
9324 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9325
9326 return true;
9327 default:
9328 *cost = COSTS_N_INSNS (2);
9329 break;
9330 }
9331 return false;
9332 }
9333
9334 /* Cost of a libcall. We assume one insn per argument, an amount for the
9335 call (one insn for -Os) and then one for processing the result. */
9336 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
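/* For example, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
   optimizing for speed and to COSTS_N_INSNS (4) when optimizing for
   size.  */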
9337
9338 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9339 do \
9340 { \
9341 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9342 if (shift_op != NULL \
9343 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9344 { \
9345 if (shift_reg) \
9346 { \
9347 if (speed_p) \
9348 *cost += extra_cost->alu.arith_shift_reg; \
9349 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9350 } \
9351 else if (speed_p) \
9352 *cost += extra_cost->alu.arith_shift; \
9353 \
9354 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9355 + rtx_cost (XEXP (x, 1 - IDX), \
9356 OP, 1, speed_p)); \
9357 return true; \
9358 } \
9359 } \
9360 while (0);
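/* This macro is expanded below for the narrow-mode (smaller than SImode)
   PLUS and MINUS cases; IDX selects which operand is tested for a
   left-shift form, and a successful match returns directly from
   arm_new_rtx_costs.  */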
9361
9362 /* RTX costs. Make an estimate of the cost of executing the operation
9363 X, which is contained within an operation with code OUTER_CODE.
9364 SPEED_P indicates whether the cost desired is the performance cost,
9365 or the size cost. The estimate is stored in COST and the return
9366 value is TRUE if the cost calculation is final, or FALSE if the
9367 caller should recurse through the operands of X to add additional
9368 costs.
9369
9370 We currently make no attempt to model the size savings of Thumb-2
9371 16-bit instructions. At the normal points in compilation where
9372 this code is called we have no measure of whether the condition
9373 flags are live or not, and thus no realistic way to determine what
9374 the size will eventually be. */
9375 static bool
9376 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9377 const struct cpu_cost_table *extra_cost,
9378 int *cost, bool speed_p)
9379 {
9380 machine_mode mode = GET_MODE (x);
9381
9382 if (TARGET_THUMB1)
9383 {
9384 if (speed_p)
9385 *cost = thumb1_rtx_costs (x, code, outer_code);
9386 else
9387 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9388 return true;
9389 }
9390
9391 switch (code)
9392 {
9393 case SET:
9394 *cost = 0;
9395 /* SET RTXs don't have a mode so we get it from the destination. */
9396 mode = GET_MODE (SET_DEST (x));
9397
9398 if (REG_P (SET_SRC (x))
9399 && REG_P (SET_DEST (x)))
9400 {
9401 /* Assume that most copies can be done with a single insn,
9402 unless we don't have HW FP, in which case everything
9403 larger than word mode will require two insns. */
9404 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9405 && GET_MODE_SIZE (mode) > 4)
9406 || mode == DImode)
9407 ? 2 : 1);
9408 /* Conditional register moves can be encoded
9409 in 16 bits in Thumb mode. */
9410 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9411 *cost >>= 1;
9412
9413 return true;
9414 }
9415
9416 if (CONST_INT_P (SET_SRC (x)))
9417 {
9418 /* Handle CONST_INT here, since the value doesn't have a mode
9419 and we would otherwise be unable to work out the true cost. */
9420 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9421 outer_code = SET;
9422 /* Slightly lower the cost of setting a core reg to a constant.
9423 This helps break up chains and allows for better scheduling. */
9424 if (REG_P (SET_DEST (x))
9425 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9426 *cost -= 1;
9427 x = SET_SRC (x);
9428 /* Moves of an immediate in the range [0, 255] can be encoded in 16
9429 bits in Thumb mode. */
9430 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9431 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9432 *cost >>= 1;
9433 goto const_int_cost;
9434 }
9435
9436 return false;
9437
9438 case MEM:
9439 /* A memory access costs 1 insn if the mode is small, or the address is
9440 a single register, otherwise it costs one insn per word. */
9441 if (REG_P (XEXP (x, 0)))
9442 *cost = COSTS_N_INSNS (1);
9443 else if (flag_pic
9444 && GET_CODE (XEXP (x, 0)) == PLUS
9445 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9446 /* This will be split into two instructions.
9447 See arm.md:calculate_pic_address. */
9448 *cost = COSTS_N_INSNS (2);
9449 else
9450 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9451
9452 /* For speed optimizations, add the costs of the address and
9453 accessing memory. */
9454 if (speed_p)
9455 #ifdef NOT_YET
9456 *cost += (extra_cost->ldst.load
9457 + arm_address_cost (XEXP (x, 0), mode,
9458 ADDR_SPACE_GENERIC, speed_p));
9459 #else
9460 *cost += extra_cost->ldst.load;
9461 #endif
9462 return true;
9463
9464 case PARALLEL:
9465 {
9466 /* Calculations of LDM costs are complex. We assume an initial cost
9467 (ldm_1st) which will load the number of registers mentioned in
9468 ldm_regs_per_insn_1st registers; then each additional
9469 ldm_regs_per_insn_subsequent registers cost one more insn. The
9470 formula for N regs is thus:
9471
9472 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9473 + ldm_regs_per_insn_subsequent - 1)
9474 / ldm_regs_per_insn_subsequent).
9475
9476 Additional costs may also be added for addressing. A similar
9477 formula is used for STM. */
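/* For instance, with hypothetical tuning values ldm_regs_per_insn_1st == 2
   and ldm_regs_per_insn_subsequent == 2, loading five registers would cost
   ldm_1st + COSTS_N_INSNS ((MAX (5 - 2, 0) + 2 - 1) / 2)
   == ldm_1st + COSTS_N_INSNS (2).  */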
9478
9479 bool is_ldm = load_multiple_operation (x, SImode);
9480 bool is_stm = store_multiple_operation (x, SImode);
9481
9482 *cost = COSTS_N_INSNS (1);
9483
9484 if (is_ldm || is_stm)
9485 {
9486 if (speed_p)
9487 {
9488 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9489 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9490 ? extra_cost->ldst.ldm_regs_per_insn_1st
9491 : extra_cost->ldst.stm_regs_per_insn_1st;
9492 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9493 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9494 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9495
9496 *cost += regs_per_insn_1st
9497 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9498 + regs_per_insn_sub - 1)
9499 / regs_per_insn_sub);
9500 return true;
9501 }
9502
9503 }
9504 return false;
9505 }
9506 case DIV:
9507 case UDIV:
9508 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9509 && (mode == SFmode || !TARGET_VFP_SINGLE))
9510 *cost = COSTS_N_INSNS (speed_p
9511 ? extra_cost->fp[mode != SFmode].div : 1);
9512 else if (mode == SImode && TARGET_IDIV)
9513 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9514 else
9515 *cost = LIBCALL_COST (2);
9516 return false; /* All arguments must be in registers. */
9517
9518 case MOD:
9519 case UMOD:
9520 *cost = LIBCALL_COST (2);
9521 return false; /* All arguments must be in registers. */
9522
9523 case ROTATE:
9524 if (mode == SImode && REG_P (XEXP (x, 1)))
9525 {
9526 *cost = (COSTS_N_INSNS (2)
9527 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9528 if (speed_p)
9529 *cost += extra_cost->alu.shift_reg;
9530 return true;
9531 }
9532 /* Fall through */
9533 case ROTATERT:
9534 case ASHIFT:
9535 case LSHIFTRT:
9536 case ASHIFTRT:
9537 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9538 {
9539 *cost = (COSTS_N_INSNS (3)
9540 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9541 if (speed_p)
9542 *cost += 2 * extra_cost->alu.shift;
9543 return true;
9544 }
9545 else if (mode == SImode)
9546 {
9547 *cost = (COSTS_N_INSNS (1)
9548 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9549 /* Slightly disparage register shifts at -Os, but not by much. */
9550 if (!CONST_INT_P (XEXP (x, 1)))
9551 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9552 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9553 return true;
9554 }
9555 else if (GET_MODE_CLASS (mode) == MODE_INT
9556 && GET_MODE_SIZE (mode) < 4)
9557 {
9558 if (code == ASHIFT)
9559 {
9560 *cost = (COSTS_N_INSNS (1)
9561 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9562 /* Slightly disparage register shifts at -Os, but not by
9563 much. */
9564 if (!CONST_INT_P (XEXP (x, 1)))
9565 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9566 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9567 }
9568 else if (code == LSHIFTRT || code == ASHIFTRT)
9569 {
9570 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9571 {
9572 /* Can use SBFX/UBFX. */
9573 *cost = COSTS_N_INSNS (1);
9574 if (speed_p)
9575 *cost += extra_cost->alu.bfx;
9576 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9577 }
9578 else
9579 {
9580 *cost = COSTS_N_INSNS (2);
9581 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9582 if (speed_p)
9583 {
9584 if (CONST_INT_P (XEXP (x, 1)))
9585 *cost += 2 * extra_cost->alu.shift;
9586 else
9587 *cost += (extra_cost->alu.shift
9588 + extra_cost->alu.shift_reg);
9589 }
9590 else
9591 /* Slightly disparage register shifts. */
9592 *cost += !CONST_INT_P (XEXP (x, 1));
9593 }
9594 }
9595 else /* Rotates. */
9596 {
9597 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9598 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9599 if (speed_p)
9600 {
9601 if (CONST_INT_P (XEXP (x, 1)))
9602 *cost += (2 * extra_cost->alu.shift
9603 + extra_cost->alu.log_shift);
9604 else
9605 *cost += (extra_cost->alu.shift
9606 + extra_cost->alu.shift_reg
9607 + extra_cost->alu.log_shift_reg);
9608 }
9609 }
9610 return true;
9611 }
9612
9613 *cost = LIBCALL_COST (2);
9614 return false;
9615
9616 case BSWAP:
9617 if (arm_arch6)
9618 {
9619 if (mode == SImode)
9620 {
9621 *cost = COSTS_N_INSNS (1);
9622 if (speed_p)
9623 *cost += extra_cost->alu.rev;
9624
9625 return false;
9626 }
9627 }
9628 else
9629 {
9630 /* No rev instruction available. Look at arm_legacy_rev
9631 and thumb_legacy_rev for the form of RTL used then. */
9632 if (TARGET_THUMB)
9633 {
9634 *cost = COSTS_N_INSNS (10);
9635
9636 if (speed_p)
9637 {
9638 *cost += 6 * extra_cost->alu.shift;
9639 *cost += 3 * extra_cost->alu.logical;
9640 }
9641 }
9642 else
9643 {
9644 *cost = COSTS_N_INSNS (5);
9645
9646 if (speed_p)
9647 {
9648 *cost += 2 * extra_cost->alu.shift;
9649 *cost += extra_cost->alu.arith_shift;
9650 *cost += 2 * extra_cost->alu.logical;
9651 }
9652 }
9653 return true;
9654 }
9655 return false;
9656
9657 case MINUS:
9658 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9659 && (mode == SFmode || !TARGET_VFP_SINGLE))
9660 {
9661 *cost = COSTS_N_INSNS (1);
9662 if (GET_CODE (XEXP (x, 0)) == MULT
9663 || GET_CODE (XEXP (x, 1)) == MULT)
9664 {
9665 rtx mul_op0, mul_op1, sub_op;
9666
9667 if (speed_p)
9668 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9669
9670 if (GET_CODE (XEXP (x, 0)) == MULT)
9671 {
9672 mul_op0 = XEXP (XEXP (x, 0), 0);
9673 mul_op1 = XEXP (XEXP (x, 0), 1);
9674 sub_op = XEXP (x, 1);
9675 }
9676 else
9677 {
9678 mul_op0 = XEXP (XEXP (x, 1), 0);
9679 mul_op1 = XEXP (XEXP (x, 1), 1);
9680 sub_op = XEXP (x, 0);
9681 }
9682
9683 /* The first operand of the multiply may be optionally
9684 negated. */
9685 if (GET_CODE (mul_op0) == NEG)
9686 mul_op0 = XEXP (mul_op0, 0);
9687
9688 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9689 + rtx_cost (mul_op1, code, 0, speed_p)
9690 + rtx_cost (sub_op, code, 0, speed_p));
9691
9692 return true;
9693 }
9694
9695 if (speed_p)
9696 *cost += extra_cost->fp[mode != SFmode].addsub;
9697 return false;
9698 }
9699
9700 if (mode == SImode)
9701 {
9702 rtx shift_by_reg = NULL;
9703 rtx shift_op;
9704 rtx non_shift_op;
9705
9706 *cost = COSTS_N_INSNS (1);
9707
9708 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9709 if (shift_op == NULL)
9710 {
9711 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9712 non_shift_op = XEXP (x, 0);
9713 }
9714 else
9715 non_shift_op = XEXP (x, 1);
9716
9717 if (shift_op != NULL)
9718 {
9719 if (shift_by_reg != NULL)
9720 {
9721 if (speed_p)
9722 *cost += extra_cost->alu.arith_shift_reg;
9723 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9724 }
9725 else if (speed_p)
9726 *cost += extra_cost->alu.arith_shift;
9727
9728 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9729 + rtx_cost (non_shift_op, code, 0, speed_p));
9730 return true;
9731 }
9732
9733 if (arm_arch_thumb2
9734 && GET_CODE (XEXP (x, 1)) == MULT)
9735 {
9736 /* MLS. */
9737 if (speed_p)
9738 *cost += extra_cost->mult[0].add;
9739 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9740 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9741 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9742 return true;
9743 }
9744
9745 if (CONST_INT_P (XEXP (x, 0)))
9746 {
9747 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9748 INTVAL (XEXP (x, 0)), NULL_RTX,
9749 NULL_RTX, 1, 0);
9750 *cost = COSTS_N_INSNS (insns);
9751 if (speed_p)
9752 *cost += insns * extra_cost->alu.arith;
9753 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9754 return true;
9755 }
9756 else if (speed_p)
9757 *cost += extra_cost->alu.arith;
9758
9759 return false;
9760 }
9761
9762 if (GET_MODE_CLASS (mode) == MODE_INT
9763 && GET_MODE_SIZE (mode) < 4)
9764 {
9765 rtx shift_op, shift_reg;
9766 shift_reg = NULL;
9767
9768 /* We check both sides of the MINUS for shifter operands since,
9769 unlike PLUS, it's not commutative. */
9770
9771 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9772 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9773
9774 /* Slightly disparage, as we might need to widen the result. */
9775 *cost = 1 + COSTS_N_INSNS (1);
9776 if (speed_p)
9777 *cost += extra_cost->alu.arith;
9778
9779 if (CONST_INT_P (XEXP (x, 0)))
9780 {
9781 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9782 return true;
9783 }
9784
9785 return false;
9786 }
9787
9788 if (mode == DImode)
9789 {
9790 *cost = COSTS_N_INSNS (2);
9791
9792 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9793 {
9794 rtx op1 = XEXP (x, 1);
9795
9796 if (speed_p)
9797 *cost += 2 * extra_cost->alu.arith;
9798
9799 if (GET_CODE (op1) == ZERO_EXTEND)
9800 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9801 else
9802 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9803 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9804 0, speed_p);
9805 return true;
9806 }
9807 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9808 {
9809 if (speed_p)
9810 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9811 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9812 0, speed_p)
9813 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9814 return true;
9815 }
9816 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9817 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9818 {
9819 if (speed_p)
9820 *cost += (extra_cost->alu.arith
9821 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9822 ? extra_cost->alu.arith
9823 : extra_cost->alu.arith_shift));
9824 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9825 + rtx_cost (XEXP (XEXP (x, 1), 0),
9826 GET_CODE (XEXP (x, 1)), 0, speed_p));
9827 return true;
9828 }
9829
9830 if (speed_p)
9831 *cost += 2 * extra_cost->alu.arith;
9832 return false;
9833 }
9834
9835 /* Vector mode? */
9836
9837 *cost = LIBCALL_COST (2);
9838 return false;
9839
9840 case PLUS:
9841 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9842 && (mode == SFmode || !TARGET_VFP_SINGLE))
9843 {
9844 *cost = COSTS_N_INSNS (1);
9845 if (GET_CODE (XEXP (x, 0)) == MULT)
9846 {
9847 rtx mul_op0, mul_op1, add_op;
9848
9849 if (speed_p)
9850 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9851
9852 mul_op0 = XEXP (XEXP (x, 0), 0);
9853 mul_op1 = XEXP (XEXP (x, 0), 1);
9854 add_op = XEXP (x, 1);
9855
9856 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9857 + rtx_cost (mul_op1, code, 0, speed_p)
9858 + rtx_cost (add_op, code, 0, speed_p));
9859
9860 return true;
9861 }
9862
9863 if (speed_p)
9864 *cost += extra_cost->fp[mode != SFmode].addsub;
9865 return false;
9866 }
9867 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9868 {
9869 *cost = LIBCALL_COST (2);
9870 return false;
9871 }
9872
9873 /* Narrow modes can be synthesized in SImode, but the range
9874 of useful sub-operations is limited. Check for shift operations
9875 on one of the operands. Only left shifts can be used in the
9876 narrow modes. */
9877 if (GET_MODE_CLASS (mode) == MODE_INT
9878 && GET_MODE_SIZE (mode) < 4)
9879 {
9880 rtx shift_op, shift_reg;
9881 shift_reg = NULL;
9882
9883 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9884
9885 if (CONST_INT_P (XEXP (x, 1)))
9886 {
9887 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9888 INTVAL (XEXP (x, 1)), NULL_RTX,
9889 NULL_RTX, 1, 0);
9890 *cost = COSTS_N_INSNS (insns);
9891 if (speed_p)
9892 *cost += insns * extra_cost->alu.arith;
9893 /* Slightly penalize a narrow operation as the result may
9894 need widening. */
9895 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9896 return true;
9897 }
9898
9899 /* Slightly penalize a narrow operation as the result may
9900 need widening. */
9901 *cost = 1 + COSTS_N_INSNS (1);
9902 if (speed_p)
9903 *cost += extra_cost->alu.arith;
9904
9905 return false;
9906 }
9907
9908 if (mode == SImode)
9909 {
9910 rtx shift_op, shift_reg;
9911
9912 *cost = COSTS_N_INSNS (1);
9913 if (TARGET_INT_SIMD
9914 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9915 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9916 {
9917 /* UXTA[BH] or SXTA[BH]. */
9918 if (speed_p)
9919 *cost += extra_cost->alu.extend_arith;
9920 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9921 speed_p)
9922 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9923 return true;
9924 }
9925
9926 shift_reg = NULL;
9927 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9928 if (shift_op != NULL)
9929 {
9930 if (shift_reg)
9931 {
9932 if (speed_p)
9933 *cost += extra_cost->alu.arith_shift_reg;
9934 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9935 }
9936 else if (speed_p)
9937 *cost += extra_cost->alu.arith_shift;
9938
9939 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9940 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9941 return true;
9942 }
9943 if (GET_CODE (XEXP (x, 0)) == MULT)
9944 {
9945 rtx mul_op = XEXP (x, 0);
9946
9947 *cost = COSTS_N_INSNS (1);
9948
9949 if (TARGET_DSP_MULTIPLY
9950 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9951 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9952 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9953 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9954 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9955 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9956 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9957 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9958 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9959 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9960 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9961 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9962 == 16))))))
9963 {
9964 /* SMLA[BT][BT]. */
9965 if (speed_p)
9966 *cost += extra_cost->mult[0].extend_add;
9967 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9968 SIGN_EXTEND, 0, speed_p)
9969 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9970 SIGN_EXTEND, 0, speed_p)
9971 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9972 return true;
9973 }
9974
9975 if (speed_p)
9976 *cost += extra_cost->mult[0].add;
9977 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9978 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9979 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9980 return true;
9981 }
9982 if (CONST_INT_P (XEXP (x, 1)))
9983 {
9984 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9985 INTVAL (XEXP (x, 1)), NULL_RTX,
9986 NULL_RTX, 1, 0);
9987 *cost = COSTS_N_INSNS (insns);
9988 if (speed_p)
9989 *cost += insns * extra_cost->alu.arith;
9990 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9991 return true;
9992 }
9993 else if (speed_p)
9994 *cost += extra_cost->alu.arith;
9995
9996 return false;
9997 }
9998
9999 if (mode == DImode)
10000 {
10001 if (arm_arch3m
10002 && GET_CODE (XEXP (x, 0)) == MULT
10003 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10004 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10005 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10006 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10007 {
10008 *cost = COSTS_N_INSNS (1);
10009 if (speed_p)
10010 *cost += extra_cost->mult[1].extend_add;
10011 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
10012 ZERO_EXTEND, 0, speed_p)
10013 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
10014 ZERO_EXTEND, 0, speed_p)
10015 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10016 return true;
10017 }
10018
10019 *cost = COSTS_N_INSNS (2);
10020
10021 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10022 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10023 {
10024 if (speed_p)
10025 *cost += (extra_cost->alu.arith
10026 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10027 ? extra_cost->alu.arith
10028 : extra_cost->alu.arith_shift));
10029
10030 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10031 speed_p)
10032 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10033 return true;
10034 }
10035
10036 if (speed_p)
10037 *cost += 2 * extra_cost->alu.arith;
10038 return false;
10039 }
10040
10041 /* Vector mode? */
10042 *cost = LIBCALL_COST (2);
10043 return false;
10044 case IOR:
10045 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10046 {
10047 *cost = COSTS_N_INSNS (1);
10048 if (speed_p)
10049 *cost += extra_cost->alu.rev;
10050
10051 return true;
10052 }
10053 /* Fall through. */
10054 case AND: case XOR:
10055 if (mode == SImode)
10056 {
10057 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10058 rtx op0 = XEXP (x, 0);
10059 rtx shift_op, shift_reg;
10060
10061 *cost = COSTS_N_INSNS (1);
10062
10063 if (subcode == NOT
10064 && (code == AND
10065 || (code == IOR && TARGET_THUMB2)))
10066 op0 = XEXP (op0, 0);
10067
10068 shift_reg = NULL;
10069 shift_op = shifter_op_p (op0, &shift_reg);
10070 if (shift_op != NULL)
10071 {
10072 if (shift_reg)
10073 {
10074 if (speed_p)
10075 *cost += extra_cost->alu.log_shift_reg;
10076 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10077 }
10078 else if (speed_p)
10079 *cost += extra_cost->alu.log_shift;
10080
10081 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10082 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10083 return true;
10084 }
10085
10086 if (CONST_INT_P (XEXP (x, 1)))
10087 {
10088 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10089 INTVAL (XEXP (x, 1)), NULL_RTX,
10090 NULL_RTX, 1, 0);
10091
10092 *cost = COSTS_N_INSNS (insns);
10093 if (speed_p)
10094 *cost += insns * extra_cost->alu.logical;
10095 *cost += rtx_cost (op0, code, 0, speed_p);
10096 return true;
10097 }
10098
10099 if (speed_p)
10100 *cost += extra_cost->alu.logical;
10101 *cost += (rtx_cost (op0, code, 0, speed_p)
10102 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10103 return true;
10104 }
10105
10106 if (mode == DImode)
10107 {
10108 rtx op0 = XEXP (x, 0);
10109 enum rtx_code subcode = GET_CODE (op0);
10110
10111 *cost = COSTS_N_INSNS (2);
10112
10113 if (subcode == NOT
10114 && (code == AND
10115 || (code == IOR && TARGET_THUMB2)))
10116 op0 = XEXP (op0, 0);
10117
10118 if (GET_CODE (op0) == ZERO_EXTEND)
10119 {
10120 if (speed_p)
10121 *cost += 2 * extra_cost->alu.logical;
10122
10123 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10124 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10125 return true;
10126 }
10127 else if (GET_CODE (op0) == SIGN_EXTEND)
10128 {
10129 if (speed_p)
10130 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10131
10132 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10133 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10134 return true;
10135 }
10136
10137 if (speed_p)
10138 *cost += 2 * extra_cost->alu.logical;
10139
10140 return true;
10141 }
10142 /* Vector mode? */
10143
10144 *cost = LIBCALL_COST (2);
10145 return false;
10146
10147 case MULT:
10148 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10149 && (mode == SFmode || !TARGET_VFP_SINGLE))
10150 {
10151 rtx op0 = XEXP (x, 0);
10152
10153 *cost = COSTS_N_INSNS (1);
10154
10155 if (GET_CODE (op0) == NEG)
10156 op0 = XEXP (op0, 0);
10157
10158 if (speed_p)
10159 *cost += extra_cost->fp[mode != SFmode].mult;
10160
10161 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10162 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10163 return true;
10164 }
10165 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10166 {
10167 *cost = LIBCALL_COST (2);
10168 return false;
10169 }
10170
10171 if (mode == SImode)
10172 {
10173 *cost = COSTS_N_INSNS (1);
10174 if (TARGET_DSP_MULTIPLY
10175 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10176 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10177 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10178 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10179 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10180 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10181 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10182 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10183 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10184 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10185 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10186 && (INTVAL (XEXP (XEXP (x, 1), 1))
10187 == 16))))))
10188 {
10189 /* SMUL[TB][TB]. */
10190 if (speed_p)
10191 *cost += extra_cost->mult[0].extend;
10192 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10193 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10194 return true;
10195 }
10196 if (speed_p)
10197 *cost += extra_cost->mult[0].simple;
10198 return false;
10199 }
10200
10201 if (mode == DImode)
10202 {
10203 if (arm_arch3m
10204 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10205 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10206 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10207 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10208 {
10209 *cost = COSTS_N_INSNS (1);
10210 if (speed_p)
10211 *cost += extra_cost->mult[1].extend;
10212 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10213 ZERO_EXTEND, 0, speed_p)
10214 + rtx_cost (XEXP (XEXP (x, 1), 0),
10215 ZERO_EXTEND, 0, speed_p));
10216 return true;
10217 }
10218
10219 *cost = LIBCALL_COST (2);
10220 return false;
10221 }
10222
10223 /* Vector mode? */
10224 *cost = LIBCALL_COST (2);
10225 return false;
10226
10227 case NEG:
10228 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10229 && (mode == SFmode || !TARGET_VFP_SINGLE))
10230 {
10231 *cost = COSTS_N_INSNS (1);
10232 if (speed_p)
10233 *cost += extra_cost->fp[mode != SFmode].neg;
10234
10235 return false;
10236 }
10237 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10238 {
10239 *cost = LIBCALL_COST (1);
10240 return false;
10241 }
10242
10243 if (mode == SImode)
10244 {
10245 if (GET_CODE (XEXP (x, 0)) == ABS)
10246 {
10247 *cost = COSTS_N_INSNS (2);
10248 /* Assume the non-flag-changing variant. */
10249 if (speed_p)
10250 *cost += (extra_cost->alu.log_shift
10251 + extra_cost->alu.arith_shift);
10252 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10253 return true;
10254 }
10255
10256 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10257 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10258 {
10259 *cost = COSTS_N_INSNS (2);
10260 /* No extra cost for MOV imm and MVN imm. */
10261 /* If the comparison op is using the flags, there's no further
10262 cost, otherwise we need to add the cost of the comparison. */
10263 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10264 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10265 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10266 {
10267 *cost += (COSTS_N_INSNS (1)
10268 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10269 speed_p)
10270 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10271 speed_p));
10272 if (speed_p)
10273 *cost += extra_cost->alu.arith;
10274 }
10275 return true;
10276 }
10277 *cost = COSTS_N_INSNS (1);
10278 if (speed_p)
10279 *cost += extra_cost->alu.arith;
10280 return false;
10281 }
10282
10283 if (GET_MODE_CLASS (mode) == MODE_INT
10284 && GET_MODE_SIZE (mode) < 4)
10285 {
10286 /* Slightly disparage, as we might need an extend operation. */
10287 *cost = 1 + COSTS_N_INSNS (1);
10288 if (speed_p)
10289 *cost += extra_cost->alu.arith;
10290 return false;
10291 }
10292
10293 if (mode == DImode)
10294 {
10295 *cost = COSTS_N_INSNS (2);
10296 if (speed_p)
10297 *cost += 2 * extra_cost->alu.arith;
10298 return false;
10299 }
10300
10301 /* Vector mode? */
10302 *cost = LIBCALL_COST (1);
10303 return false;
10304
10305 case NOT:
10306 if (mode == SImode)
10307 {
10308 rtx shift_op;
10309 rtx shift_reg = NULL;
10310
10311 *cost = COSTS_N_INSNS (1);
10312 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10313
10314 if (shift_op)
10315 {
10316 if (shift_reg != NULL)
10317 {
10318 if (speed_p)
10319 *cost += extra_cost->alu.log_shift_reg;
10320 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10321 }
10322 else if (speed_p)
10323 *cost += extra_cost->alu.log_shift;
10324 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10325 return true;
10326 }
10327
10328 if (speed_p)
10329 *cost += extra_cost->alu.logical;
10330 return false;
10331 }
10332 if (mode == DImode)
10333 {
10334 *cost = COSTS_N_INSNS (2);
10335 return false;
10336 }
10337
10338 /* Vector mode? */
10339
10340 *cost += LIBCALL_COST (1);
10341 return false;
10342
10343 case IF_THEN_ELSE:
10344 {
10345 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10346 {
10347 *cost = COSTS_N_INSNS (4);
10348 return true;
10349 }
10350 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10351 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10352
10353 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10354 /* Assume that if one arm of the if_then_else is a register,
10355 that it will be tied with the result and eliminate the
10356 conditional insn. */
10357 if (REG_P (XEXP (x, 1)))
10358 *cost += op2cost;
10359 else if (REG_P (XEXP (x, 2)))
10360 *cost += op1cost;
10361 else
10362 {
10363 if (speed_p)
10364 {
10365 if (extra_cost->alu.non_exec_costs_exec)
10366 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10367 else
10368 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10369 }
10370 else
10371 *cost += op1cost + op2cost;
10372 }
10373 }
10374 return true;
10375
10376 case COMPARE:
10377 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10378 *cost = 0;
10379 else
10380 {
10381 machine_mode op0mode;
10382 /* We'll mostly assume that the cost of a compare is the cost of the
10383 LHS. However, there are some notable exceptions. */
10384
10385 /* Floating point compares are never done as side-effects. */
10386 op0mode = GET_MODE (XEXP (x, 0));
10387 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10388 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10389 {
10390 *cost = COSTS_N_INSNS (1);
10391 if (speed_p)
10392 *cost += extra_cost->fp[op0mode != SFmode].compare;
10393
10394 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10395 {
10396 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10397 return true;
10398 }
10399
10400 return false;
10401 }
10402 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10403 {
10404 *cost = LIBCALL_COST (2);
10405 return false;
10406 }
10407
10408 /* DImode compares normally take two insns. */
10409 if (op0mode == DImode)
10410 {
10411 *cost = COSTS_N_INSNS (2);
10412 if (speed_p)
10413 *cost += 2 * extra_cost->alu.arith;
10414 return false;
10415 }
10416
10417 if (op0mode == SImode)
10418 {
10419 rtx shift_op;
10420 rtx shift_reg;
10421
10422 if (XEXP (x, 1) == const0_rtx
10423 && !(REG_P (XEXP (x, 0))
10424 || (GET_CODE (XEXP (x, 0)) == SUBREG
10425 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10426 {
10427 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10428
10429 /* Multiply operations that set the flags are often
10430 significantly more expensive. */
10431 if (speed_p
10432 && GET_CODE (XEXP (x, 0)) == MULT
10433 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10434 *cost += extra_cost->mult[0].flag_setting;
10435
10436 if (speed_p
10437 && GET_CODE (XEXP (x, 0)) == PLUS
10438 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10439 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10440 0), 1), mode))
10441 *cost += extra_cost->mult[0].flag_setting;
10442 return true;
10443 }
10444
10445 shift_reg = NULL;
10446 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10447 if (shift_op != NULL)
10448 {
10449 *cost = COSTS_N_INSNS (1);
10450 if (shift_reg != NULL)
10451 {
10452 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10453 if (speed_p)
10454 *cost += extra_cost->alu.arith_shift_reg;
10455 }
10456 else if (speed_p)
10457 *cost += extra_cost->alu.arith_shift;
10458 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10459 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10460 return true;
10461 }
10462
10463 *cost = COSTS_N_INSNS (1);
10464 if (speed_p)
10465 *cost += extra_cost->alu.arith;
10466 if (CONST_INT_P (XEXP (x, 1))
10467 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10468 {
10469 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10470 return true;
10471 }
10472 return false;
10473 }
10474
10475 /* Vector mode? */
10476
10477 *cost = LIBCALL_COST (2);
10478 return false;
10479 }
10480 return true;
10481
10482 case EQ:
10483 case NE:
10484 case LT:
10485 case LE:
10486 case GT:
10487 case GE:
10488 case LTU:
10489 case LEU:
10490 case GEU:
10491 case GTU:
10492 case ORDERED:
10493 case UNORDERED:
10494 case UNEQ:
10495 case UNLE:
10496 case UNLT:
10497 case UNGE:
10498 case UNGT:
10499 case LTGT:
10500 if (outer_code == SET)
10501 {
10502 /* Is it a store-flag operation? */
10503 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10504 && XEXP (x, 1) == const0_rtx)
10505 {
10506 /* Thumb also needs an IT insn. */
10507 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10508 return true;
10509 }
10510 if (XEXP (x, 1) == const0_rtx)
10511 {
10512 switch (code)
10513 {
10514 case LT:
10515 /* LSR Rd, Rn, #31. */
10516 *cost = COSTS_N_INSNS (1);
10517 if (speed_p)
10518 *cost += extra_cost->alu.shift;
10519 break;
10520
10521 case EQ:
10522 /* RSBS T1, Rn, #0
10523 ADC Rd, Rn, T1. */
10524
10525 case NE:
10526 /* SUBS T1, Rn, #1
10527 SBC Rd, Rn, T1. */
10528 *cost = COSTS_N_INSNS (2);
10529 break;
10530
10531 case LE:
10532 /* RSBS T1, Rn, Rn, LSR #31
10533 ADC Rd, Rn, T1. */
10534 *cost = COSTS_N_INSNS (2);
10535 if (speed_p)
10536 *cost += extra_cost->alu.arith_shift;
10537 break;
10538
10539 case GT:
10540 /* RSB Rd, Rn, Rn, ASR #1
10541 LSR Rd, Rd, #31. */
10542 *cost = COSTS_N_INSNS (2);
10543 if (speed_p)
10544 *cost += (extra_cost->alu.arith_shift
10545 + extra_cost->alu.shift);
10546 break;
10547
10548 case GE:
10549 /* ASR Rd, Rn, #31
10550 ADD Rd, Rn, #1. */
10551 *cost = COSTS_N_INSNS (2);
10552 if (speed_p)
10553 *cost += extra_cost->alu.shift;
10554 break;
10555
10556 default:
10557 /* Remaining cases are either meaningless or would take
10558 three insns anyway. */
10559 *cost = COSTS_N_INSNS (3);
10560 break;
10561 }
10562 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10563 return true;
10564 }
10565 else
10566 {
10567 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10568 if (CONST_INT_P (XEXP (x, 1))
10569 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10570 {
10571 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10572 return true;
10573 }
10574
10575 return false;
10576 }
10577 }
10578 /* Not directly inside a set. If it involves the condition code
10579 register it must be the condition for a branch, cond_exec or
10580 I_T_E operation. Since the comparison is performed elsewhere
10581 this is just the control part which has no additional
10582 cost. */
10583 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10584 && XEXP (x, 1) == const0_rtx)
10585 {
10586 *cost = 0;
10587 return true;
10588 }
10589 return false;
10590
10591 case ABS:
10592 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10593 && (mode == SFmode || !TARGET_VFP_SINGLE))
10594 {
10595 *cost = COSTS_N_INSNS (1);
10596 if (speed_p)
10597 *cost += extra_cost->fp[mode != SFmode].neg;
10598
10599 return false;
10600 }
10601 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10602 {
10603 *cost = LIBCALL_COST (1);
10604 return false;
10605 }
10606
10607 if (mode == SImode)
10608 {
10609 *cost = COSTS_N_INSNS (1);
10610 if (speed_p)
10611 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10612 return false;
10613 }
10614 /* Vector mode? */
10615 *cost = LIBCALL_COST (1);
10616 return false;
10617
10618 case SIGN_EXTEND:
10619 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10620 && MEM_P (XEXP (x, 0)))
10621 {
10622 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10623
10624 if (mode == DImode)
10625 *cost += COSTS_N_INSNS (1);
10626
10627 if (!speed_p)
10628 return true;
10629
10630 if (GET_MODE (XEXP (x, 0)) == SImode)
10631 *cost += extra_cost->ldst.load;
10632 else
10633 *cost += extra_cost->ldst.load_sign_extend;
10634
10635 if (mode == DImode)
10636 *cost += extra_cost->alu.shift;
10637
10638 return true;
10639 }
10640
10641 /* Widening from less than 32-bits requires an extend operation. */
10642 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10643 {
10644 /* We have SXTB/SXTH. */
10645 *cost = COSTS_N_INSNS (1);
10646 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10647 if (speed_p)
10648 *cost += extra_cost->alu.extend;
10649 }
10650 else if (GET_MODE (XEXP (x, 0)) != SImode)
10651 {
10652 /* Needs two shifts. */
10653 *cost = COSTS_N_INSNS (2);
10654 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10655 if (speed_p)
10656 *cost += 2 * extra_cost->alu.shift;
10657 }
10658
10659 /* Widening beyond 32-bits requires one more insn. */
10660 if (mode == DImode)
10661 {
10662 *cost += COSTS_N_INSNS (1);
10663 if (speed_p)
10664 *cost += extra_cost->alu.shift;
10665 }
10666
10667 return true;
10668
10669 case ZERO_EXTEND:
10670 if ((arm_arch4
10671 || GET_MODE (XEXP (x, 0)) == SImode
10672 || GET_MODE (XEXP (x, 0)) == QImode)
10673 && MEM_P (XEXP (x, 0)))
10674 {
10675 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10676
10677 if (mode == DImode)
10678 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10679
10680 return true;
10681 }
10682
10683 /* Widening from less than 32-bits requires an extend operation. */
10684 if (GET_MODE (XEXP (x, 0)) == QImode)
10685 {
10686 /* UXTB can be a shorter instruction in Thumb2, but it might
10687 be slower than the AND Rd, Rn, #255 alternative. When
10688 optimizing for speed it should never be slower to use
10689 AND, and we don't really model 16-bit vs 32-bit insns
10690 here. */
10691 *cost = COSTS_N_INSNS (1);
10692 if (speed_p)
10693 *cost += extra_cost->alu.logical;
10694 }
10695 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10696 {
10697 /* We have UXTB/UXTH. */
10698 *cost = COSTS_N_INSNS (1);
10699 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10700 if (speed_p)
10701 *cost += extra_cost->alu.extend;
10702 }
10703 else if (GET_MODE (XEXP (x, 0)) != SImode)
10704 {
10705 /* Needs two shifts. It's marginally preferable to use
10706 shifts rather than two BIC instructions as the second
10707 shift may merge with a subsequent insn as a shifter
10708 op. */
10709 *cost = COSTS_N_INSNS (2);
10710 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10711 if (speed_p)
10712 *cost += 2 * extra_cost->alu.shift;
10713 }
10714 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10715 *cost = COSTS_N_INSNS (1);
10716
10717 /* Widening beyond 32-bits requires one more insn. */
10718 if (mode == DImode)
10719 {
10720 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10721 }
10722
10723 return true;
10724
10725 case CONST_INT:
10726 *cost = 0;
10727 /* CONST_INT has no mode, so we cannot tell for sure how many
10728 insns are really going to be needed. The best we can do is
10729 look at the value passed. If it fits in SImode, then assume
10730 that's the mode it will be used for. Otherwise assume it
10731 will be used in DImode. */
10732 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10733 mode = SImode;
10734 else
10735 mode = DImode;
10736
10737 /* Avoid blowing up in arm_gen_constant (). */
10738 if (!(outer_code == PLUS
10739 || outer_code == AND
10740 || outer_code == IOR
10741 || outer_code == XOR
10742 || outer_code == MINUS))
10743 outer_code = SET;
10744
10745 const_int_cost:
10746 if (mode == SImode)
10747 {
10748 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10749 INTVAL (x), NULL, NULL,
10750 0, 0));
10751 /* Extra costs? */
10752 }
10753 else
10754 {
10755 *cost += COSTS_N_INSNS (arm_gen_constant
10756 (outer_code, SImode, NULL,
10757 trunc_int_for_mode (INTVAL (x), SImode),
10758 NULL, NULL, 0, 0)
10759 + arm_gen_constant (outer_code, SImode, NULL,
10760 INTVAL (x) >> 32, NULL,
10761 NULL, 0, 0));
10762 /* Extra costs? */
10763 }
10764
10765 return true;
10766
10767 case CONST:
10768 case LABEL_REF:
10769 case SYMBOL_REF:
10770 if (speed_p)
10771 {
10772 if (arm_arch_thumb2 && !flag_pic)
10773 *cost = COSTS_N_INSNS (2);
10774 else
10775 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10776 }
10777 else
10778 *cost = COSTS_N_INSNS (2);
10779
10780 if (flag_pic)
10781 {
10782 *cost += COSTS_N_INSNS (1);
10783 if (speed_p)
10784 *cost += extra_cost->alu.arith;
10785 }
10786
10787 return true;
10788
10789 case CONST_FIXED:
10790 *cost = COSTS_N_INSNS (4);
10791 /* Fixme. */
10792 return true;
10793
10794 case CONST_DOUBLE:
10795 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10796 && (mode == SFmode || !TARGET_VFP_SINGLE))
10797 {
10798 if (vfp3_const_double_rtx (x))
10799 {
10800 *cost = COSTS_N_INSNS (1);
10801 if (speed_p)
10802 *cost += extra_cost->fp[mode == DFmode].fpconst;
10803 return true;
10804 }
10805
10806 if (speed_p)
10807 {
10808 *cost = COSTS_N_INSNS (1);
10809 if (mode == DFmode)
10810 *cost += extra_cost->ldst.loadd;
10811 else
10812 *cost += extra_cost->ldst.loadf;
10813 }
10814 else
10815 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10816
10817 return true;
10818 }
10819 *cost = COSTS_N_INSNS (4);
10820 return true;
10821
10822 case CONST_VECTOR:
10823 /* Fixme. */
10824 if (TARGET_NEON
10825 && TARGET_HARD_FLOAT
10826 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10827 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10828 *cost = COSTS_N_INSNS (1);
10829 else
10830 *cost = COSTS_N_INSNS (4);
10831 return true;
10832
10833 case HIGH:
10834 case LO_SUM:
10835 *cost = COSTS_N_INSNS (1);
10836 /* When optimizing for size, we prefer constant pool entries to
10837 MOVW/MOVT pairs, so bump the cost of these slightly. */
10838 if (!speed_p)
10839 *cost += 1;
10840 return true;
10841
10842 case CLZ:
10843 *cost = COSTS_N_INSNS (1);
10844 if (speed_p)
10845 *cost += extra_cost->alu.clz;
10846 return false;
10847
10848 case SMIN:
10849 if (XEXP (x, 1) == const0_rtx)
10850 {
10851 *cost = COSTS_N_INSNS (1);
10852 if (speed_p)
10853 *cost += extra_cost->alu.log_shift;
10854 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10855 return true;
10856 }
10857 /* Fall through. */
10858 case SMAX:
10859 case UMIN:
10860 case UMAX:
10861 *cost = COSTS_N_INSNS (2);
10862 return false;
10863
10864 case TRUNCATE:
10865 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10866 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10867 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10868 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10869 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10870 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10871 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10872 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10873 == ZERO_EXTEND))))
10874 {
10875 *cost = COSTS_N_INSNS (1);
10876 if (speed_p)
10877 *cost += extra_cost->mult[1].extend;
10878 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10879 speed_p)
10880 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10881 0, speed_p));
10882 return true;
10883 }
10884 *cost = LIBCALL_COST (1);
10885 return false;
10886
10887 case UNSPEC:
10888 return arm_unspec_cost (x, outer_code, speed_p, cost);
10889
10890 case PC:
10891 /* Reading the PC is like reading any other register. Writing it
10892 is more expensive, but we take that into account elsewhere. */
10893 *cost = 0;
10894 return true;
10895
10896 case ZERO_EXTRACT:
10897 /* TODO: Simple zero_extract of bottom bits using AND. */
10898 /* Fall through. */
10899 case SIGN_EXTRACT:
10900 if (arm_arch6
10901 && mode == SImode
10902 && CONST_INT_P (XEXP (x, 1))
10903 && CONST_INT_P (XEXP (x, 2)))
10904 {
10905 *cost = COSTS_N_INSNS (1);
10906 if (speed_p)
10907 *cost += extra_cost->alu.bfx;
10908 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10909 return true;
10910 }
10911 /* Without UBFX/SBFX, need to resort to shift operations. */
10912 *cost = COSTS_N_INSNS (2);
10913 if (speed_p)
10914 *cost += 2 * extra_cost->alu.shift;
10915 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10916 return true;
10917
10918 case FLOAT_EXTEND:
10919 if (TARGET_HARD_FLOAT)
10920 {
10921 *cost = COSTS_N_INSNS (1);
10922 if (speed_p)
10923 *cost += extra_cost->fp[mode == DFmode].widen;
10924 if (!TARGET_FPU_ARMV8
10925 && GET_MODE (XEXP (x, 0)) == HFmode)
10926 {
10927 /* Pre v8, widening HF->DF is a two-step process, first
10928 widening to SFmode. */
10929 *cost += COSTS_N_INSNS (1);
10930 if (speed_p)
10931 *cost += extra_cost->fp[0].widen;
10932 }
10933 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10934 return true;
10935 }
10936
10937 *cost = LIBCALL_COST (1);
10938 return false;
10939
10940 case FLOAT_TRUNCATE:
10941 if (TARGET_HARD_FLOAT)
10942 {
10943 *cost = COSTS_N_INSNS (1);
10944 if (speed_p)
10945 *cost += extra_cost->fp[mode == DFmode].narrow;
10946 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10947 return true;
10948 /* Vector modes? */
10949 }
10950 *cost = LIBCALL_COST (1);
10951 return false;
10952
10953 case FMA:
10954 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10955 {
10956 rtx op0 = XEXP (x, 0);
10957 rtx op1 = XEXP (x, 1);
10958 rtx op2 = XEXP (x, 2);
10959
10960 *cost = COSTS_N_INSNS (1);
10961
10962 /* vfms or vfnma. */
10963 if (GET_CODE (op0) == NEG)
10964 op0 = XEXP (op0, 0);
10965
10966 /* vfnms or vfnma. */
10967 if (GET_CODE (op2) == NEG)
10968 op2 = XEXP (op2, 0);
10969
10970 *cost += rtx_cost (op0, FMA, 0, speed_p);
10971 *cost += rtx_cost (op1, FMA, 1, speed_p);
10972 *cost += rtx_cost (op2, FMA, 2, speed_p);
10973
10974 if (speed_p)
10975 *cost += extra_cost->fp[mode == DFmode].fma;
10976
10977 return true;
10978 }
10979
10980 *cost = LIBCALL_COST (3);
10981 return false;
10982
10983 case FIX:
10984 case UNSIGNED_FIX:
10985 if (TARGET_HARD_FLOAT)
10986 {
10987 if (GET_MODE_CLASS (mode) == MODE_INT)
10988 {
10989 *cost = COSTS_N_INSNS (1);
10990 if (speed_p)
10991 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10992 /* Strip off the 'cost' of rounding towards zero. */
10993 if (GET_CODE (XEXP (x, 0)) == FIX)
10994 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10995 else
10996 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10997 /* ??? Increase the cost to deal with transferring from
10998 FP -> CORE registers? */
10999 return true;
11000 }
11001 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11002 && TARGET_FPU_ARMV8)
11003 {
11004 *cost = COSTS_N_INSNS (1);
11005 if (speed_p)
11006 *cost += extra_cost->fp[mode == DFmode].roundint;
11007 return false;
11008 }
11009 /* Vector costs? */
11010 }
11011 *cost = LIBCALL_COST (1);
11012 return false;
11013
11014 case FLOAT:
11015 case UNSIGNED_FLOAT:
11016 if (TARGET_HARD_FLOAT)
11017 {
11018 /* ??? Increase the cost to deal with transferring from CORE
11019 -> FP registers? */
11020 *cost = COSTS_N_INSNS (1);
11021 if (speed_p)
11022 *cost += extra_cost->fp[mode == DFmode].fromint;
11023 return false;
11024 }
11025 *cost = LIBCALL_COST (1);
11026 return false;
11027
11028 case CALL:
11029 *cost = COSTS_N_INSNS (1);
11030 return true;
11031
11032 case ASM_OPERANDS:
11033 {
11034 /* Just a guess: estimate the number of instructions in the asm
11035 string plus one insn per input, always with a minimum of
11036 COSTS_N_INSNS (1) (see PR60663). */
11037 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11038 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11039
11040 *cost = COSTS_N_INSNS (asm_length + num_operands);
11041 return true;
11042 }
11043 default:
11044 if (mode != VOIDmode)
11045 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11046 else
11047 *cost = COSTS_N_INSNS (4); /* Who knows? */
11048 return false;
11049 }
11050 }
11051
11052 #undef HANDLE_NARROW_SHIFT_ARITH
11053
11054 /* RTX cost hook: dispatch to the size-based, per-tune, or table-driven cost functions. */
11055 static bool
11056 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
11057 int *total, bool speed)
11058 {
11059 bool result;
11060
11061 if (TARGET_OLD_RTX_COSTS
11062 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11063 {
11064 /* Old way. (Deprecated.) */
11065 if (!speed)
11066 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11067 (enum rtx_code) outer_code, total);
11068 else
11069 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11070 (enum rtx_code) outer_code, total,
11071 speed);
11072 }
11073 else
11074 {
11075 /* New way. */
11076 if (current_tune->insn_extra_cost)
11077 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11078 (enum rtx_code) outer_code,
11079 current_tune->insn_extra_cost,
11080 total, speed);
11081 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11082 && current_tune->insn_extra_cost == NULL */
11083 else
11084 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11085 (enum rtx_code) outer_code,
11086 &generic_extra_costs, total, speed);
11087 }
11088
11089 if (dump_file && (dump_flags & TDF_DETAILS))
11090 {
11091 print_rtl_single (dump_file, x);
11092 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11093 *total, result ? "final" : "partial");
11094 }
11095 return result;
11096 }
11097
11098 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11099 supported on any "slowmul" cores, so it can be ignored. */
11100
11101 static bool
11102 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11103 int *total, bool speed)
11104 {
11105 machine_mode mode = GET_MODE (x);
11106
11107 if (TARGET_THUMB)
11108 {
11109 *total = thumb1_rtx_costs (x, code, outer_code);
11110 return true;
11111 }
11112
11113 switch (code)
11114 {
11115 case MULT:
11116 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11117 || mode == DImode)
11118 {
11119 *total = COSTS_N_INSNS (20);
11120 return false;
11121 }
11122
11123 if (CONST_INT_P (XEXP (x, 1)))
11124 {
11125 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11126 & (unsigned HOST_WIDE_INT) 0xffffffff);
11127 int cost, const_ok = const_ok_for_arm (i);
11128 int j, booth_unit_size;
11129
11130 /* Tune as appropriate. */
11131 cost = const_ok ? 4 : 8;
11132 booth_unit_size = 2;
11133 for (j = 0; i && j < 32; j += booth_unit_size)
11134 {
11135 i >>= booth_unit_size;
11136 cost++;
11137 }
11138
11139 *total = COSTS_N_INSNS (cost);
11140 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11141 return true;
11142 }
11143
11144 *total = COSTS_N_INSNS (20);
11145 return false;
11146
11147 default:
11148 return arm_rtx_costs_1 (x, outer_code, total, speed);
11149 }
11150 }
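
/* Illustrative sketch (not part of arm.c, and not built anywhere): a
   standalone rendering of the constant-multiply estimate used in
   arm_slowmul_rtx_costs above, so the Booth-unit loop can be tried in
   isolation.  The helper name and the example values are hypothetical.  */
#include <stdio.h>

static int
example_booth_mul_cost (unsigned int multiplier, int const_ok,
                        int booth_unit_size)
{
  /* Base cost: 4 if the constant is directly encodable, 8 if it has to be
     built up first (the "Tune as appropriate" values above).  */
  int cost = const_ok ? 4 : 8;
  int j;

  /* One extra unit for every booth_unit_size bits of multiplier that are
     still non-zero.  */
  for (j = 0; multiplier && j < 32; j += booth_unit_size)
    {
      multiplier >>= booth_unit_size;
      cost++;
    }
  return cost;
}

int
main (void)
{
  /* A "slowmul" core retires 2 bits per step, a "fastmul" core 8.  */
  printf ("x * 100: slowmul %d units, fastmul %d units\n",
          example_booth_mul_cost (100, 1, 2),    /* 8 units.  */
          example_booth_mul_cost (100, 1, 8));   /* 5 units.  */
  return 0;
}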
11151
11152
11153 /* RTX cost for cores with a fast multiply unit (M variants). */
11154
11155 static bool
11156 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11157 int *total, bool speed)
11158 {
11159 machine_mode mode = GET_MODE (x);
11160
11161 if (TARGET_THUMB1)
11162 {
11163 *total = thumb1_rtx_costs (x, code, outer_code);
11164 return true;
11165 }
11166
11167 /* ??? Should Thumb-2 use different costs? */
11168 switch (code)
11169 {
11170 case MULT:
11171 /* There is no point basing this on the tuning, since it is always the
11172 fast variant if it exists at all. */
11173 if (mode == DImode
11174 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11175 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11176 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11177 {
11178 *total = COSTS_N_INSNS (2);
11179 return false;
11180 }
11181
11182
11183 if (mode == DImode)
11184 {
11185 *total = COSTS_N_INSNS (5);
11186 return false;
11187 }
11188
11189 if (CONST_INT_P (XEXP (x, 1)))
11190 {
11191 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11192 & (unsigned HOST_WIDE_INT) 0xffffffff);
11193 int cost, const_ok = const_ok_for_arm (i);
11194 int j, booth_unit_size;
11195
11196 /* Tune as appropriate. */
11197 cost = const_ok ? 4 : 8;
11198 booth_unit_size = 8;
11199 for (j = 0; i && j < 32; j += booth_unit_size)
11200 {
11201 i >>= booth_unit_size;
11202 cost++;
11203 }
11204
11205 *total = COSTS_N_INSNS (cost);
11206 return false;
11207 }
11208
11209 if (mode == SImode)
11210 {
11211 *total = COSTS_N_INSNS (4);
11212 return false;
11213 }
11214
11215 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11216 {
11217 if (TARGET_HARD_FLOAT
11218 && (mode == SFmode
11219 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11220 {
11221 *total = COSTS_N_INSNS (1);
11222 return false;
11223 }
11224 }
11225
11226 /* Requires a lib call. */
11227 *total = COSTS_N_INSNS (20);
11228 return false;
11229
11230 default:
11231 return arm_rtx_costs_1 (x, outer_code, total, speed);
11232 }
11233 }
11234
11235
11236 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11237 so it can be ignored. */
11238
11239 static bool
11240 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11241 int *total, bool speed)
11242 {
11243 machine_mode mode = GET_MODE (x);
11244
11245 if (TARGET_THUMB)
11246 {
11247 *total = thumb1_rtx_costs (x, code, outer_code);
11248 return true;
11249 }
11250
11251 switch (code)
11252 {
11253 case COMPARE:
11254 if (GET_CODE (XEXP (x, 0)) != MULT)
11255 return arm_rtx_costs_1 (x, outer_code, total, speed);
11256
11257 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11258 will stall until the multiplication is complete. */
11259 *total = COSTS_N_INSNS (3);
11260 return false;
11261
11262 case MULT:
11263 /* There is no point basing this on the tuning, since it is always the
11264 fast variant if it exists at all. */
11265 if (mode == DImode
11266 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11267 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11268 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11269 {
11270 *total = COSTS_N_INSNS (2);
11271 return false;
11272 }
11273
11274
11275 if (mode == DImode)
11276 {
11277 *total = COSTS_N_INSNS (5);
11278 return false;
11279 }
11280
11281 if (CONST_INT_P (XEXP (x, 1)))
11282 {
11283 /* If operand 1 is a constant we can more accurately
11284 calculate the cost of the multiply. The multiplier can
11285 retire 15 bits on the first cycle and a further 12 on the
11286 second. We do, of course, have to load the constant into
11287 a register first. */
11288 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11289 /* There's a general overhead of one cycle. */
11290 int cost = 1;
11291 unsigned HOST_WIDE_INT masked_const;
11292
11293 if (i & 0x80000000)
11294 i = ~i;
11295
11296 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11297
11298 masked_const = i & 0xffff8000;
11299 if (masked_const != 0)
11300 {
11301 cost++;
11302 masked_const = i & 0xf8000000;
11303 if (masked_const != 0)
11304 cost++;
11305 }
11306 *total = COSTS_N_INSNS (cost);
11307 return false;
11308 }
11309
11310 if (mode == SImode)
11311 {
11312 *total = COSTS_N_INSNS (3);
11313 return false;
11314 }
11315
11316 /* Requires a lib call. */
11317 *total = COSTS_N_INSNS (20);
11318 return false;
11319
11320 default:
11321 return arm_rtx_costs_1 (x, outer_code, total, speed);
11322 }
11323 }
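
/* Illustrative sketch (not part of arm.c): the XScale early-termination
   model from the MULT case above, as a standalone helper.  The multiplier
   retires 15 bits in the first cycle and a further 12 in the second, on
   top of one cycle of general overhead.  The helper name and the example
   values in the trailing comment are hypothetical.  */
static int
example_xscale_mul_cycles (unsigned int i)
{
  int cycles = 1;                 /* General overhead.  */

  if (i & 0x80000000)             /* Invert negative-looking constants so   */
    i = ~i;                       /* that leading ones also terminate early. */

  if ((i & 0xffff8000) != 0)      /* Bits survive the 15-bit first step.  */
    {
      cycles++;
      if ((i & 0xf8000000) != 0)  /* Bits survive the further 12-bit step.  */
        cycles++;
    }
  return cycles;                  /* E.g. 0x7f -> 1, 0x12345 -> 2,
                                     0x12345678 -> 3.  */
}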
11324
11325
11326 /* RTX costs for 9e (and later) cores. */
11327
11328 static bool
11329 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11330 int *total, bool speed)
11331 {
11332 machine_mode mode = GET_MODE (x);
11333
11334 if (TARGET_THUMB1)
11335 {
11336 switch (code)
11337 {
11338 case MULT:
11339 /* Small multiply: 32 cycles for an integer multiply inst. */
11340 if (arm_arch6m && arm_m_profile_small_mul)
11341 *total = COSTS_N_INSNS (32);
11342 else
11343 *total = COSTS_N_INSNS (3);
11344 return true;
11345
11346 default:
11347 *total = thumb1_rtx_costs (x, code, outer_code);
11348 return true;
11349 }
11350 }
11351
11352 switch (code)
11353 {
11354 case MULT:
11355 /* There is no point basing this on the tuning, since it is always the
11356 fast variant if it exists at all. */
11357 if (mode == DImode
11358 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11359 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11360 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11361 {
11362 *total = COSTS_N_INSNS (2);
11363 return false;
11364 }
11365
11366
11367 if (mode == DImode)
11368 {
11369 *total = COSTS_N_INSNS (5);
11370 return false;
11371 }
11372
11373 if (mode == SImode)
11374 {
11375 *total = COSTS_N_INSNS (2);
11376 return false;
11377 }
11378
11379 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11380 {
11381 if (TARGET_HARD_FLOAT
11382 && (mode == SFmode
11383 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11384 {
11385 *total = COSTS_N_INSNS (1);
11386 return false;
11387 }
11388 }
11389
11390 *total = COSTS_N_INSNS (20);
11391 return false;
11392
11393 default:
11394 return arm_rtx_costs_1 (x, outer_code, total, speed);
11395 }
11396 }
11397 /* All address computations that can be done are free, but rtx cost returns
11398 the same for practically all of them. So we weight the different types
11399 of address here in the order (most preferred first):
11400 PRE/POST_INC/DEC, INT sum, SHIFT or NON-INT sum, REG, MEM or LABEL. */
11401 static inline int
11402 arm_arm_address_cost (rtx x)
11403 {
11404 enum rtx_code c = GET_CODE (x);
11405
11406 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11407 return 0;
11408 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11409 return 10;
11410
11411 if (c == PLUS)
11412 {
11413 if (CONST_INT_P (XEXP (x, 1)))
11414 return 2;
11415
11416 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11417 return 3;
11418
11419 return 4;
11420 }
11421
11422 return 6;
11423 }
11424
11425 static inline int
11426 arm_thumb_address_cost (rtx x)
11427 {
11428 enum rtx_code c = GET_CODE (x);
11429
11430 if (c == REG)
11431 return 1;
11432 if (c == PLUS
11433 && REG_P (XEXP (x, 0))
11434 && CONST_INT_P (XEXP (x, 1)))
11435 return 1;
11436
11437 return 2;
11438 }
11439
11440 static int
11441 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11442 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11443 {
11444 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11445 }
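
/* Illustrative note (hypothetical operands, added here as a worked example
   of the weights chosen by arm_arm_address_cost): lower is more preferred.

       [r0], #4            (POST_INC)              -> 0
       [r0, #8]            (PLUS reg, const_int)   -> 2
       [r0, r1, lsl #2]    (PLUS with arithmetic)  -> 3
       [r0, r1]            (PLUS reg, reg)         -> 4
       r0                  (plain REG)             -> 6
       =label              (SYMBOL_REF/LABEL_REF)  -> 10  */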
11446
11447 /* Adjust cost hook for XScale. */
11448 static bool
11449 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11450 {
11451 /* Some true dependencies can have a higher cost depending
11452 on precisely how certain input operands are used. */
11453 if (REG_NOTE_KIND(link) == 0
11454 && recog_memoized (insn) >= 0
11455 && recog_memoized (dep) >= 0)
11456 {
11457 int shift_opnum = get_attr_shift (insn);
11458 enum attr_type attr_type = get_attr_type (dep);
11459
11460 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11461 operand for INSN. If we have a shifted input operand and the
11462 instruction we depend on is another ALU instruction, then we may
11463 have to account for an additional stall. */
11464 if (shift_opnum != 0
11465 && (attr_type == TYPE_ALU_SHIFT_IMM
11466 || attr_type == TYPE_ALUS_SHIFT_IMM
11467 || attr_type == TYPE_LOGIC_SHIFT_IMM
11468 || attr_type == TYPE_LOGICS_SHIFT_IMM
11469 || attr_type == TYPE_ALU_SHIFT_REG
11470 || attr_type == TYPE_ALUS_SHIFT_REG
11471 || attr_type == TYPE_LOGIC_SHIFT_REG
11472 || attr_type == TYPE_LOGICS_SHIFT_REG
11473 || attr_type == TYPE_MOV_SHIFT
11474 || attr_type == TYPE_MVN_SHIFT
11475 || attr_type == TYPE_MOV_SHIFT_REG
11476 || attr_type == TYPE_MVN_SHIFT_REG))
11477 {
11478 rtx shifted_operand;
11479 int opno;
11480
11481 /* Get the shifted operand. */
11482 extract_insn (insn);
11483 shifted_operand = recog_data.operand[shift_opnum];
11484
11485 /* Iterate over all the operands in DEP. If we write an operand
11486 that overlaps with SHIFTED_OPERAND, then we have to increase the
11487 cost of this dependency. */
11488 extract_insn (dep);
11489 preprocess_constraints (dep);
11490 for (opno = 0; opno < recog_data.n_operands; opno++)
11491 {
11492 /* We can ignore strict inputs. */
11493 if (recog_data.operand_type[opno] == OP_IN)
11494 continue;
11495
11496 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11497 shifted_operand))
11498 {
11499 *cost = 2;
11500 return false;
11501 }
11502 }
11503 }
11504 }
11505 return true;
11506 }
11507
11508 /* Adjust cost hook for Cortex A9. */
11509 static bool
11510 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11511 {
11512 switch (REG_NOTE_KIND (link))
11513 {
11514 case REG_DEP_ANTI:
11515 *cost = 0;
11516 return false;
11517
11518 case REG_DEP_TRUE:
11519 case REG_DEP_OUTPUT:
11520 if (recog_memoized (insn) >= 0
11521 && recog_memoized (dep) >= 0)
11522 {
11523 if (GET_CODE (PATTERN (insn)) == SET)
11524 {
11525 if (GET_MODE_CLASS
11526 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11527 || GET_MODE_CLASS
11528 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11529 {
11530 enum attr_type attr_type_insn = get_attr_type (insn);
11531 enum attr_type attr_type_dep = get_attr_type (dep);
11532
11533 /* By default all dependencies of the form
11534 s0 = s0 <op> s1
11535 s0 = s0 <op> s2
11536 have an extra latency of 1 cycle because
11537 of the input and output dependency in this
11538 case. However, this gets modeled as a true
11539 dependency and hence all these checks. */
11540 if (REG_P (SET_DEST (PATTERN (insn)))
11541 && REG_P (SET_DEST (PATTERN (dep)))
11542 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11543 SET_DEST (PATTERN (dep))))
11544 {
11545 /* FMACS is a special case where the dependent
11546 instruction can be issued 3 cycles before
11547 the normal latency in case of an output
11548 dependency. */
11549 if ((attr_type_insn == TYPE_FMACS
11550 || attr_type_insn == TYPE_FMACD)
11551 && (attr_type_dep == TYPE_FMACS
11552 || attr_type_dep == TYPE_FMACD))
11553 {
11554 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11555 *cost = insn_default_latency (dep) - 3;
11556 else
11557 *cost = insn_default_latency (dep);
11558 return false;
11559 }
11560 else
11561 {
11562 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11563 *cost = insn_default_latency (dep) + 1;
11564 else
11565 *cost = insn_default_latency (dep);
11566 }
11567 return false;
11568 }
11569 }
11570 }
11571 }
11572 break;
11573
11574 default:
11575 gcc_unreachable ();
11576 }
11577
11578 return true;
11579 }
11580
11581 /* Adjust cost hook for FA726TE. */
11582 static bool
11583 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11584 {
11585 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
11586 followed by a predicated one) has a penalty of 3. */
11587 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11588 && recog_memoized (insn) >= 0
11589 && recog_memoized (dep) >= 0
11590 && get_attr_conds (dep) == CONDS_SET)
11591 {
11592 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11593 if (get_attr_conds (insn) == CONDS_USE
11594 && get_attr_type (insn) != TYPE_BRANCH)
11595 {
11596 *cost = 3;
11597 return false;
11598 }
11599
11600 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11601 || get_attr_conds (insn) == CONDS_USE)
11602 {
11603 *cost = 0;
11604 return false;
11605 }
11606 }
11607
11608 return true;
11609 }
11610
11611 /* Implement TARGET_REGISTER_MOVE_COST.
11612
11613 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11614 such a move is typically more expensive than a single memory access. We set
11615 the cost to less than two memory accesses so that floating
11616 point to integer conversion does not go through memory. */
11617
11618 int
11619 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11620 reg_class_t from, reg_class_t to)
11621 {
11622 if (TARGET_32BIT)
11623 {
11624 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11625 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11626 return 15;
11627 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11628 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11629 return 4;
11630 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11631 return 20;
11632 else
11633 return 2;
11634 }
11635 else
11636 {
11637 if (from == HI_REGS || to == HI_REGS)
11638 return 4;
11639 else
11640 return 2;
11641 }
11642 }
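
/* Illustrative note (a worked example tying the two hooks together): with
   TARGET_32BIT, arm_memory_move_cost below returns 10, so the VFP <-> core
   figure of 15 above sits between one memory access (10) and two (20).
   That is what lets float -> int conversions stay in registers while still
   discouraging gratuitous moves between the register files.  */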
11643
11644 /* Implement TARGET_MEMORY_MOVE_COST. */
11645
11646 int
11647 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11648 bool in ATTRIBUTE_UNUSED)
11649 {
11650 if (TARGET_32BIT)
11651 return 10;
11652 else
11653 {
11654 if (GET_MODE_SIZE (mode) < 4)
11655 return 8;
11656 else
11657 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11658 }
11659 }
11660
11661 /* Vectorizer cost model implementation. */
11662
11663 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11664 static int
11665 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11666 tree vectype,
11667 int misalign ATTRIBUTE_UNUSED)
11668 {
11669 unsigned elements;
11670
11671 switch (type_of_cost)
11672 {
11673 case scalar_stmt:
11674 return current_tune->vec_costs->scalar_stmt_cost;
11675
11676 case scalar_load:
11677 return current_tune->vec_costs->scalar_load_cost;
11678
11679 case scalar_store:
11680 return current_tune->vec_costs->scalar_store_cost;
11681
11682 case vector_stmt:
11683 return current_tune->vec_costs->vec_stmt_cost;
11684
11685 case vector_load:
11686 return current_tune->vec_costs->vec_align_load_cost;
11687
11688 case vector_store:
11689 return current_tune->vec_costs->vec_store_cost;
11690
11691 case vec_to_scalar:
11692 return current_tune->vec_costs->vec_to_scalar_cost;
11693
11694 case scalar_to_vec:
11695 return current_tune->vec_costs->scalar_to_vec_cost;
11696
11697 case unaligned_load:
11698 return current_tune->vec_costs->vec_unalign_load_cost;
11699
11700 case unaligned_store:
11701 return current_tune->vec_costs->vec_unalign_store_cost;
11702
11703 case cond_branch_taken:
11704 return current_tune->vec_costs->cond_taken_branch_cost;
11705
11706 case cond_branch_not_taken:
11707 return current_tune->vec_costs->cond_not_taken_branch_cost;
11708
11709 case vec_perm:
11710 case vec_promote_demote:
11711 return current_tune->vec_costs->vec_stmt_cost;
11712
11713 case vec_construct:
11714 elements = TYPE_VECTOR_SUBPARTS (vectype);
11715 return elements / 2 + 1;
11716
11717 default:
11718 gcc_unreachable ();
11719 }
11720 }
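
/* Illustrative note (hypothetical modes, a worked example of the
   vec_construct case above): a four-element vector such as V4SI is
   estimated at 4 / 2 + 1 = 3 units, a two-element V2DI at 2 / 2 + 1 = 2,
   and an eight-element V8HI at 8 / 2 + 1 = 5.  */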
11721
11722 /* Implement targetm.vectorize.add_stmt_cost. */
11723
11724 static unsigned
11725 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11726 struct _stmt_vec_info *stmt_info, int misalign,
11727 enum vect_cost_model_location where)
11728 {
11729 unsigned *cost = (unsigned *) data;
11730 unsigned retval = 0;
11731
11732 if (flag_vect_cost_model)
11733 {
11734 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11735 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11736
11737 /* Statements in an inner loop relative to the loop being
11738 vectorized are weighted more heavily. The value here is
11739 arbitrary and could potentially be improved with analysis. */
11740 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11741 count *= 50; /* FIXME. */
11742
11743 retval = (unsigned) (count * stmt_cost);
11744 cost[where] += retval;
11745 }
11746
11747 return retval;
11748 }
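
/* Illustrative note (hypothetical numbers, a worked example of the hook
   above): with the cost model enabled, a statement kind whose per-statement
   cost is 2, counted 3 times, accumulates 3 * 2 = 6 into its bucket; if the
   same statement sits in an inner loop of the loop being vectorized and is
   costed in the loop body, the count is first scaled by 50, giving
   3 * 50 * 2 = 300.  */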
11749
11750 /* Return true if and only if this insn can dual-issue only as older. */
11751 static bool
11752 cortexa7_older_only (rtx_insn *insn)
11753 {
11754 if (recog_memoized (insn) < 0)
11755 return false;
11756
11757 switch (get_attr_type (insn))
11758 {
11759 case TYPE_ALU_DSP_REG:
11760 case TYPE_ALU_SREG:
11761 case TYPE_ALUS_SREG:
11762 case TYPE_LOGIC_REG:
11763 case TYPE_LOGICS_REG:
11764 case TYPE_ADC_REG:
11765 case TYPE_ADCS_REG:
11766 case TYPE_ADR:
11767 case TYPE_BFM:
11768 case TYPE_REV:
11769 case TYPE_MVN_REG:
11770 case TYPE_SHIFT_IMM:
11771 case TYPE_SHIFT_REG:
11772 case TYPE_LOAD_BYTE:
11773 case TYPE_LOAD1:
11774 case TYPE_STORE1:
11775 case TYPE_FFARITHS:
11776 case TYPE_FADDS:
11777 case TYPE_FFARITHD:
11778 case TYPE_FADDD:
11779 case TYPE_FMOV:
11780 case TYPE_F_CVT:
11781 case TYPE_FCMPS:
11782 case TYPE_FCMPD:
11783 case TYPE_FCONSTS:
11784 case TYPE_FCONSTD:
11785 case TYPE_FMULS:
11786 case TYPE_FMACS:
11787 case TYPE_FMULD:
11788 case TYPE_FMACD:
11789 case TYPE_FDIVS:
11790 case TYPE_FDIVD:
11791 case TYPE_F_MRC:
11792 case TYPE_F_MRRC:
11793 case TYPE_F_FLAG:
11794 case TYPE_F_LOADS:
11795 case TYPE_F_STORES:
11796 return true;
11797 default:
11798 return false;
11799 }
11800 }
11801
11802 /* Return true if and only if this insn can dual-issue as younger. */
11803 static bool
11804 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11805 {
11806 if (recog_memoized (insn) < 0)
11807 {
11808 if (verbose > 5)
11809 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11810 return false;
11811 }
11812
11813 switch (get_attr_type (insn))
11814 {
11815 case TYPE_ALU_IMM:
11816 case TYPE_ALUS_IMM:
11817 case TYPE_LOGIC_IMM:
11818 case TYPE_LOGICS_IMM:
11819 case TYPE_EXTEND:
11820 case TYPE_MVN_IMM:
11821 case TYPE_MOV_IMM:
11822 case TYPE_MOV_REG:
11823 case TYPE_MOV_SHIFT:
11824 case TYPE_MOV_SHIFT_REG:
11825 case TYPE_BRANCH:
11826 case TYPE_CALL:
11827 return true;
11828 default:
11829 return false;
11830 }
11831 }
11832
11833
11834 /* Look for an instruction that can dual issue only as an older
11835 instruction, and move it in front of any instructions that can
11836 dual-issue as younger, while preserving the relative order of all
11837 other instructions in the ready list. This is a heuristic to help
11838 dual-issue in later cycles, by postponing issue of more flexible
11839 instructions. This heuristic may affect dual issue opportunities
11840 in the current cycle. */
11841 static void
11842 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11843 int *n_readyp, int clock)
11844 {
11845 int i;
11846 int first_older_only = -1, first_younger = -1;
11847
11848 if (verbose > 5)
11849 fprintf (file,
11850 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11851 clock,
11852 *n_readyp);
11853
11854 /* Traverse the ready list from the head (the instruction to issue
11855 first), looking for the first instruction that can issue as
11856 younger and the first instruction that can dual-issue only as
11857 older. */
11858 for (i = *n_readyp - 1; i >= 0; i--)
11859 {
11860 rtx_insn *insn = ready[i];
11861 if (cortexa7_older_only (insn))
11862 {
11863 first_older_only = i;
11864 if (verbose > 5)
11865 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11866 break;
11867 }
11868 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11869 first_younger = i;
11870 }
11871
11872 /* Nothing to reorder because either no younger insn found or insn
11873 that can dual-issue only as older appears before any insn that
11874 can dual-issue as younger. */
11875 if (first_younger == -1)
11876 {
11877 if (verbose > 5)
11878 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11879 return;
11880 }
11881
11882 /* Nothing to reorder because no older-only insn in the ready list. */
11883 if (first_older_only == -1)
11884 {
11885 if (verbose > 5)
11886 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11887 return;
11888 }
11889
11890 /* Move first_older_only insn before first_younger. */
11891 if (verbose > 5)
11892 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11893 INSN_UID (ready[first_older_only]),
11894 INSN_UID (ready[first_younger]));
11895 rtx_insn *first_older_only_insn = ready[first_older_only];
11896 for (i = first_older_only; i < first_younger; i++)
11897 {
11898 ready[i] = ready[i+1];
11899 }
11900
11901 ready[i] = first_older_only_insn;
11902 return;
11903 }
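
/* Illustrative sketch (not part of arm.c): the rotation performed by
   cortexa7_sched_reorder above, on a toy ready list of tags.  As in the
   scheduler, index n_ready - 1 is the head, i.e. the instruction that
   issues first.  All names and values here are hypothetical.  */
#include <stdio.h>

int
main (void)
{
  const char *ready[] = { "older_only_A", "younger_B", "younger_C" };
  int n_ready = 3;
  /* What the backwards scan would find: the older-only insn at index 0,
     and the first younger insn (the one closest to the head) at index 2.  */
  int first_older_only = 0, first_younger = 2;
  const char *first_older_only_insn = ready[first_older_only];
  int i;

  /* Shift the intervening entries down and place the older-only insn in
     the slot of the first younger insn, closer to the head.  */
  for (i = first_older_only; i < first_younger; i++)
    ready[i] = ready[i + 1];
  ready[i] = first_older_only_insn;

  /* Print in issue order: older_only_A, younger_C, younger_B.  */
  for (i = n_ready - 1; i >= 0; i--)
    printf ("%s\n", ready[i]);
  return 0;
}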
11904
11905 /* Implement TARGET_SCHED_REORDER. */
11906 static int
11907 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11908 int clock)
11909 {
11910 switch (arm_tune)
11911 {
11912 case cortexa7:
11913 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11914 break;
11915 default:
11916 /* Do nothing for other cores. */
11917 break;
11918 }
11919
11920 return arm_issue_rate ();
11921 }
11922
11923 /* This function implements the target hook TARGET_SCHED_ADJUST_COST.
11924 It corrects the value of COST based on the relationship between
11925 INSN and DEP through the dependence LINK. It returns the new
11926 value. There is a per-core adjust_cost hook to adjust scheduler costs
11927 and the per-core hook can choose to completely override the generic
11928 adjust_cost function. Only put bits of code into arm_adjust_cost that
11929 are common across all cores. */
11930 static int
11931 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
11932 {
11933 rtx i_pat, d_pat;
11934
11935 /* When generating Thumb-1 code, we want to place flag-setting operations
11936 close to a conditional branch which depends on them, so that we can
11937 omit the comparison. */
11938 if (TARGET_THUMB1
11939 && REG_NOTE_KIND (link) == 0
11940 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11941 && recog_memoized (dep) >= 0
11942 && get_attr_conds (dep) == CONDS_SET)
11943 return 0;
11944
11945 if (current_tune->sched_adjust_cost != NULL)
11946 {
11947 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11948 return cost;
11949 }
11950
11951 /* XXX Is this strictly true? */
11952 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11953 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11954 return 0;
11955
11956 /* Call insns don't incur a stall, even if they follow a load. */
11957 if (REG_NOTE_KIND (link) == 0
11958 && CALL_P (insn))
11959 return 1;
11960
11961 if ((i_pat = single_set (insn)) != NULL
11962 && MEM_P (SET_SRC (i_pat))
11963 && (d_pat = single_set (dep)) != NULL
11964 && MEM_P (SET_DEST (d_pat)))
11965 {
11966 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11967 /* This is a load after a store; there is no conflict if the load reads
11968 from a cached area. Assume that loads from the stack, and from the
11969 constant pool are cached, and that others will miss. This is a
11970 hack. */
11971
11972 if ((GET_CODE (src_mem) == SYMBOL_REF
11973 && CONSTANT_POOL_ADDRESS_P (src_mem))
11974 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11975 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11976 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11977 return 1;
11978 }
11979
11980 return cost;
11981 }
11982
11983 int
11984 arm_max_conditional_execute (void)
11985 {
11986 return max_insns_skipped;
11987 }
11988
11989 static int
11990 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11991 {
11992 if (TARGET_32BIT)
11993 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11994 else
11995 return (optimize > 0) ? 2 : 0;
11996 }
11997
11998 static int
11999 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
12000 {
12001 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
12002 }
12003
12004 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
12005 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
12006 sequences of non-executed instructions in IT blocks probably take the same
12007 amount of time as executed instructions (and the IT instruction itself takes
12008 space in icache). This function was experimentally determined to give good
12009 results on a popular embedded benchmark. */
12010
12011 static int
12012 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
12013 {
12014 return (TARGET_32BIT && speed_p) ? 1
12015 : arm_default_branch_cost (speed_p, predictable_p);
12016 }
12017
12018 static bool fp_consts_inited = false;
12019
12020 static REAL_VALUE_TYPE value_fp0;
12021
12022 static void
12023 init_fp_table (void)
12024 {
12025 REAL_VALUE_TYPE r;
12026
12027 r = REAL_VALUE_ATOF ("0", DFmode);
12028 value_fp0 = r;
12029 fp_consts_inited = true;
12030 }
12031
12032 /* Return TRUE if rtx X is a valid immediate FP constant. */
12033 int
12034 arm_const_double_rtx (rtx x)
12035 {
12036 REAL_VALUE_TYPE r;
12037
12038 if (!fp_consts_inited)
12039 init_fp_table ();
12040
12041 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12042 if (REAL_VALUE_MINUS_ZERO (r))
12043 return 0;
12044
12045 if (REAL_VALUES_EQUAL (r, value_fp0))
12046 return 1;
12047
12048 return 0;
12049 }
12050
12051 /* VFPv3 has a fairly wide range of representable immediates, formed from
12052 "quarter-precision" floating-point values. These can be evaluated using this
12053 formula (with ^ for exponentiation):
12054
12055 -1^s * n * 2^-r
12056
12057 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12058 16 <= n <= 31 and 0 <= r <= 7.
12059
12060 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12061
12062 - A (most-significant) is the sign bit.
12063 - BCD are the exponent (encoded as r XOR 3).
12064 - EFGH are the mantissa (encoded as n - 16).
12065 */
12066
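/* Illustrative sketch (not part of arm.c): decoding the ABCDEFGH byte back
   to its value with standard C only, as a check on the description above.
   The helper name and the example encodings in the trailing comment are
   hypothetical.  */
#include <math.h>

static double
example_vfp3_decode_imm8 (unsigned int imm8)
{
  int sign = (imm8 >> 7) & 1;        /* A: the sign bit.  */
  int r = ((imm8 >> 4) & 7) ^ 3;     /* BCD: the exponent, stored as r XOR 3.  */
  int n = (imm8 & 0xf) + 16;         /* EFGH: the mantissa, stored as n - 16.  */

  /* -1^s * n * 2^-r, with 16 <= n <= 31 and 0 <= r <= 7.  */
  return (sign ? -1.0 : 1.0) * ldexp ((double) n, -r);
}

/* For example, 0x70 decodes as s = 0, r = 4, n = 16, i.e. 16 * 2^-4 = 1.0,
   and 0x00 decodes as 16 * 2^-3 = 2.0.  */
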
12067 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12068 fconst[sd] instruction, or -1 if X isn't suitable. */
12069 static int
12070 vfp3_const_double_index (rtx x)
12071 {
12072 REAL_VALUE_TYPE r, m;
12073 int sign, exponent;
12074 unsigned HOST_WIDE_INT mantissa, mant_hi;
12075 unsigned HOST_WIDE_INT mask;
12076 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12077 bool fail;
12078
12079 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12080 return -1;
12081
12082 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12083
12084 /* We can't represent these things, so detect them first. */
12085 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12086 return -1;
12087
12088 /* Extract sign, exponent and mantissa. */
12089 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12090 r = real_value_abs (&r);
12091 exponent = REAL_EXP (&r);
12092 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12093 highest (sign) bit, with a fixed binary point at bit point_pos.
12094 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12095 bits for the mantissa, this may fail (low bits would be lost). */
12096 real_ldexp (&m, &r, point_pos - exponent);
12097 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12098 mantissa = w.elt (0);
12099 mant_hi = w.elt (1);
12100
12101 /* If there are bits set in the low part of the mantissa, we can't
12102 represent this value. */
12103 if (mantissa != 0)
12104 return -1;
12105
12106 /* Now make it so that mantissa contains the most-significant bits, and move
12107 the point_pos to indicate that the least-significant bits have been
12108 discarded. */
12109 point_pos -= HOST_BITS_PER_WIDE_INT;
12110 mantissa = mant_hi;
12111
12112 /* We can permit four significant bits of mantissa only, plus a high bit
12113 which is always 1. */
12114 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12115 if ((mantissa & mask) != 0)
12116 return -1;
12117
12118 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12119 mantissa >>= point_pos - 5;
12120
12121 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12122 floating-point immediate zero with Neon using an integer-zero load, but
12123 that case is handled elsewhere.) */
12124 if (mantissa == 0)
12125 return -1;
12126
12127 gcc_assert (mantissa >= 16 && mantissa <= 31);
12128
12129 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12130 normalized significands are in the range [1, 2). (Our mantissa is shifted
12131 left 4 places at this point relative to normalized IEEE754 values). GCC
12132 internally uses [0.5, 1) (see real.c), so the exponent returned from
12133 REAL_EXP must be altered. */
12134 exponent = 5 - exponent;
12135
12136 if (exponent < 0 || exponent > 7)
12137 return -1;
12138
12139 /* Sign, mantissa and exponent are now in the correct form to plug into the
12140 formula described in the comment above. */
12141 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12142 }
12143
12144 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12145 int
12146 vfp3_const_double_rtx (rtx x)
12147 {
12148 if (!TARGET_VFP3)
12149 return 0;
12150
12151 return vfp3_const_double_index (x) != -1;
12152 }
12153
12154 /* Recognize immediates which can be used in various Neon instructions. Legal
12155 immediates are described by the following table (for VMVN variants, the
12156 bitwise inverse of the constant shown is recognized. In either case, VMOV
12157 is output and the correct instruction to use for a given constant is chosen
12158 by the assembler). The constant shown is replicated across all elements of
12159 the destination vector.
12160
12161 insn elems variant constant (binary)
12162 ---- ----- ------- -----------------
12163 vmov i32 0 00000000 00000000 00000000 abcdefgh
12164 vmov i32 1 00000000 00000000 abcdefgh 00000000
12165 vmov i32 2 00000000 abcdefgh 00000000 00000000
12166 vmov i32 3 abcdefgh 00000000 00000000 00000000
12167 vmov i16 4 00000000 abcdefgh
12168 vmov i16 5 abcdefgh 00000000
12169 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12170 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12171 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12172 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12173 vmvn i16 10 00000000 abcdefgh
12174 vmvn i16 11 abcdefgh 00000000
12175 vmov i32 12 00000000 00000000 abcdefgh 11111111
12176 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12177 vmov i32 14 00000000 abcdefgh 11111111 11111111
12178 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12179 vmov i8 16 abcdefgh
12180 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12181 eeeeeeee ffffffff gggggggg hhhhhhhh
12182 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12183 vmov f32 19 00000000 00000000 00000000 00000000
12184
12185 For case 18, B = !b. Representable values are exactly those accepted by
12186 vfp3_const_double_index, but are output as floating-point numbers rather
12187 than indices.
12188
12189 For case 19, we will change it to vmov.i32 when assembling.
12190
12191 Variants 0-5 (inclusive) may also be used as immediates for the second
12192 operand of VORR/VBIC instructions.
12193
12194 The INVERSE argument causes the bitwise inverse of the given operand to be
12195 recognized instead (used for recognizing legal immediates for the VAND/VORN
12196 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12197 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12198 output, rather than the real insns vbic/vorr).
12199
12200 INVERSE makes no difference to the recognition of float vectors.
12201
12202 The return value is the variant of immediate as shown in the above table, or
12203 -1 if the given value doesn't match any of the listed patterns.
12204 */
12205 static int
12206 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12207 rtx *modconst, int *elementwidth)
12208 {
12209 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12210 matches = 1; \
12211 for (i = 0; i < idx; i += (STRIDE)) \
12212 if (!(TEST)) \
12213 matches = 0; \
12214 if (matches) \
12215 { \
12216 immtype = (CLASS); \
12217 elsize = (ELSIZE); \
12218 break; \
12219 }
12220
12221 unsigned int i, elsize = 0, idx = 0, n_elts;
12222 unsigned int innersize;
12223 unsigned char bytes[16];
12224 int immtype = -1, matches;
12225 unsigned int invmask = inverse ? 0xff : 0;
12226 bool vector = GET_CODE (op) == CONST_VECTOR;
12227
12228 if (vector)
12229 {
12230 n_elts = CONST_VECTOR_NUNITS (op);
12231 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12232 }
12233 else
12234 {
12235 n_elts = 1;
12236 if (mode == VOIDmode)
12237 mode = DImode;
12238 innersize = GET_MODE_SIZE (mode);
12239 }
12240
12241 /* Vectors of float constants. */
12242 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12243 {
12244 rtx el0 = CONST_VECTOR_ELT (op, 0);
12245 REAL_VALUE_TYPE r0;
12246
12247 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12248 return -1;
12249
12250 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12251
12252 for (i = 1; i < n_elts; i++)
12253 {
12254 rtx elt = CONST_VECTOR_ELT (op, i);
12255 REAL_VALUE_TYPE re;
12256
12257 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12258
12259 if (!REAL_VALUES_EQUAL (r0, re))
12260 return -1;
12261 }
12262
12263 if (modconst)
12264 *modconst = CONST_VECTOR_ELT (op, 0);
12265
12266 if (elementwidth)
12267 *elementwidth = 0;
12268
12269 if (el0 == CONST0_RTX (GET_MODE (el0)))
12270 return 19;
12271 else
12272 return 18;
12273 }
12274
12275 /* Splat vector constant out into a byte vector. */
12276 for (i = 0; i < n_elts; i++)
12277 {
12278 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12279 unsigned HOST_WIDE_INT elpart;
12280 unsigned int part, parts;
12281
12282 if (CONST_INT_P (el))
12283 {
12284 elpart = INTVAL (el);
12285 parts = 1;
12286 }
12287 else if (CONST_DOUBLE_P (el))
12288 {
12289 elpart = CONST_DOUBLE_LOW (el);
12290 parts = 2;
12291 }
12292 else
12293 gcc_unreachable ();
12294
12295 for (part = 0; part < parts; part++)
12296 {
12297 unsigned int byte;
12298 for (byte = 0; byte < innersize; byte++)
12299 {
12300 bytes[idx++] = (elpart & 0xff) ^ invmask;
12301 elpart >>= BITS_PER_UNIT;
12302 }
12303 if (CONST_DOUBLE_P (el))
12304 elpart = CONST_DOUBLE_HIGH (el);
12305 }
12306 }
12307
12308 /* Sanity check. */
12309 gcc_assert (idx == GET_MODE_SIZE (mode));
12310
12311 do
12312 {
12313 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12314 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12315
12316 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12317 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12318
12319 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12320 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12321
12322 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12323 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12324
12325 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12326
12327 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12328
12329 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12330 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12331
12332 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12333 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12334
12335 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12336 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12337
12338 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12339 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12340
12341 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12342
12343 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12344
12345 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12346 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12347
12348 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12349 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12350
12351 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12352 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12353
12354 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12355 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12356
12357 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12358
12359 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12360 && bytes[i] == bytes[(i + 8) % idx]);
12361 }
12362 while (0);
12363
12364 if (immtype == -1)
12365 return -1;
12366
12367 if (elementwidth)
12368 *elementwidth = elsize;
12369
12370 if (modconst)
12371 {
12372 unsigned HOST_WIDE_INT imm = 0;
12373
12374 /* Un-invert bytes of recognized vector, if necessary. */
12375 if (invmask != 0)
12376 for (i = 0; i < idx; i++)
12377 bytes[i] ^= invmask;
12378
12379 if (immtype == 17)
12380 {
12381 /* FIXME: Broken on 32-bit H_W_I hosts. */
12382 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12383
12384 for (i = 0; i < 8; i++)
12385 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12386 << (i * BITS_PER_UNIT);
12387
12388 *modconst = GEN_INT (imm);
12389 }
12390 else
12391 {
12392 unsigned HOST_WIDE_INT imm = 0;
12393
12394 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12395 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12396
12397 *modconst = GEN_INT (imm);
12398 }
12399 }
12400
12401 return immtype;
12402 #undef CHECK
12403 }
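
/* Illustrative note (hypothetical constant, a worked example of the
   recognizer above): a V4SI vector whose elements are all 0x0000ab00
   splats to the byte pattern 00 ab 00 00 repeated four times.  Variant 0
   fails because bytes[i + 1] is not zero, but variant 1 matches (bytes[i]
   == 0 and bytes[i + 1] == bytes[1]), so the function returns 1 with
   *elementwidth = 32 and *modconst = 0x0000ab00, which the output patterns
   can emit as something like "vmov.i32 q0, #0xab00".  */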
12404
12405 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12406 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12407 float elements), and a modified constant (whatever should be output for a
12408 VMOV) in *MODCONST. */
12409
12410 int
12411 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12412 rtx *modconst, int *elementwidth)
12413 {
12414 rtx tmpconst;
12415 int tmpwidth;
12416 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12417
12418 if (retval == -1)
12419 return 0;
12420
12421 if (modconst)
12422 *modconst = tmpconst;
12423
12424 if (elementwidth)
12425 *elementwidth = tmpwidth;
12426
12427 return 1;
12428 }
12429
12430 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12431 the immediate is valid, write a constant suitable for using as an operand
12432 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12433 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12434
12435 int
12436 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12437 rtx *modconst, int *elementwidth)
12438 {
12439 rtx tmpconst;
12440 int tmpwidth;
12441 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12442
12443 if (retval < 0 || retval > 5)
12444 return 0;
12445
12446 if (modconst)
12447 *modconst = tmpconst;
12448
12449 if (elementwidth)
12450 *elementwidth = tmpwidth;
12451
12452 return 1;
12453 }
12454
12455 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12456 the immediate is valid, write a constant suitable for using as an operand
12457 to VSHR/VSHL to *MODCONST and the corresponding element width to
12458 *ELEMENTWIDTH. ISLEFTSHIFT says whether this is a left or a right shift,
12459 because the two have different limits. */
12460
12461 int
12462 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12463 rtx *modconst, int *elementwidth,
12464 bool isleftshift)
12465 {
12466 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12467 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12468 unsigned HOST_WIDE_INT last_elt = 0;
12469 unsigned HOST_WIDE_INT maxshift;
12470
12471 /* Check that all elements of the vector hold the same shift amount. */
12472 for (i = 0; i < n_elts; i++)
12473 {
12474 rtx el = CONST_VECTOR_ELT (op, i);
12475 unsigned HOST_WIDE_INT elpart;
12476
12477 if (CONST_INT_P (el))
12478 elpart = INTVAL (el);
12479 else if (CONST_DOUBLE_P (el))
12480 return 0;
12481 else
12482 gcc_unreachable ();
12483
12484 if (i != 0 && elpart != last_elt)
12485 return 0;
12486
12487 last_elt = elpart;
12488 }
12489
12490 /* Shift less than element size. */
12491 maxshift = innersize * 8;
12492
12493 if (isleftshift)
12494 {
12495 /* Left shift immediate value can be from 0 to <size>-1. */
12496 if (last_elt >= maxshift)
12497 return 0;
12498 }
12499 else
12500 {
12501 /* Right shift immediate value can be from 1 to <size>. */
12502 if (last_elt == 0 || last_elt > maxshift)
12503 return 0;
12504 }
12505
12506 if (elementwidth)
12507 *elementwidth = innersize * 8;
12508
12509 if (modconst)
12510 *modconst = CONST_VECTOR_ELT (op, 0);
12511
12512 return 1;
12513 }
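
/* Illustrative note (hypothetical operands, a worked example of the limits
   above): for a V8HI shift count the element size is 16 bits, so a
   splatted immediate of 16 is rejected for VSHL (left shifts allow 0..15)
   but accepted for VSHR (right shifts allow 1..16); when the count is
   accepted, *elementwidth is set to 16.  */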
12514
12515 /* Return a string suitable for output of Neon immediate logic operation
12516 MNEM. */
12517
12518 char *
12519 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12520 int inverse, int quad)
12521 {
12522 int width, is_valid;
12523 static char templ[40];
12524
12525 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12526
12527 gcc_assert (is_valid != 0);
12528
12529 if (quad)
12530 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12531 else
12532 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12533
12534 return templ;
12535 }
12536
12537 /* Return a string suitable for output of Neon immediate shift operation
12538 (VSHR or VSHL) MNEM. */
12539
12540 char *
12541 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12542 machine_mode mode, int quad,
12543 bool isleftshift)
12544 {
12545 int width, is_valid;
12546 static char templ[40];
12547
12548 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12549 gcc_assert (is_valid != 0);
12550
12551 if (quad)
12552 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12553 else
12554 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12555
12556 return templ;
12557 }
12558
12559 /* Output a sequence of pairwise operations to implement a reduction.
12560 NOTE: We do "too much work" here, because pairwise operations work on two
12561 registers-worth of operands in one go. Unfortunately, we don't think we can
12562 exploit those extra calculations to do the full operation in fewer steps.
12563 Although all vector elements of the result but the first are ignored, we
12564 actually calculate the same result in each of the elements. An alternative
12565 such as initially loading a vector with zero to use as each of the second
12566 operands would use up an additional register and take an extra instruction,
12567 for no particular gain. */
12568
12569 void
12570 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12571 rtx (*reduc) (rtx, rtx, rtx))
12572 {
12573 machine_mode inner = GET_MODE_INNER (mode);
12574 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12575 rtx tmpsum = op1;
12576
12577 for (i = parts / 2; i >= 1; i /= 2)
12578 {
12579 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12580 emit_insn (reduc (dest, tmpsum, tmpsum));
12581 tmpsum = dest;
12582 }
12583 }
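
/* Illustrative note (hypothetical values, a worked example of the pairwise
   reduction above): summing a V4HI vector {1, 2, 3, 4}.  parts is 4, so
   the loop runs for i = 2 and then i = 1:

       step 1: pairwise-add {1,2,3,4} with itself -> {3, 7, 3, 7}
       step 2: pairwise-add {3,7,3,7} with itself -> {10, 10, 10, 10}

   Only element 0 of the final result is consumed, but as the comment above
   notes, every element ends up holding the full sum.  */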
12584
12585 /* If VALS is a vector constant that can be loaded into a register
12586 using VDUP, generate instructions to do so and return an RTX to
12587 assign to the register. Otherwise return NULL_RTX. */
12588
12589 static rtx
12590 neon_vdup_constant (rtx vals)
12591 {
12592 machine_mode mode = GET_MODE (vals);
12593 machine_mode inner_mode = GET_MODE_INNER (mode);
12594 int n_elts = GET_MODE_NUNITS (mode);
12595 bool all_same = true;
12596 rtx x;
12597 int i;
12598
12599 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12600 return NULL_RTX;
12601
12602 for (i = 0; i < n_elts; ++i)
12603 {
12604 x = XVECEXP (vals, 0, i);
12605 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12606 all_same = false;
12607 }
12608
12609 if (!all_same)
12610 /* The elements are not all the same. We could handle repeating
12611 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12612 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12613 vdup.i16). */
12614 return NULL_RTX;
12615
12616 /* We can load this constant by using VDUP and a constant in a
12617 single ARM register. This will be cheaper than a vector
12618 load. */
12619
12620 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12621 return gen_rtx_VEC_DUPLICATE (mode, x);
12622 }
12623
12624 /* Generate code to load VALS, which is a PARALLEL containing only
12625 constants (for vec_init) or CONST_VECTOR, efficiently into a
12626 register. Returns an RTX to copy into the register, or NULL_RTX
12627 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12628
12629 rtx
12630 neon_make_constant (rtx vals)
12631 {
12632 machine_mode mode = GET_MODE (vals);
12633 rtx target;
12634 rtx const_vec = NULL_RTX;
12635 int n_elts = GET_MODE_NUNITS (mode);
12636 int n_const = 0;
12637 int i;
12638
12639 if (GET_CODE (vals) == CONST_VECTOR)
12640 const_vec = vals;
12641 else if (GET_CODE (vals) == PARALLEL)
12642 {
12643 /* A CONST_VECTOR must contain only CONST_INTs and
12644 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12645 Only store valid constants in a CONST_VECTOR. */
12646 for (i = 0; i < n_elts; ++i)
12647 {
12648 rtx x = XVECEXP (vals, 0, i);
12649 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12650 n_const++;
12651 }
12652 if (n_const == n_elts)
12653 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12654 }
12655 else
12656 gcc_unreachable ();
12657
12658 if (const_vec != NULL
12659 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12660 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12661 return const_vec;
12662 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12663 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12664 pipeline cycle; creating the constant takes one or two ARM
12665 pipeline cycles. */
12666 return target;
12667 else if (const_vec != NULL_RTX)
12668 /* Load from constant pool. On Cortex-A8 this takes two cycles
12669 (for either double or quad vectors). We can not take advantage
12670 of single-cycle VLD1 because we need a PC-relative addressing
12671 mode. */
12672 return const_vec;
12673 else
12674 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12675 We can not construct an initializer. */
12676 return NULL_RTX;
12677 }
12678
12679 /* Initialize vector TARGET to VALS. */
12680
12681 void
12682 neon_expand_vector_init (rtx target, rtx vals)
12683 {
12684 machine_mode mode = GET_MODE (target);
12685 machine_mode inner_mode = GET_MODE_INNER (mode);
12686 int n_elts = GET_MODE_NUNITS (mode);
12687 int n_var = 0, one_var = -1;
12688 bool all_same = true;
12689 rtx x, mem;
12690 int i;
12691
12692 for (i = 0; i < n_elts; ++i)
12693 {
12694 x = XVECEXP (vals, 0, i);
12695 if (!CONSTANT_P (x))
12696 ++n_var, one_var = i;
12697
12698 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12699 all_same = false;
12700 }
12701
12702 if (n_var == 0)
12703 {
12704 rtx constant = neon_make_constant (vals);
12705 if (constant != NULL_RTX)
12706 {
12707 emit_move_insn (target, constant);
12708 return;
12709 }
12710 }
12711
12712 /* Splat a single non-constant element if we can. */
12713 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12714 {
12715 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12716 emit_insn (gen_rtx_SET (VOIDmode, target,
12717 gen_rtx_VEC_DUPLICATE (mode, x)));
12718 return;
12719 }
12720
12721 /* One field is non-constant. Load constant then overwrite varying
12722 field. This is more efficient than using the stack. */
12723 if (n_var == 1)
12724 {
12725 rtx copy = copy_rtx (vals);
12726 rtx index = GEN_INT (one_var);
12727
12728 /* Load constant part of vector, substitute neighboring value for
12729 varying element. */
12730 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12731 neon_expand_vector_init (target, copy);
12732
12733 /* Insert variable. */
12734 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12735 switch (mode)
12736 {
12737 case V8QImode:
12738 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12739 break;
12740 case V16QImode:
12741 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12742 break;
12743 case V4HImode:
12744 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12745 break;
12746 case V8HImode:
12747 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12748 break;
12749 case V2SImode:
12750 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12751 break;
12752 case V4SImode:
12753 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12754 break;
12755 case V2SFmode:
12756 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12757 break;
12758 case V4SFmode:
12759 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12760 break;
12761 case V2DImode:
12762 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12763 break;
12764 default:
12765 gcc_unreachable ();
12766 }
12767 return;
12768 }
12769
12770 /* Construct the vector in memory one field at a time
12771 and load the whole vector. */
12772 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12773 for (i = 0; i < n_elts; i++)
12774 emit_move_insn (adjust_address_nv (mem, inner_mode,
12775 i * GET_MODE_SIZE (inner_mode)),
12776 XVECEXP (vals, 0, i));
12777 emit_move_insn (target, mem);
12778 }
12779
12780 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12781 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12782 reported source locations are bogus. */
12783
12784 static void
12785 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12786 const char *err)
12787 {
12788 HOST_WIDE_INT lane;
12789
12790 gcc_assert (CONST_INT_P (operand));
12791
12792 lane = INTVAL (operand);
12793
12794 if (lane < low || lane >= high)
12795 error (err);
12796 }
12797
12798 /* Bounds-check lanes. */
12799
12800 void
12801 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12802 {
12803 bounds_check (operand, low, high, "lane out of range");
12804 }
12805
12806 /* Bounds-check constants. */
12807
12808 void
12809 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12810 {
12811 bounds_check (operand, low, high, "constant out of range");
12812 }
12813
12814 HOST_WIDE_INT
12815 neon_element_bits (machine_mode mode)
12816 {
12817 if (mode == DImode)
12818 return GET_MODE_BITSIZE (mode);
12819 else
12820 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12821 }
12822
12823 \f
12824 /* Predicates for `match_operand' and `match_operator'. */
12825
12826 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12827 WB is true if full writeback address modes are allowed and is false
12828 if limited writeback address modes (POST_INC and PRE_DEC) are
12829 allowed. */
12830
12831 int
12832 arm_coproc_mem_operand (rtx op, bool wb)
12833 {
12834 rtx ind;
12835
12836 /* Reject eliminable registers. */
12837 if (! (reload_in_progress || reload_completed || lra_in_progress)
12838 && ( reg_mentioned_p (frame_pointer_rtx, op)
12839 || reg_mentioned_p (arg_pointer_rtx, op)
12840 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12841 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12842 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12843 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12844 return FALSE;
12845
12846 /* Constants are converted into offsets from labels. */
12847 if (!MEM_P (op))
12848 return FALSE;
12849
12850 ind = XEXP (op, 0);
12851
12852 if (reload_completed
12853 && (GET_CODE (ind) == LABEL_REF
12854 || (GET_CODE (ind) == CONST
12855 && GET_CODE (XEXP (ind, 0)) == PLUS
12856 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12857 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12858 return TRUE;
12859
12860 /* Match: (mem (reg)). */
12861 if (REG_P (ind))
12862 return arm_address_register_rtx_p (ind, 0);
12863
12864 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12865 acceptable in any case (subject to verification by
12866 arm_address_register_rtx_p). We need WB to be true to accept
12867 PRE_INC and POST_DEC. */
12868 if (GET_CODE (ind) == POST_INC
12869 || GET_CODE (ind) == PRE_DEC
12870 || (wb
12871 && (GET_CODE (ind) == PRE_INC
12872 || GET_CODE (ind) == POST_DEC)))
12873 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12874
12875 if (wb
12876 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12877 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12878 && GET_CODE (XEXP (ind, 1)) == PLUS
12879 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12880 ind = XEXP (ind, 1);
12881
12882 /* Match:
12883 (plus (reg)
12884 (const)). */
12885 if (GET_CODE (ind) == PLUS
12886 && REG_P (XEXP (ind, 0))
12887 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12888 && CONST_INT_P (XEXP (ind, 1))
12889 && INTVAL (XEXP (ind, 1)) > -1024
12890 && INTVAL (XEXP (ind, 1)) < 1024
12891 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12892 return TRUE;
12893
12894 return FALSE;
12895 }
12896
12897 /* Return TRUE if OP is a memory operand which we can load or store a vector
12898 to/from. TYPE is one of the following values:
12899 0 - Vector load/store (vldr)
12900 1 - Core registers (ldm)
12901 2 - Element/structure loads (vld1)
12902 */
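/* A rough summary of the address forms accepted for each TYPE by the checks
   below (the code is authoritative):

     TYPE 0 (vldr):  (reg), POST_INC, PRE_DEC, and (plus (reg) (const_int))
                     with a word-aligned offset in the vldr range;
     TYPE 1 (ldm):   (reg) only;
     TYPE 2 (vld1):  (reg), POST_INC, and POST_MODIFY by a register.

   The STRICT argument only matters before reload: an operand that mentions
   an eliminable register is rejected when STRICT is true and provisionally
   accepted otherwise.  */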
12903 int
12904 neon_vector_mem_operand (rtx op, int type, bool strict)
12905 {
12906 rtx ind;
12907
12908 /* Reject eliminable registers. */
12909 if (! (reload_in_progress || reload_completed)
12910 && ( reg_mentioned_p (frame_pointer_rtx, op)
12911 || reg_mentioned_p (arg_pointer_rtx, op)
12912 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12913 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12914 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12915 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12916 return !strict;
12917
12918 /* Constants are converted into offsets from labels. */
12919 if (!MEM_P (op))
12920 return FALSE;
12921
12922 ind = XEXP (op, 0);
12923
12924 if (reload_completed
12925 && (GET_CODE (ind) == LABEL_REF
12926 || (GET_CODE (ind) == CONST
12927 && GET_CODE (XEXP (ind, 0)) == PLUS
12928 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12929 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12930 return TRUE;
12931
12932 /* Match: (mem (reg)). */
12933 if (REG_P (ind))
12934 return arm_address_register_rtx_p (ind, 0);
12935
12936 /* Allow post-increment with Neon registers. */
12937 if ((type != 1 && GET_CODE (ind) == POST_INC)
12938 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12939 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12940
12941 /* Allow post-increment by register for VLDn. */
12942 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12943 && GET_CODE (XEXP (ind, 1)) == PLUS
12944 && REG_P (XEXP (XEXP (ind, 1), 1)))
12945 return true;
12946
12947 /* Match:
12948 (plus (reg)
12949 (const)). */
12950 if (type == 0
12951 && GET_CODE (ind) == PLUS
12952 && REG_P (XEXP (ind, 0))
12953 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12954 && CONST_INT_P (XEXP (ind, 1))
12955 && INTVAL (XEXP (ind, 1)) > -1024
12956 /* For quad modes, we restrict the constant offset to be slightly less
12957 than what the instruction format permits. We have no such constraint
12958 on double mode offsets. (This must match arm_legitimate_index_p.) */
12959 && (INTVAL (XEXP (ind, 1))
12960 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12961 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12962 return TRUE;
12963
12964 return FALSE;
12965 }
12966
12967 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12968 type. */
12969 int
12970 neon_struct_mem_operand (rtx op)
12971 {
12972 rtx ind;
12973
12974 /* Reject eliminable registers. */
12975 if (! (reload_in_progress || reload_completed)
12976 && ( reg_mentioned_p (frame_pointer_rtx, op)
12977 || reg_mentioned_p (arg_pointer_rtx, op)
12978 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12979 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12980 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12981 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12982 return FALSE;
12983
12984 /* Constants are converted into offsets from labels. */
12985 if (!MEM_P (op))
12986 return FALSE;
12987
12988 ind = XEXP (op, 0);
12989
12990 if (reload_completed
12991 && (GET_CODE (ind) == LABEL_REF
12992 || (GET_CODE (ind) == CONST
12993 && GET_CODE (XEXP (ind, 0)) == PLUS
12994 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12995 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12996 return TRUE;
12997
12998 /* Match: (mem (reg)). */
12999 if (REG_P (ind))
13000 return arm_address_register_rtx_p (ind, 0);
13001
13002 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
13003 if (GET_CODE (ind) == POST_INC
13004 || GET_CODE (ind) == PRE_DEC)
13005 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
13006
13007 return FALSE;
13008 }
13009
13010 /* Return true if X is a register that will be eliminated later on. */
13011 int
13012 arm_eliminable_register (rtx x)
13013 {
13014 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
13015 || REGNO (x) == ARG_POINTER_REGNUM
13016 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
13017 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
13018 }
13019
13020 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
13021 coprocessor registers. Otherwise return NO_REGS. */
13022
13023 enum reg_class
13024 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13025 {
13026 if (mode == HFmode)
13027 {
13028 if (!TARGET_NEON_FP16)
13029 return GENERAL_REGS;
13030 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13031 return NO_REGS;
13032 return GENERAL_REGS;
13033 }
13034
13035 /* The neon move patterns handle all legitimate vector and struct
13036 addresses. */
13037 if (TARGET_NEON
13038 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13039 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13040 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13041 || VALID_NEON_STRUCT_MODE (mode)))
13042 return NO_REGS;
13043
13044 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13045 return NO_REGS;
13046
13047 return GENERAL_REGS;
13048 }
13049
13050 /* Values which must be returned in the most-significant end of the return
13051 register. */
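/* For example, on a big-endian AAPCS target a 2-byte structure returned in
   r0 is expected in the most significant half of the register, matching its
   in-memory layout, rather than in the low half (an illustration of what the
   predicate below decides).  */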
13052
13053 static bool
13054 arm_return_in_msb (const_tree valtype)
13055 {
13056 return (TARGET_AAPCS_BASED
13057 && BYTES_BIG_ENDIAN
13058 && (AGGREGATE_TYPE_P (valtype)
13059 || TREE_CODE (valtype) == COMPLEX_TYPE
13060 || FIXED_POINT_TYPE_P (valtype)));
13061 }
13062
13063 /* Return TRUE if X references a SYMBOL_REF. */
13064 int
13065 symbol_mentioned_p (rtx x)
13066 {
13067 const char * fmt;
13068 int i;
13069
13070 if (GET_CODE (x) == SYMBOL_REF)
13071 return 1;
13072
13073 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13074 are constant offsets, not symbols. */
13075 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13076 return 0;
13077
13078 fmt = GET_RTX_FORMAT (GET_CODE (x));
13079
13080 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13081 {
13082 if (fmt[i] == 'E')
13083 {
13084 int j;
13085
13086 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13087 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13088 return 1;
13089 }
13090 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13091 return 1;
13092 }
13093
13094 return 0;
13095 }
13096
13097 /* Return TRUE if X references a LABEL_REF. */
13098 int
13099 label_mentioned_p (rtx x)
13100 {
13101 const char * fmt;
13102 int i;
13103
13104 if (GET_CODE (x) == LABEL_REF)
13105 return 1;
13106
13107 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13108 instruction, but they are constant offsets, not symbols. */
13109 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13110 return 0;
13111
13112 fmt = GET_RTX_FORMAT (GET_CODE (x));
13113 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13114 {
13115 if (fmt[i] == 'E')
13116 {
13117 int j;
13118
13119 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13120 if (label_mentioned_p (XVECEXP (x, i, j)))
13121 return 1;
13122 }
13123 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13124 return 1;
13125 }
13126
13127 return 0;
13128 }
13129
13130 int
13131 tls_mentioned_p (rtx x)
13132 {
13133 switch (GET_CODE (x))
13134 {
13135 case CONST:
13136 return tls_mentioned_p (XEXP (x, 0));
13137
13138 case UNSPEC:
13139 if (XINT (x, 1) == UNSPEC_TLS)
13140 return 1;
13141
13142 default:
13143 return 0;
13144 }
13145 }
13146
13147 /* Must not copy any rtx that uses a pc-relative address. */
13148
13149 static bool
13150 arm_cannot_copy_insn_p (rtx_insn *insn)
13151 {
13152 /* The tls call insn cannot be copied, as it is paired with a data
13153 word. */
13154 if (recog_memoized (insn) == CODE_FOR_tlscall)
13155 return true;
13156
13157 subrtx_iterator::array_type array;
13158 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13159 {
13160 const_rtx x = *iter;
13161 if (GET_CODE (x) == UNSPEC
13162 && (XINT (x, 1) == UNSPEC_PIC_BASE
13163 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13164 return true;
13165 }
13166 return false;
13167 }
13168
13169 enum rtx_code
13170 minmax_code (rtx x)
13171 {
13172 enum rtx_code code = GET_CODE (x);
13173
13174 switch (code)
13175 {
13176 case SMAX:
13177 return GE;
13178 case SMIN:
13179 return LE;
13180 case UMIN:
13181 return LEU;
13182 case UMAX:
13183 return GEU;
13184 default:
13185 gcc_unreachable ();
13186 }
13187 }
13188
13189 /* Match a pair of min/max operators that can be implemented via usat/ssat. */
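/* For instance, a clamp to [0, 255] gives exact_log2 (255 + 1) == 8, so it
   matches usat with *MASK == 8, while a clamp to [-128, 127] satisfies
   -128 == -127 - 1 and matches ssat with *MASK == 7 + 1 == 8 (a worked
   example of the two cases handled below).  */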
13190
13191 bool
13192 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13193 int *mask, bool *signed_sat)
13194 {
13195 /* The high bound must be a power of two minus one. */
13196 int log = exact_log2 (INTVAL (hi_bound) + 1);
13197 if (log == -1)
13198 return false;
13199
13200 /* The low bound is either zero (for usat) or one less than the
13201 negation of the high bound (for ssat). */
13202 if (INTVAL (lo_bound) == 0)
13203 {
13204 if (mask)
13205 *mask = log;
13206 if (signed_sat)
13207 *signed_sat = false;
13208
13209 return true;
13210 }
13211
13212 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13213 {
13214 if (mask)
13215 *mask = log + 1;
13216 if (signed_sat)
13217 *signed_sat = true;
13218
13219 return true;
13220 }
13221
13222 return false;
13223 }
13224
13225 /* Return 1 if memory locations are adjacent. */
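/* For example, (mem (plus (reg r4) (const_int 8))) and
   (mem (plus (reg r4) (const_int 12))) are adjacent: same base register and
   offsets differing by exactly 4 (a sketch of what the checks below require;
   the offset-range and scheduling restrictions still apply).  */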
13226 int
13227 adjacent_mem_locations (rtx a, rtx b)
13228 {
13229 /* We don't guarantee to preserve the order of these memory refs. */
13230 if (volatile_refs_p (a) || volatile_refs_p (b))
13231 return 0;
13232
13233 if ((REG_P (XEXP (a, 0))
13234 || (GET_CODE (XEXP (a, 0)) == PLUS
13235 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13236 && (REG_P (XEXP (b, 0))
13237 || (GET_CODE (XEXP (b, 0)) == PLUS
13238 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13239 {
13240 HOST_WIDE_INT val0 = 0, val1 = 0;
13241 rtx reg0, reg1;
13242 int val_diff;
13243
13244 if (GET_CODE (XEXP (a, 0)) == PLUS)
13245 {
13246 reg0 = XEXP (XEXP (a, 0), 0);
13247 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13248 }
13249 else
13250 reg0 = XEXP (a, 0);
13251
13252 if (GET_CODE (XEXP (b, 0)) == PLUS)
13253 {
13254 reg1 = XEXP (XEXP (b, 0), 0);
13255 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13256 }
13257 else
13258 reg1 = XEXP (b, 0);
13259
13260 /* Don't accept any offset that will require multiple
13261 instructions to handle, since this would cause the
13262 arith_adjacentmem pattern to output an overlong sequence. */
13263 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13264 return 0;
13265
13266 /* Don't allow an eliminable register: register elimination can make
13267 the offset too large. */
13268 if (arm_eliminable_register (reg0))
13269 return 0;
13270
13271 val_diff = val1 - val0;
13272
13273 if (arm_ld_sched)
13274 {
13275 /* If the target has load delay slots, then there's no benefit
13276 to using an ldm instruction unless the offset is zero and
13277 we are optimizing for size. */
13278 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13279 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13280 && (val_diff == 4 || val_diff == -4));
13281 }
13282
13283 return ((REGNO (reg0) == REGNO (reg1))
13284 && (val_diff == 4 || val_diff == -4));
13285 }
13286
13287 return 0;
13288 }
13289
13290 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13291 for load operations, false for store operations. CONSECUTIVE is true
13292 if the register numbers in the operation must be consecutive in the register
13293 bank. RETURN_PC is true if the value is to be loaded into the PC.
13294 The pattern we are trying to match for load is:
13295 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13296 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13297 :
13298 :
13299 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13300 ]
13301 where
13302 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13303 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13304 3. If consecutive is TRUE, then for kth register being loaded,
13305 REGNO (R_dk) = REGNO (R_d0) + k.
13306 The pattern for store is similar. */
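/* As a concrete sketch, an "ldmia r0!, {r4, r5, r6}" would be described by a
   PARALLEL along the lines of

     [(set (reg r0) (plus (reg r0) (const_int 12)))
      (set (reg r4) (mem (reg r0)))
      (set (reg r5) (mem (plus (reg r0) (const_int 4))))
      (set (reg r6) (mem (plus (reg r0) (const_int 8))))]

   where the leading SET is the optional write-back of the base register.  */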
13307 bool
13308 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13309 bool consecutive, bool return_pc)
13310 {
13311 HOST_WIDE_INT count = XVECLEN (op, 0);
13312 rtx reg, mem, addr;
13313 unsigned regno;
13314 unsigned first_regno;
13315 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13316 rtx elt;
13317 bool addr_reg_in_reglist = false;
13318 bool update = false;
13319 int reg_increment;
13320 int offset_adj;
13321 int regs_per_val;
13322
13323 /* If not in SImode, then registers must be consecutive
13324 (e.g., VLDM instructions for DFmode). */
13325 gcc_assert ((mode == SImode) || consecutive);
13326 /* Setting return_pc for stores is illegal. */
13327 gcc_assert (!return_pc || load);
13328
13329 /* Set up the increments and the regs per val based on the mode. */
13330 reg_increment = GET_MODE_SIZE (mode);
13331 regs_per_val = reg_increment / 4;
13332 offset_adj = return_pc ? 1 : 0;
13333
13334 if (count <= 1
13335 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13336 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13337 return false;
13338
13339 /* Check if this is a write-back. */
13340 elt = XVECEXP (op, 0, offset_adj);
13341 if (GET_CODE (SET_SRC (elt)) == PLUS)
13342 {
13343 i++;
13344 base = 1;
13345 update = true;
13346
13347 /* The offset adjustment must be the number of registers being
13348 loaded or stored times the size of a single register. */
13349 if (!REG_P (SET_DEST (elt))
13350 || !REG_P (XEXP (SET_SRC (elt), 0))
13351 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13352 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13353 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13354 ((count - 1 - offset_adj) * reg_increment))
13355 return false;
13356 }
13357
13358 i = i + offset_adj;
13359 base = base + offset_adj;
13360 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13361 success depends on the type: VLDM can do just one reg,
13362 LDM must do at least two. */
13363 if ((count <= i) && (mode == SImode))
13364 return false;
13365
13366 elt = XVECEXP (op, 0, i - 1);
13367 if (GET_CODE (elt) != SET)
13368 return false;
13369
13370 if (load)
13371 {
13372 reg = SET_DEST (elt);
13373 mem = SET_SRC (elt);
13374 }
13375 else
13376 {
13377 reg = SET_SRC (elt);
13378 mem = SET_DEST (elt);
13379 }
13380
13381 if (!REG_P (reg) || !MEM_P (mem))
13382 return false;
13383
13384 regno = REGNO (reg);
13385 first_regno = regno;
13386 addr = XEXP (mem, 0);
13387 if (GET_CODE (addr) == PLUS)
13388 {
13389 if (!CONST_INT_P (XEXP (addr, 1)))
13390 return false;
13391
13392 offset = INTVAL (XEXP (addr, 1));
13393 addr = XEXP (addr, 0);
13394 }
13395
13396 if (!REG_P (addr))
13397 return false;
13398
13399 /* Don't allow SP to be loaded unless it is also the base register. It
13400 guarantees that SP is reset correctly when an LDM instruction
13401 is interrupted. Otherwise, we might end up with a corrupt stack. */
13402 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13403 return false;
13404
13405 for (; i < count; i++)
13406 {
13407 elt = XVECEXP (op, 0, i);
13408 if (GET_CODE (elt) != SET)
13409 return false;
13410
13411 if (load)
13412 {
13413 reg = SET_DEST (elt);
13414 mem = SET_SRC (elt);
13415 }
13416 else
13417 {
13418 reg = SET_SRC (elt);
13419 mem = SET_DEST (elt);
13420 }
13421
13422 if (!REG_P (reg)
13423 || GET_MODE (reg) != mode
13424 || REGNO (reg) <= regno
13425 || (consecutive
13426 && (REGNO (reg) !=
13427 (unsigned int) (first_regno + regs_per_val * (i - base))))
13428 /* Don't allow SP to be loaded unless it is also the base register. It
13429 guarantees that SP is reset correctly when an LDM instruction
13430 is interrupted. Otherwise, we might end up with a corrupt stack. */
13431 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13432 || !MEM_P (mem)
13433 || GET_MODE (mem) != mode
13434 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13435 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13436 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13437 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13438 offset + (i - base) * reg_increment))
13439 && (!REG_P (XEXP (mem, 0))
13440 || offset + (i - base) * reg_increment != 0)))
13441 return false;
13442
13443 regno = REGNO (reg);
13444 if (regno == REGNO (addr))
13445 addr_reg_in_reglist = true;
13446 }
13447
13448 if (load)
13449 {
13450 if (update && addr_reg_in_reglist)
13451 return false;
13452
13453 /* For Thumb-1, the address register is always modified, either by write-back
13454 or by an explicit load. If the pattern does not describe an update,
13455 then the address register must be in the list of loaded registers. */
13456 if (TARGET_THUMB1)
13457 return update || addr_reg_in_reglist;
13458 }
13459
13460 return true;
13461 }
13462
13463 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13464 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13465 instruction. ADD_OFFSET is nonzero if the base address register needs
13466 to be modified with an add instruction before we can use it. */
13467
13468 static bool
13469 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13470 int nops, HOST_WIDE_INT add_offset)
13471 {
13472 /* For the ARM8, ARM9 and StrongARM, two ldr instructions are faster than
13473 an ldm if the offset isn't small enough. The reason two ldrs are faster
13474 is because these ARMs are able to do more than one cache access
13475 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13476 whilst the ARM8 has a double bandwidth cache. This means that
13477 these cores can do both an instruction fetch and a data fetch in
13478 a single cycle, so the trick of calculating the address into a
13479 scratch register (one of the result regs) and then doing a load
13480 multiple actually becomes slower (and no smaller in code size).
13481 That is the transformation
13482
13483 ldr rd1, [rbase + offset]
13484 ldr rd2, [rbase + offset + 4]
13485
13486 to
13487
13488 add rd1, rbase, offset
13489 ldmia rd1, {rd1, rd2}
13490
13491 produces worse code -- '3 cycles + any stalls on rd2' instead of
13492 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13493 access per cycle, the first sequence could never complete in less
13494 than 6 cycles, whereas the ldm sequence would only take 5 and
13495 would make better use of sequential accesses if not hitting the
13496 cache.
13497
13498 We cheat here and test 'arm_ld_sched' which we currently know to
13499 only be true for the ARM8, ARM9 and StrongARM. If this ever
13500 changes, then the test below needs to be reworked. */
13501 if (nops == 2 && arm_ld_sched && add_offset != 0)
13502 return false;
13503
13504 /* XScale has load-store double instructions, but they have stricter
13505 alignment requirements than load-store multiple, so we cannot
13506 use them.
13507
13508 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13509 the pipeline until completion.
13510
13511 NREGS CYCLES
13512 1 3
13513 2 4
13514 3 5
13515 4 6
13516
13517 An ldr instruction takes 1-3 cycles, but does not block the
13518 pipeline.
13519
13520 NREGS CYCLES
13521 1 1-3
13522 2 2-6
13523 3 3-9
13524 4 4-12
13525
13526 Best case ldr will always win. However, the more ldr instructions
13527 we issue, the less likely we are to be able to schedule them well.
13528 Using ldr instructions also increases code size.
13529
13530 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13531 for counts of 3 or 4 regs. */
13532 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13533 return false;
13534 return true;
13535 }
13536
13537 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13538 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13539 an array ORDER which describes the sequence to use when accessing the
13540 offsets that produces an ascending order. In this sequence, each
13541 offset must be larger by exactly 4 than the previous one. ORDER[0]
13542 must have been filled in with the lowest offset by the caller.
13543 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13544 we use to verify that ORDER produces an ascending order of registers.
13545 Return true if it was possible to construct such an order, false if
13546 not. */
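/* A worked example: with UNSORTED_OFFSETS == {8, 0, 4, 12} the caller sets
   ORDER[0] = 1 (the offset 0), and the loop below fills in ORDER = {1, 2, 0, 3},
   i.e. offsets 0, 4, 8, 12.  If any offset were duplicated, or if the offsets
   did not form a run spaced exactly 4 apart, the function would return
   false.  */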
13547
13548 static bool
13549 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13550 int *unsorted_regs)
13551 {
13552 int i;
13553 for (i = 1; i < nops; i++)
13554 {
13555 int j;
13556
13557 order[i] = order[i - 1];
13558 for (j = 0; j < nops; j++)
13559 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13560 {
13561 /* We must find exactly one offset that is higher than the
13562 previous one by 4. */
13563 if (order[i] != order[i - 1])
13564 return false;
13565 order[i] = j;
13566 }
13567 if (order[i] == order[i - 1])
13568 return false;
13569 /* The register numbers must be ascending. */
13570 if (unsorted_regs != NULL
13571 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13572 return false;
13573 }
13574 return true;
13575 }
13576
13577 /* Used to determine in a peephole whether a sequence of load
13578 instructions can be changed into a load-multiple instruction.
13579 NOPS is the number of separate load instructions we are examining. The
13580 first NOPS entries in OPERANDS are the destination registers, the
13581 next NOPS entries are memory operands. If this function is
13582 successful, *BASE is set to the common base register of the memory
13583 accesses; *LOAD_OFFSET is set to the first memory location's offset
13584 from that base register.
13585 REGS is an array filled in with the destination register numbers.
13586 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13587 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13588 the sequence of registers in REGS matches the loads from ascending memory
13589 locations, and the function verifies that the register numbers are
13590 themselves ascending. If CHECK_REGS is false, the register numbers
13591 are stored in the order they are found in the operands. */
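/* As a sketch of the returned LDM_CASE values: four loads from r1 at offsets
   0, 4, 8 and 12 give case 1 (ldmia); offsets starting at 4 give case 2
   (ldmib, ARM only); offsets ending at 0 give case 3 (ldmda, ARM only);
   offsets ending at -4 give case 4 (ldmdb); and any other base offset that
   is a valid add/sub immediate gives case 5, which needs a preliminary add
   to form the base address.  */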
13592 static int
13593 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13594 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13595 {
13596 int unsorted_regs[MAX_LDM_STM_OPS];
13597 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13598 int order[MAX_LDM_STM_OPS];
13599 rtx base_reg_rtx = NULL;
13600 int base_reg = -1;
13601 int i, ldm_case;
13602
13603 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13604 easily extended if required. */
13605 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13606
13607 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13608
13609 /* Loop over the operands and check that the memory references are
13610 suitable (i.e. immediate offsets from the same base register). At
13611 the same time, extract the target register, and the memory
13612 offsets. */
13613 for (i = 0; i < nops; i++)
13614 {
13615 rtx reg;
13616 rtx offset;
13617
13618 /* Convert a subreg of a mem into the mem itself. */
13619 if (GET_CODE (operands[nops + i]) == SUBREG)
13620 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13621
13622 gcc_assert (MEM_P (operands[nops + i]));
13623
13624 /* Don't reorder volatile memory references; it doesn't seem worth
13625 looking for the case where the order is ok anyway. */
13626 if (MEM_VOLATILE_P (operands[nops + i]))
13627 return 0;
13628
13629 offset = const0_rtx;
13630
13631 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13632 || (GET_CODE (reg) == SUBREG
13633 && REG_P (reg = SUBREG_REG (reg))))
13634 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13635 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13636 || (GET_CODE (reg) == SUBREG
13637 && REG_P (reg = SUBREG_REG (reg))))
13638 && (CONST_INT_P (offset
13639 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13640 {
13641 if (i == 0)
13642 {
13643 base_reg = REGNO (reg);
13644 base_reg_rtx = reg;
13645 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13646 return 0;
13647 }
13648 else if (base_reg != (int) REGNO (reg))
13649 /* Not addressed from the same base register. */
13650 return 0;
13651
13652 unsorted_regs[i] = (REG_P (operands[i])
13653 ? REGNO (operands[i])
13654 : REGNO (SUBREG_REG (operands[i])));
13655
13656 /* If it isn't an integer register, or if it overwrites the
13657 base register but isn't the last insn in the list, then
13658 we can't do this. */
13659 if (unsorted_regs[i] < 0
13660 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13661 || unsorted_regs[i] > 14
13662 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13663 return 0;
13664
13665 /* Don't allow SP to be loaded unless it is also the base
13666 register. It guarantees that SP is reset correctly when
13667 an LDM instruction is interrupted. Otherwise, we might
13668 end up with a corrupt stack. */
13669 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13670 return 0;
13671
13672 unsorted_offsets[i] = INTVAL (offset);
13673 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13674 order[0] = i;
13675 }
13676 else
13677 /* Not a suitable memory address. */
13678 return 0;
13679 }
13680
13681 /* All the useful information has now been extracted from the
13682 operands into unsorted_regs and unsorted_offsets; additionally,
13683 order[0] has been set to the lowest offset in the list. Sort
13684 the offsets into order, verifying that they are adjacent, and
13685 check that the register numbers are ascending. */
13686 if (!compute_offset_order (nops, unsorted_offsets, order,
13687 check_regs ? unsorted_regs : NULL))
13688 return 0;
13689
13690 if (saved_order)
13691 memcpy (saved_order, order, sizeof order);
13692
13693 if (base)
13694 {
13695 *base = base_reg;
13696
13697 for (i = 0; i < nops; i++)
13698 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13699
13700 *load_offset = unsorted_offsets[order[0]];
13701 }
13702
13703 if (TARGET_THUMB1
13704 && !peep2_reg_dead_p (nops, base_reg_rtx))
13705 return 0;
13706
13707 if (unsorted_offsets[order[0]] == 0)
13708 ldm_case = 1; /* ldmia */
13709 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13710 ldm_case = 2; /* ldmib */
13711 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13712 ldm_case = 3; /* ldmda */
13713 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13714 ldm_case = 4; /* ldmdb */
13715 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13716 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13717 ldm_case = 5;
13718 else
13719 return 0;
13720
13721 if (!multiple_operation_profitable_p (false, nops,
13722 ldm_case == 5
13723 ? unsorted_offsets[order[0]] : 0))
13724 return 0;
13725
13726 return ldm_case;
13727 }
13728
13729 /* Used to determine in a peephole whether a sequence of store instructions can
13730 be changed into a store-multiple instruction.
13731 NOPS is the number of separate store instructions we are examining.
13732 NOPS_TOTAL is the total number of instructions recognized by the peephole
13733 pattern.
13734 The first NOPS entries in OPERANDS are the source registers, the next
13735 NOPS entries are memory operands. If this function is successful, *BASE is
13736 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13737 to the first memory location's offset from that base register. REGS is an
13738 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13739 likewise filled with the corresponding rtx's.
13740 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13741 numbers to an ascending order of stores.
13742 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13743 from ascending memory locations, and the function verifies that the register
13744 numbers are themselves ascending. If CHECK_REGS is false, the register
13745 numbers are stored in the order they are found in the operands. */
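/* The STM_CASE values returned below mirror the load case: e.g. stores at
   offsets -16, -12, -8 and -4 from the base give case 4 (stmdb), while
   stores starting at offset 0 give case 1 (stmia).  Unlike the load variant
   there is no case 5; an out-of-range base offset simply fails.  */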
13746 static int
13747 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13748 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13749 HOST_WIDE_INT *load_offset, bool check_regs)
13750 {
13751 int unsorted_regs[MAX_LDM_STM_OPS];
13752 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13753 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13754 int order[MAX_LDM_STM_OPS];
13755 int base_reg = -1;
13756 rtx base_reg_rtx = NULL;
13757 int i, stm_case;
13758
13759 /* Write-back of the base register is currently only supported for Thumb-1. */
13760 int base_writeback = TARGET_THUMB1;
13761
13762 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13763 easily extended if required. */
13764 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13765
13766 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13767
13768 /* Loop over the operands and check that the memory references are
13769 suitable (i.e. immediate offsets from the same base register). At
13770 the same time, extract the target register, and the memory
13771 offsets. */
13772 for (i = 0; i < nops; i++)
13773 {
13774 rtx reg;
13775 rtx offset;
13776
13777 /* Convert a subreg of a mem into the mem itself. */
13778 if (GET_CODE (operands[nops + i]) == SUBREG)
13779 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13780
13781 gcc_assert (MEM_P (operands[nops + i]));
13782
13783 /* Don't reorder volatile memory references; it doesn't seem worth
13784 looking for the case where the order is ok anyway. */
13785 if (MEM_VOLATILE_P (operands[nops + i]))
13786 return 0;
13787
13788 offset = const0_rtx;
13789
13790 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13791 || (GET_CODE (reg) == SUBREG
13792 && REG_P (reg = SUBREG_REG (reg))))
13793 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13794 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13795 || (GET_CODE (reg) == SUBREG
13796 && REG_P (reg = SUBREG_REG (reg))))
13797 && (CONST_INT_P (offset
13798 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13799 {
13800 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13801 ? operands[i] : SUBREG_REG (operands[i]));
13802 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13803
13804 if (i == 0)
13805 {
13806 base_reg = REGNO (reg);
13807 base_reg_rtx = reg;
13808 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13809 return 0;
13810 }
13811 else if (base_reg != (int) REGNO (reg))
13812 /* Not addressed from the same base register. */
13813 return 0;
13814
13815 /* If it isn't an integer register, then we can't do this. */
13816 if (unsorted_regs[i] < 0
13817 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13818 /* The effects are unpredictable if the base register is
13819 both updated and stored. */
13820 || (base_writeback && unsorted_regs[i] == base_reg)
13821 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13822 || unsorted_regs[i] > 14)
13823 return 0;
13824
13825 unsorted_offsets[i] = INTVAL (offset);
13826 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13827 order[0] = i;
13828 }
13829 else
13830 /* Not a suitable memory address. */
13831 return 0;
13832 }
13833
13834 /* All the useful information has now been extracted from the
13835 operands into unsorted_regs and unsorted_offsets; additionally,
13836 order[0] has been set to the lowest offset in the list. Sort
13837 the offsets into order, verifying that they are adjacent, and
13838 check that the register numbers are ascending. */
13839 if (!compute_offset_order (nops, unsorted_offsets, order,
13840 check_regs ? unsorted_regs : NULL))
13841 return 0;
13842
13843 if (saved_order)
13844 memcpy (saved_order, order, sizeof order);
13845
13846 if (base)
13847 {
13848 *base = base_reg;
13849
13850 for (i = 0; i < nops; i++)
13851 {
13852 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13853 if (reg_rtxs)
13854 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13855 }
13856
13857 *load_offset = unsorted_offsets[order[0]];
13858 }
13859
13860 if (TARGET_THUMB1
13861 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13862 return 0;
13863
13864 if (unsorted_offsets[order[0]] == 0)
13865 stm_case = 1; /* stmia */
13866 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13867 stm_case = 2; /* stmib */
13868 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13869 stm_case = 3; /* stmda */
13870 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13871 stm_case = 4; /* stmdb */
13872 else
13873 return 0;
13874
13875 if (!multiple_operation_profitable_p (false, nops, 0))
13876 return 0;
13877
13878 return stm_case;
13879 }
13880 \f
13881 /* Routines for use in generating RTL. */
13882
13883 /* Generate a load-multiple instruction. COUNT is the number of loads in
13884 the instruction; REGS and MEMS are arrays containing the operands.
13885 BASEREG is the base register to be used in addressing the memory operands.
13886 WBACK_OFFSET is nonzero if the instruction should update the base
13887 register. */
13888
13889 static rtx
13890 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13891 HOST_WIDE_INT wback_offset)
13892 {
13893 int i = 0, j;
13894 rtx result;
13895
13896 if (!multiple_operation_profitable_p (false, count, 0))
13897 {
13898 rtx seq;
13899
13900 start_sequence ();
13901
13902 for (i = 0; i < count; i++)
13903 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13904
13905 if (wback_offset != 0)
13906 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13907
13908 seq = get_insns ();
13909 end_sequence ();
13910
13911 return seq;
13912 }
13913
13914 result = gen_rtx_PARALLEL (VOIDmode,
13915 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13916 if (wback_offset != 0)
13917 {
13918 XVECEXP (result, 0, 0)
13919 = gen_rtx_SET (VOIDmode, basereg,
13920 plus_constant (Pmode, basereg, wback_offset));
13921 i = 1;
13922 count++;
13923 }
13924
13925 for (j = 0; i < count; i++, j++)
13926 XVECEXP (result, 0, i)
13927 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
13928
13929 return result;
13930 }
13931
13932 /* Generate a store-multiple instruction. COUNT is the number of stores in
13933 the instruction; REGS and MEMS are arrays containing the operands.
13934 BASEREG is the base register to be used in addressing the memory operands.
13935 WBACK_OFFSET is nonzero if the instruction should update the base
13936 register. */
13937
13938 static rtx
13939 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13940 HOST_WIDE_INT wback_offset)
13941 {
13942 int i = 0, j;
13943 rtx result;
13944
13945 if (GET_CODE (basereg) == PLUS)
13946 basereg = XEXP (basereg, 0);
13947
13948 if (!multiple_operation_profitable_p (false, count, 0))
13949 {
13950 rtx seq;
13951
13952 start_sequence ();
13953
13954 for (i = 0; i < count; i++)
13955 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13956
13957 if (wback_offset != 0)
13958 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13959
13960 seq = get_insns ();
13961 end_sequence ();
13962
13963 return seq;
13964 }
13965
13966 result = gen_rtx_PARALLEL (VOIDmode,
13967 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13968 if (wback_offset != 0)
13969 {
13970 XVECEXP (result, 0, 0)
13971 = gen_rtx_SET (VOIDmode, basereg,
13972 plus_constant (Pmode, basereg, wback_offset));
13973 i = 1;
13974 count++;
13975 }
13976
13977 for (j = 0; i < count; i++, j++)
13978 XVECEXP (result, 0, i)
13979 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
13980
13981 return result;
13982 }
13983
13984 /* Generate either a load-multiple or a store-multiple instruction. This
13985 function can be used in situations where we can start with a single MEM
13986 rtx and adjust its address upwards.
13987 COUNT is the number of operations in the instruction, not counting a
13988 possible update of the base register. REGS is an array containing the
13989 register operands.
13990 BASEREG is the base register to be used in addressing the memory operands,
13991 which are constructed from BASEMEM.
13992 WRITE_BACK specifies whether the generated instruction should include an
13993 update of the base register.
13994 OFFSETP is used to pass an offset to and from this function; this offset
13995 is not used when constructing the address (instead BASEMEM should have an
13996 appropriate offset in its address); it is used only for setting
13997 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
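/* A minimal usage sketch (values are illustrative): with REGS == {4, 5},
   COUNT == 2 and BASEREG r0, the loop below builds MEMs addressed at r0 and
   r0 + 4, and with WRITE_BACK the emitted instruction also adds
   4 * COUNT == 8 to r0.  */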
13998
13999 static rtx
14000 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
14001 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
14002 {
14003 rtx mems[MAX_LDM_STM_OPS];
14004 HOST_WIDE_INT offset = *offsetp;
14005 int i;
14006
14007 gcc_assert (count <= MAX_LDM_STM_OPS);
14008
14009 if (GET_CODE (basereg) == PLUS)
14010 basereg = XEXP (basereg, 0);
14011
14012 for (i = 0; i < count; i++)
14013 {
14014 rtx addr = plus_constant (Pmode, basereg, i * 4);
14015 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
14016 offset += 4;
14017 }
14018
14019 if (write_back)
14020 *offsetp = offset;
14021
14022 if (is_load)
14023 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14024 write_back ? 4 * count : 0);
14025 else
14026 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14027 write_back ? 4 * count : 0);
14028 }
14029
14030 rtx
14031 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14032 rtx basemem, HOST_WIDE_INT *offsetp)
14033 {
14034 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14035 offsetp);
14036 }
14037
14038 rtx
14039 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14040 rtx basemem, HOST_WIDE_INT *offsetp)
14041 {
14042 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14043 offsetp);
14044 }
14045
14046 /* Called from a peephole2 expander to turn a sequence of loads into an
14047 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14048 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14049 is true if we can reorder the registers because they are subsequently used
14050 commutatively.
14051 Returns true iff we could generate a new instruction. */
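/* For example (illustrative registers), the peephole may see

     ldr r5, [r2, #4]
     ldr r4, [r2]

   and, with SORT_REGS true, reorder the destination registers so that the
   pair becomes a single "ldmia r2, {r4, r5}".  */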
14052
14053 bool
14054 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14055 {
14056 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14057 rtx mems[MAX_LDM_STM_OPS];
14058 int i, j, base_reg;
14059 rtx base_reg_rtx;
14060 HOST_WIDE_INT offset;
14061 int write_back = FALSE;
14062 int ldm_case;
14063 rtx addr;
14064
14065 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14066 &base_reg, &offset, !sort_regs);
14067
14068 if (ldm_case == 0)
14069 return false;
14070
14071 if (sort_regs)
14072 for (i = 0; i < nops - 1; i++)
14073 for (j = i + 1; j < nops; j++)
14074 if (regs[i] > regs[j])
14075 {
14076 int t = regs[i];
14077 regs[i] = regs[j];
14078 regs[j] = t;
14079 }
14080 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14081
14082 if (TARGET_THUMB1)
14083 {
14084 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14085 gcc_assert (ldm_case == 1 || ldm_case == 5);
14086 write_back = TRUE;
14087 }
14088
14089 if (ldm_case == 5)
14090 {
14091 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14092 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14093 offset = 0;
14094 if (!TARGET_THUMB1)
14095 {
14096 base_reg = regs[0];
14097 base_reg_rtx = newbase;
14098 }
14099 }
14100
14101 for (i = 0; i < nops; i++)
14102 {
14103 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14104 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14105 SImode, addr, 0);
14106 }
14107 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14108 write_back ? offset + i * 4 : 0));
14109 return true;
14110 }
14111
14112 /* Called from a peephole2 expander to turn a sequence of stores into an
14113 STM instruction. OPERANDS are the operands found by the peephole matcher;
14114 NOPS indicates how many separate stores we are trying to combine.
14115 Returns true iff we could generate a new instruction. */
14116
14117 bool
14118 gen_stm_seq (rtx *operands, int nops)
14119 {
14120 int i;
14121 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14122 rtx mems[MAX_LDM_STM_OPS];
14123 int base_reg;
14124 rtx base_reg_rtx;
14125 HOST_WIDE_INT offset;
14126 int write_back = FALSE;
14127 int stm_case;
14128 rtx addr;
14129 bool base_reg_dies;
14130
14131 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14132 mem_order, &base_reg, &offset, true);
14133
14134 if (stm_case == 0)
14135 return false;
14136
14137 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14138
14139 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14140 if (TARGET_THUMB1)
14141 {
14142 gcc_assert (base_reg_dies);
14143 write_back = TRUE;
14144 }
14145
14146 if (stm_case == 5)
14147 {
14148 gcc_assert (base_reg_dies);
14149 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14150 offset = 0;
14151 }
14152
14153 addr = plus_constant (Pmode, base_reg_rtx, offset);
14154
14155 for (i = 0; i < nops; i++)
14156 {
14157 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14158 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14159 SImode, addr, 0);
14160 }
14161 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14162 write_back ? offset + i * 4 : 0));
14163 return true;
14164 }
14165
14166 /* Called from a peephole2 expander to turn a sequence of stores that are
14167 preceded by constant loads into an STM instruction. OPERANDS are the
14168 operands found by the peephole matcher; NOPS indicates how many
14169 separate stores we are trying to combine; there are 2 * NOPS
14170 instructions in the peephole.
14171 Returns true iff we could generate a new instruction. */
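/* A sketch of the situation this handles (register numbers illustrative):

     mov r4, #1          str r4, [r0]
     mov r4, #2          str r4, [r0, #4]

   Because r4 is reused, the code below allocates a free register for one of
   the constants so that the stores can become "stmia r0, {r4, r5}" (or gives
   up if no such register is available).  */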
14172
14173 bool
14174 gen_const_stm_seq (rtx *operands, int nops)
14175 {
14176 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14177 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14178 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14179 rtx mems[MAX_LDM_STM_OPS];
14180 int base_reg;
14181 rtx base_reg_rtx;
14182 HOST_WIDE_INT offset;
14183 int write_back = FALSE;
14184 int stm_case;
14185 rtx addr;
14186 bool base_reg_dies;
14187 int i, j;
14188 HARD_REG_SET allocated;
14189
14190 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14191 mem_order, &base_reg, &offset, false);
14192
14193 if (stm_case == 0)
14194 return false;
14195
14196 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14197
14198 /* If the same register is used more than once, try to find a free
14199 register. */
14200 CLEAR_HARD_REG_SET (allocated);
14201 for (i = 0; i < nops; i++)
14202 {
14203 for (j = i + 1; j < nops; j++)
14204 if (regs[i] == regs[j])
14205 {
14206 rtx t = peep2_find_free_register (0, nops * 2,
14207 TARGET_THUMB1 ? "l" : "r",
14208 SImode, &allocated);
14209 if (t == NULL_RTX)
14210 return false;
14211 reg_rtxs[i] = t;
14212 regs[i] = REGNO (t);
14213 }
14214 }
14215
14216 /* Compute an ordering that maps the register numbers to an ascending
14217 sequence. */
14218 reg_order[0] = 0;
14219 for (i = 0; i < nops; i++)
14220 if (regs[i] < regs[reg_order[0]])
14221 reg_order[0] = i;
14222
14223 for (i = 1; i < nops; i++)
14224 {
14225 int this_order = reg_order[i - 1];
14226 for (j = 0; j < nops; j++)
14227 if (regs[j] > regs[reg_order[i - 1]]
14228 && (this_order == reg_order[i - 1]
14229 || regs[j] < regs[this_order]))
14230 this_order = j;
14231 reg_order[i] = this_order;
14232 }
14233
14234 /* Ensure that registers that must be live after the instruction end
14235 up with the correct value. */
14236 for (i = 0; i < nops; i++)
14237 {
14238 int this_order = reg_order[i];
14239 if ((this_order != mem_order[i]
14240 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14241 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14242 return false;
14243 }
14244
14245 /* Load the constants. */
14246 for (i = 0; i < nops; i++)
14247 {
14248 rtx op = operands[2 * nops + mem_order[i]];
14249 sorted_regs[i] = regs[reg_order[i]];
14250 emit_move_insn (reg_rtxs[reg_order[i]], op);
14251 }
14252
14253 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14254
14255 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14256 if (TARGET_THUMB1)
14257 {
14258 gcc_assert (base_reg_dies);
14259 write_back = TRUE;
14260 }
14261
14262 if (stm_case == 5)
14263 {
14264 gcc_assert (base_reg_dies);
14265 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14266 offset = 0;
14267 }
14268
14269 addr = plus_constant (Pmode, base_reg_rtx, offset);
14270
14271 for (i = 0; i < nops; i++)
14272 {
14273 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14274 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14275 SImode, addr, 0);
14276 }
14277 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14278 write_back ? offset + i * 4 : 0));
14279 return true;
14280 }
14281
14282 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14283 unaligned copies on processors which support unaligned semantics for those
14284 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14285 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14286 An interleave factor of 1 (the minimum) will perform no interleaving.
14287 Load/store multiple are used for aligned addresses where possible. */
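/* For example, a 14-byte copy with INTERLEAVE_FACTOR == 2 and both addresses
   unaligned might expand roughly to (register choices illustrative):

     ldr  r4, [src]           @ first 8-byte block, loads grouped first
     ldr  r5, [src, #4]
     str  r4, [dst]
     str  r5, [dst, #4]
     ldr  r4, [src, #8]       @ one remaining whole word
     str  r4, [dst, #8]
     ldrh r6, [src, #12]      @ trailing halfword
     strh r6, [dst, #12]

   with ldm/stm used instead on whichever side is word-aligned.  */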
14288
14289 static void
14290 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14291 HOST_WIDE_INT length,
14292 unsigned int interleave_factor)
14293 {
14294 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14295 int *regnos = XALLOCAVEC (int, interleave_factor);
14296 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14297 HOST_WIDE_INT i, j;
14298 HOST_WIDE_INT remaining = length, words;
14299 rtx halfword_tmp = NULL, byte_tmp = NULL;
14300 rtx dst, src;
14301 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14302 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14303 HOST_WIDE_INT srcoffset, dstoffset;
14304 HOST_WIDE_INT src_autoinc, dst_autoinc;
14305 rtx mem, addr;
14306
14307 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14308
14309 /* Use hard registers if we have aligned source or destination so we can use
14310 load/store multiple with contiguous registers. */
14311 if (dst_aligned || src_aligned)
14312 for (i = 0; i < interleave_factor; i++)
14313 regs[i] = gen_rtx_REG (SImode, i);
14314 else
14315 for (i = 0; i < interleave_factor; i++)
14316 regs[i] = gen_reg_rtx (SImode);
14317
14318 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14319 src = copy_addr_to_reg (XEXP (srcbase, 0));
14320
14321 srcoffset = dstoffset = 0;
14322
14323 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14324 For copying the last bytes we want to subtract this offset again. */
14325 src_autoinc = dst_autoinc = 0;
14326
14327 for (i = 0; i < interleave_factor; i++)
14328 regnos[i] = i;
14329
14330 /* Copy BLOCK_SIZE_BYTES chunks. */
14331
14332 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14333 {
14334 /* Load words. */
14335 if (src_aligned && interleave_factor > 1)
14336 {
14337 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14338 TRUE, srcbase, &srcoffset));
14339 src_autoinc += UNITS_PER_WORD * interleave_factor;
14340 }
14341 else
14342 {
14343 for (j = 0; j < interleave_factor; j++)
14344 {
14345 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14346 - src_autoinc));
14347 mem = adjust_automodify_address (srcbase, SImode, addr,
14348 srcoffset + j * UNITS_PER_WORD);
14349 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14350 }
14351 srcoffset += block_size_bytes;
14352 }
14353
14354 /* Store words. */
14355 if (dst_aligned && interleave_factor > 1)
14356 {
14357 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14358 TRUE, dstbase, &dstoffset));
14359 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14360 }
14361 else
14362 {
14363 for (j = 0; j < interleave_factor; j++)
14364 {
14365 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14366 - dst_autoinc));
14367 mem = adjust_automodify_address (dstbase, SImode, addr,
14368 dstoffset + j * UNITS_PER_WORD);
14369 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14370 }
14371 dstoffset += block_size_bytes;
14372 }
14373
14374 remaining -= block_size_bytes;
14375 }
14376
14377 /* Copy any whole words left (note these aren't interleaved with any
14378 subsequent halfword/byte load/stores in the interests of simplicity). */
14379
14380 words = remaining / UNITS_PER_WORD;
14381
14382 gcc_assert (words < interleave_factor);
14383
14384 if (src_aligned && words > 1)
14385 {
14386 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14387 &srcoffset));
14388 src_autoinc += UNITS_PER_WORD * words;
14389 }
14390 else
14391 {
14392 for (j = 0; j < words; j++)
14393 {
14394 addr = plus_constant (Pmode, src,
14395 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14396 mem = adjust_automodify_address (srcbase, SImode, addr,
14397 srcoffset + j * UNITS_PER_WORD);
14398 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14399 }
14400 srcoffset += words * UNITS_PER_WORD;
14401 }
14402
14403 if (dst_aligned && words > 1)
14404 {
14405 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14406 &dstoffset));
14407 dst_autoinc += words * UNITS_PER_WORD;
14408 }
14409 else
14410 {
14411 for (j = 0; j < words; j++)
14412 {
14413 addr = plus_constant (Pmode, dst,
14414 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14415 mem = adjust_automodify_address (dstbase, SImode, addr,
14416 dstoffset + j * UNITS_PER_WORD);
14417 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14418 }
14419 dstoffset += words * UNITS_PER_WORD;
14420 }
14421
14422 remaining -= words * UNITS_PER_WORD;
14423
14424 gcc_assert (remaining < 4);
14425
14426 /* Copy a halfword if necessary. */
14427
14428 if (remaining >= 2)
14429 {
14430 halfword_tmp = gen_reg_rtx (SImode);
14431
14432 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14433 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14434 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14435
14436 /* Either write out immediately, or delay until we've loaded the last
14437 byte, depending on interleave factor. */
14438 if (interleave_factor == 1)
14439 {
14440 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14441 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14442 emit_insn (gen_unaligned_storehi (mem,
14443 gen_lowpart (HImode, halfword_tmp)));
14444 halfword_tmp = NULL;
14445 dstoffset += 2;
14446 }
14447
14448 remaining -= 2;
14449 srcoffset += 2;
14450 }
14451
14452 gcc_assert (remaining < 2);
14453
14454 /* Copy last byte. */
14455
14456 if ((remaining & 1) != 0)
14457 {
14458 byte_tmp = gen_reg_rtx (SImode);
14459
14460 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14461 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14462 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14463
14464 if (interleave_factor == 1)
14465 {
14466 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14467 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14468 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14469 byte_tmp = NULL;
14470 dstoffset++;
14471 }
14472
14473 remaining--;
14474 srcoffset++;
14475 }
14476
14477 /* Store last halfword if we haven't done so already. */
14478
14479 if (halfword_tmp)
14480 {
14481 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14482 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14483 emit_insn (gen_unaligned_storehi (mem,
14484 gen_lowpart (HImode, halfword_tmp)));
14485 dstoffset += 2;
14486 }
14487
14488 /* Likewise for last byte. */
14489
14490 if (byte_tmp)
14491 {
14492 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14493 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14494 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14495 dstoffset++;
14496 }
14497
14498 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14499 }
14500
14501 /* From mips_adjust_block_mem:
14502
14503 Helper function for doing a loop-based block operation on memory
14504 reference MEM. Each iteration of the loop will operate on LENGTH
14505 bytes of MEM.
14506
14507 Create a new base register for use within the loop and point it to
14508 the start of MEM. Create a new memory reference that uses this
14509 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14510
14511 static void
14512 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14513 rtx *loop_mem)
14514 {
14515 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14516
14517 /* Although the new mem does not refer to a known location,
14518 it does keep up to LENGTH bytes of alignment. */
14519 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14520 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14521 }
14522
14523 /* From mips_block_move_loop:
14524
14525 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14526 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14527 the memory regions do not overlap. */
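/* The emitted structure is roughly (a sketch, ignoring register choices):

       final_src = src + (LENGTH - leftover)
     loop:
       copy BYTES_PER_ITER bytes from src to dest
       src += BYTES_PER_ITER;  dest += BYTES_PER_ITER
       if (src != final_src) goto loop
       copy the remaining "leftover" bytes, if any  */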
14528
14529 static void
14530 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14531 unsigned int interleave_factor,
14532 HOST_WIDE_INT bytes_per_iter)
14533 {
14534 rtx src_reg, dest_reg, final_src, test;
14535 HOST_WIDE_INT leftover;
14536
14537 leftover = length % bytes_per_iter;
14538 length -= leftover;
14539
14540 /* Create registers and memory references for use within the loop. */
14541 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14542 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14543
14544 /* Calculate the value that SRC_REG should have after the last iteration of
14545 the loop. */
14546 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14547 0, 0, OPTAB_WIDEN);
14548
14549 /* Emit the start of the loop. */
14550 rtx_code_label *label = gen_label_rtx ();
14551 emit_label (label);
14552
14553 /* Emit the loop body. */
14554 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14555 interleave_factor);
14556
14557 /* Move on to the next block. */
14558 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14559 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14560
14561 /* Emit the loop condition. */
14562 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14563 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14564
14565 /* Mop up any left-over bytes. */
14566 if (leftover)
14567 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14568 }
14569
14570 /* Emit a block move when either the source or destination is unaligned (not
14571 aligned to a four-byte boundary). This may need further tuning depending on
14572 core type, optimize_size setting, etc. */
14573
14574 static int
14575 arm_movmemqi_unaligned (rtx *operands)
14576 {
14577 HOST_WIDE_INT length = INTVAL (operands[2]);
14578
14579 if (optimize_size)
14580 {
14581 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14582 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14583 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14584 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14585 or dst_aligned though: allow more interleaving in those cases since the
14586 resulting code can be smaller. */
14587 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14588 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14589
14590 if (length > 12)
14591 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14592 interleave_factor, bytes_per_iter);
14593 else
14594 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14595 interleave_factor);
14596 }
14597 else
14598 {
14599 /* Note that the loop created by arm_block_move_unaligned_loop may be
14600 subject to loop unrolling, which makes tuning this condition a little
14601 redundant. */
14602 if (length > 32)
14603 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14604 else
14605 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14606 }
14607
14608 return 1;
14609 }
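/* For example, a 40-byte copy where neither operand is known to be word
   aligned uses the 4-bytes-per-iteration loop above when optimizing for
   size, and otherwise the 16-bytes-per-iteration loop with an interleave
   factor of 4.  */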
14610
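/* Expand a block copy using load/store-multiple sequences where possible.
   As in arm_movmemqi_unaligned above, OPERANDS[0] and OPERANDS[1] are the
   destination and source memory references, OPERANDS[2] is the byte count
   and OPERANDS[3] the known alignment.  Returns 1 if the copy was expanded
   inline, 0 if it is not handled here (the caller is then expected to fall
   back to another strategy).  For example, a 14-byte word-aligned copy
   typically becomes a four-word load-multiple, a three-word store-multiple
   and a halfword store of the remaining two bytes.  */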
14611 int
14612 arm_gen_movmemqi (rtx *operands)
14613 {
14614 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14615 HOST_WIDE_INT srcoffset, dstoffset;
14616 int i;
14617 rtx src, dst, srcbase, dstbase;
14618 rtx part_bytes_reg = NULL;
14619 rtx mem;
14620
14621 if (!CONST_INT_P (operands[2])
14622 || !CONST_INT_P (operands[3])
14623 || INTVAL (operands[2]) > 64)
14624 return 0;
14625
14626 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14627 return arm_movmemqi_unaligned (operands);
14628
14629 if (INTVAL (operands[3]) & 3)
14630 return 0;
14631
14632 dstbase = operands[0];
14633 srcbase = operands[1];
14634
14635 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14636 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14637
14638 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14639 out_words_to_go = INTVAL (operands[2]) / 4;
14640 last_bytes = INTVAL (operands[2]) & 3;
14641 dstoffset = srcoffset = 0;
14642
14643 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14644 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14645
14646 for (i = 0; in_words_to_go >= 2; i+=4)
14647 {
14648 if (in_words_to_go > 4)
14649 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14650 TRUE, srcbase, &srcoffset));
14651 else
14652 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14653 src, FALSE, srcbase,
14654 &srcoffset));
14655
14656 if (out_words_to_go)
14657 {
14658 if (out_words_to_go > 4)
14659 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14660 TRUE, dstbase, &dstoffset));
14661 else if (out_words_to_go != 1)
14662 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14663 out_words_to_go, dst,
14664 (last_bytes == 0
14665 ? FALSE : TRUE),
14666 dstbase, &dstoffset));
14667 else
14668 {
14669 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14670 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
14671 if (last_bytes != 0)
14672 {
14673 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14674 dstoffset += 4;
14675 }
14676 }
14677 }
14678
14679 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14680 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14681 }
14682
14683 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14684 if (out_words_to_go)
14685 {
14686 rtx sreg;
14687
14688 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14689 sreg = copy_to_reg (mem);
14690
14691 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14692 emit_move_insn (mem, sreg);
14693 in_words_to_go--;
14694
14695 gcc_assert (!in_words_to_go); /* Sanity check */
14696 }
14697
14698 if (in_words_to_go)
14699 {
14700 gcc_assert (in_words_to_go > 0);
14701
14702 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14703 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14704 }
14705
14706 gcc_assert (!last_bytes || part_bytes_reg);
14707
14708 if (BYTES_BIG_ENDIAN && last_bytes)
14709 {
14710 rtx tmp = gen_reg_rtx (SImode);
14711
14712 /* The bytes we want are in the top end of the word. */
14713 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14714 GEN_INT (8 * (4 - last_bytes))));
14715 part_bytes_reg = tmp;
14716
14717 while (last_bytes)
14718 {
14719 mem = adjust_automodify_address (dstbase, QImode,
14720 plus_constant (Pmode, dst,
14721 last_bytes - 1),
14722 dstoffset + last_bytes - 1);
14723 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14724
14725 if (--last_bytes)
14726 {
14727 tmp = gen_reg_rtx (SImode);
14728 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14729 part_bytes_reg = tmp;
14730 }
14731 }
14732
14733 }
14734 else
14735 {
14736 if (last_bytes > 1)
14737 {
14738 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14739 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14740 last_bytes -= 2;
14741 if (last_bytes)
14742 {
14743 rtx tmp = gen_reg_rtx (SImode);
14744 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14745 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14746 part_bytes_reg = tmp;
14747 dstoffset += 2;
14748 }
14749 }
14750
14751 if (last_bytes)
14752 {
14753 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14754 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14755 }
14756 }
14757
14758 return 1;
14759 }
14760
14761 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14762 by mode size. */
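/* For example, given an SImode MEM whose address is (plus (reg r1)
   (const_int 4)), the result is an SImode MEM addressing
   (plus (reg r1) (const_int 8)).  */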
14763 inline static rtx
14764 next_consecutive_mem (rtx mem)
14765 {
14766 machine_mode mode = GET_MODE (mem);
14767 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14768 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14769
14770 return adjust_automodify_address (mem, mode, addr, offset);
14771 }
14772
14773 /* Copy using LDRD/STRD instructions whenever possible.
14774 Returns true upon success. */
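/* Rough sketch of the expansion below: a 14-byte copy is emitted as one
   doubleword copy (LDRD/STRD when the operands allow it), one word copy,
   and one halfword copy for the remaining 2 bytes.  */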
14775 bool
14776 gen_movmem_ldrd_strd (rtx *operands)
14777 {
14778 unsigned HOST_WIDE_INT len;
14779 HOST_WIDE_INT align;
14780 rtx src, dst, base;
14781 rtx reg0;
14782 bool src_aligned, dst_aligned;
14783 bool src_volatile, dst_volatile;
14784
14785 gcc_assert (CONST_INT_P (operands[2]));
14786 gcc_assert (CONST_INT_P (operands[3]));
14787
14788 len = UINTVAL (operands[2]);
14789 if (len > 64)
14790 return false;
14791
14792 /* Maximum alignment we can assume for both src and dst buffers. */
14793 align = INTVAL (operands[3]);
14794
14795 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14796 return false;
14797
14798 /* Place src and dst addresses in registers
14799 and update the corresponding mem rtx. */
14800 dst = operands[0];
14801 dst_volatile = MEM_VOLATILE_P (dst);
14802 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14803 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14804 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14805
14806 src = operands[1];
14807 src_volatile = MEM_VOLATILE_P (src);
14808 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14809 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14810 src = adjust_automodify_address (src, VOIDmode, base, 0);
14811
14812 if (!unaligned_access && !(src_aligned && dst_aligned))
14813 return false;
14814
14815 if (src_volatile || dst_volatile)
14816 return false;
14817
14818 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14819 if (!(dst_aligned || src_aligned))
14820 return arm_gen_movmemqi (operands);
14821
14822 src = adjust_address (src, DImode, 0);
14823 dst = adjust_address (dst, DImode, 0);
14824 while (len >= 8)
14825 {
14826 len -= 8;
14827 reg0 = gen_reg_rtx (DImode);
14828 if (src_aligned)
14829 emit_move_insn (reg0, src);
14830 else
14831 emit_insn (gen_unaligned_loaddi (reg0, src));
14832
14833 if (dst_aligned)
14834 emit_move_insn (dst, reg0);
14835 else
14836 emit_insn (gen_unaligned_storedi (dst, reg0));
14837
14838 src = next_consecutive_mem (src);
14839 dst = next_consecutive_mem (dst);
14840 }
14841
14842 gcc_assert (len < 8);
14843 if (len >= 4)
14844 {
14845 /* At least a word but less than a double-word left to copy.  Copy a word. */
14846 reg0 = gen_reg_rtx (SImode);
14847 src = adjust_address (src, SImode, 0);
14848 dst = adjust_address (dst, SImode, 0);
14849 if (src_aligned)
14850 emit_move_insn (reg0, src);
14851 else
14852 emit_insn (gen_unaligned_loadsi (reg0, src));
14853
14854 if (dst_aligned)
14855 emit_move_insn (dst, reg0);
14856 else
14857 emit_insn (gen_unaligned_storesi (dst, reg0));
14858
14859 src = next_consecutive_mem (src);
14860 dst = next_consecutive_mem (dst);
14861 len -= 4;
14862 }
14863
14864 if (len == 0)
14865 return true;
14866
14867 /* Copy the remaining bytes. */
14868 if (len >= 2)
14869 {
14870 dst = adjust_address (dst, HImode, 0);
14871 src = adjust_address (src, HImode, 0);
14872 reg0 = gen_reg_rtx (SImode);
14873 if (src_aligned)
14874 emit_insn (gen_zero_extendhisi2 (reg0, src));
14875 else
14876 emit_insn (gen_unaligned_loadhiu (reg0, src));
14877
14878 if (dst_aligned)
14879 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14880 else
14881 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14882
14883 src = next_consecutive_mem (src);
14884 dst = next_consecutive_mem (dst);
14885 if (len == 2)
14886 return true;
14887 }
14888
14889 dst = adjust_address (dst, QImode, 0);
14890 src = adjust_address (src, QImode, 0);
14891 reg0 = gen_reg_rtx (QImode);
14892 emit_move_insn (reg0, src);
14893 emit_move_insn (dst, reg0);
14894 return true;
14895 }
14896
14897 /* Select a dominance comparison mode if possible for a test of the general
14898 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14899 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14900 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14901 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14902 In all cases OP will be either EQ or NE, but we don't need to know which
14903 here. If we are unable to support a dominance comparison we return
14904 CC mode. This will then fail to match for the RTL expressions that
14905 generate this call. */
14906 machine_mode
14907 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14908 {
14909 enum rtx_code cond1, cond2;
14910 int swapped = 0;
14911
14912 /* Currently we will probably get the wrong result if the individual
14913 comparisons are not simple. This also ensures that it is safe to
14914 reverse a comparison if necessary. */
14915 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14916 != CCmode)
14917 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14918 != CCmode))
14919 return CCmode;
14920
14921 /* The if_then_else variant of this tests the second condition if the
14922 first passes, but is true if the first fails. Reverse the first
14923 condition to get a true "inclusive-or" expression. */
14924 if (cond_or == DOM_CC_NX_OR_Y)
14925 cond1 = reverse_condition (cond1);
14926
14927 /* If the comparisons are not equal, and one doesn't dominate the other,
14928 then we can't do this. */
14929 if (cond1 != cond2
14930 && !comparison_dominates_p (cond1, cond2)
14931 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14932 return CCmode;
14933
14934 if (swapped)
14935 std::swap (cond1, cond2);
14936
14937 switch (cond1)
14938 {
14939 case EQ:
14940 if (cond_or == DOM_CC_X_AND_Y)
14941 return CC_DEQmode;
14942
14943 switch (cond2)
14944 {
14945 case EQ: return CC_DEQmode;
14946 case LE: return CC_DLEmode;
14947 case LEU: return CC_DLEUmode;
14948 case GE: return CC_DGEmode;
14949 case GEU: return CC_DGEUmode;
14950 default: gcc_unreachable ();
14951 }
14952
14953 case LT:
14954 if (cond_or == DOM_CC_X_AND_Y)
14955 return CC_DLTmode;
14956
14957 switch (cond2)
14958 {
14959 case LT:
14960 return CC_DLTmode;
14961 case LE:
14962 return CC_DLEmode;
14963 case NE:
14964 return CC_DNEmode;
14965 default:
14966 gcc_unreachable ();
14967 }
14968
14969 case GT:
14970 if (cond_or == DOM_CC_X_AND_Y)
14971 return CC_DGTmode;
14972
14973 switch (cond2)
14974 {
14975 case GT:
14976 return CC_DGTmode;
14977 case GE:
14978 return CC_DGEmode;
14979 case NE:
14980 return CC_DNEmode;
14981 default:
14982 gcc_unreachable ();
14983 }
14984
14985 case LTU:
14986 if (cond_or == DOM_CC_X_AND_Y)
14987 return CC_DLTUmode;
14988
14989 switch (cond2)
14990 {
14991 case LTU:
14992 return CC_DLTUmode;
14993 case LEU:
14994 return CC_DLEUmode;
14995 case NE:
14996 return CC_DNEmode;
14997 default:
14998 gcc_unreachable ();
14999 }
15000
15001 case GTU:
15002 if (cond_or == DOM_CC_X_AND_Y)
15003 return CC_DGTUmode;
15004
15005 switch (cond2)
15006 {
15007 case GTU:
15008 return CC_DGTUmode;
15009 case GEU:
15010 return CC_DGEUmode;
15011 case NE:
15012 return CC_DNEmode;
15013 default:
15014 gcc_unreachable ();
15015 }
15016
15017 /* The remaining cases only occur when both comparisons are the
15018 same. */
15019 case NE:
15020 gcc_assert (cond1 == cond2);
15021 return CC_DNEmode;
15022
15023 case LE:
15024 gcc_assert (cond1 == cond2);
15025 return CC_DLEmode;
15026
15027 case GE:
15028 gcc_assert (cond1 == cond2);
15029 return CC_DGEmode;
15030
15031 case LEU:
15032 gcc_assert (cond1 == cond2);
15033 return CC_DLEUmode;
15034
15035 case GEU:
15036 gcc_assert (cond1 == cond2);
15037 return CC_DGEUmode;
15038
15039 default:
15040 gcc_unreachable ();
15041 }
15042 }
15043
15044 machine_mode
15045 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15046 {
15047 /* All floating point compares return CCFP if it is an equality
15048 comparison, and CCFPE otherwise. */
15049 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15050 {
15051 switch (op)
15052 {
15053 case EQ:
15054 case NE:
15055 case UNORDERED:
15056 case ORDERED:
15057 case UNLT:
15058 case UNLE:
15059 case UNGT:
15060 case UNGE:
15061 case UNEQ:
15062 case LTGT:
15063 return CCFPmode;
15064
15065 case LT:
15066 case LE:
15067 case GT:
15068 case GE:
15069 return CCFPEmode;
15070
15071 default:
15072 gcc_unreachable ();
15073 }
15074 }
15075
15076 /* A compare with a shifted operand. Because of canonicalization, the
15077 comparison will have to be swapped when we emit the assembler. */
15078 if (GET_MODE (y) == SImode
15079 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15080 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15081 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15082 || GET_CODE (x) == ROTATERT))
15083 return CC_SWPmode;
15084
15085 /* This operation is performed swapped, but since we only rely on the Z
15086 flag we don't need an additional mode. */
15087 if (GET_MODE (y) == SImode
15088 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15089 && GET_CODE (x) == NEG
15090 && (op == EQ || op == NE))
15091 return CC_Zmode;
15092
15093 /* This is a special case that is used by combine to allow a
15094 comparison of a shifted byte load to be split into a zero-extend
15095 followed by a comparison of the shifted integer (only valid for
15096 equalities and unsigned inequalities). */
15097 if (GET_MODE (x) == SImode
15098 && GET_CODE (x) == ASHIFT
15099 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15100 && GET_CODE (XEXP (x, 0)) == SUBREG
15101 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15102 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15103 && (op == EQ || op == NE
15104 || op == GEU || op == GTU || op == LTU || op == LEU)
15105 && CONST_INT_P (y))
15106 return CC_Zmode;
15107
15108 /* A construct for a conditional compare, if the false arm contains
15109 0, then both conditions must be true, otherwise either condition
15110 must be true. Not all conditions are possible, so CCmode is
15111 returned if it can't be done. */
15112 if (GET_CODE (x) == IF_THEN_ELSE
15113 && (XEXP (x, 2) == const0_rtx
15114 || XEXP (x, 2) == const1_rtx)
15115 && COMPARISON_P (XEXP (x, 0))
15116 && COMPARISON_P (XEXP (x, 1)))
15117 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15118 INTVAL (XEXP (x, 2)));
15119
15120 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15121 if (GET_CODE (x) == AND
15122 && (op == EQ || op == NE)
15123 && COMPARISON_P (XEXP (x, 0))
15124 && COMPARISON_P (XEXP (x, 1)))
15125 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15126 DOM_CC_X_AND_Y);
15127
15128 if (GET_CODE (x) == IOR
15129 && (op == EQ || op == NE)
15130 && COMPARISON_P (XEXP (x, 0))
15131 && COMPARISON_P (XEXP (x, 1)))
15132 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15133 DOM_CC_X_OR_Y);
15134
15135 /* An operation (on Thumb) where we want to test for a single bit.
15136 This is done by shifting that bit up into the top bit of a
15137 scratch register; we can then branch on the sign bit. */
15138 if (TARGET_THUMB1
15139 && GET_MODE (x) == SImode
15140 && (op == EQ || op == NE)
15141 && GET_CODE (x) == ZERO_EXTRACT
15142 && XEXP (x, 1) == const1_rtx)
15143 return CC_Nmode;
15144
15145 /* An operation that sets the condition codes as a side-effect, the
15146 V flag is not set correctly, so we can only use comparisons where
15147 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15148 instead.) */
15149 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15150 if (GET_MODE (x) == SImode
15151 && y == const0_rtx
15152 && (op == EQ || op == NE || op == LT || op == GE)
15153 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15154 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15155 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15156 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15157 || GET_CODE (x) == LSHIFTRT
15158 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15159 || GET_CODE (x) == ROTATERT
15160 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15161 return CC_NOOVmode;
15162
15163 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15164 return CC_Zmode;
15165
15166 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15167 && GET_CODE (x) == PLUS
15168 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15169 return CC_Cmode;
15170
15171 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15172 {
15173 switch (op)
15174 {
15175 case EQ:
15176 case NE:
15177 /* A DImode comparison against zero can be implemented by
15178 or'ing the two halves together. */
15179 if (y == const0_rtx)
15180 return CC_Zmode;
15181
15182 /* We can do an equality test in three Thumb instructions. */
15183 if (!TARGET_32BIT)
15184 return CC_Zmode;
15185
15186 /* FALLTHROUGH */
15187
15188 case LTU:
15189 case LEU:
15190 case GTU:
15191 case GEU:
15192 /* DImode unsigned comparisons can be implemented by cmp +
15193 cmpeq without a scratch register. Not worth doing in
15194 Thumb-2. */
15195 if (TARGET_32BIT)
15196 return CC_CZmode;
15197
15198 /* FALLTHROUGH */
15199
15200 case LT:
15201 case LE:
15202 case GT:
15203 case GE:
15204 /* DImode signed and unsigned comparisons can be implemented
15205 by cmp + sbcs with a scratch register, but that does not
15206 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15207 gcc_assert (op != EQ && op != NE);
15208 return CC_NCVmode;
15209
15210 default:
15211 gcc_unreachable ();
15212 }
15213 }
15214
15215 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15216 return GET_MODE (x);
15217
15218 return CCmode;
15219 }
15220
15221 /* X and Y are two things to compare using CODE.  Emit the compare insn and
15222    return the rtx for the CC register in the proper mode.  SCRATCH is an
15223    SImode register that may be needed as a scratch for DImode comparisons.  */
15224 rtx
15225 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15226 {
15227 machine_mode mode;
15228 rtx cc_reg;
15229 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15230
15231 /* We might have X as a constant, Y as a register because of the predicates
15232 used for cmpdi. If so, force X to a register here. */
15233 if (dimode_comparison && !REG_P (x))
15234 x = force_reg (DImode, x);
15235
15236 mode = SELECT_CC_MODE (code, x, y);
15237 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15238
15239 if (dimode_comparison
15240 && mode != CC_CZmode)
15241 {
15242 rtx clobber, set;
15243
15244 /* To compare two non-zero values for equality, XOR them and
15245 then compare against zero. Not used for ARM mode; there
15246 CC_CZmode is cheaper. */
15247 if (mode == CC_Zmode && y != const0_rtx)
15248 {
15249 gcc_assert (!reload_completed);
15250 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15251 y = const0_rtx;
15252 }
15253
15254 /* A scratch register is required. */
15255 if (reload_completed)
15256 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15257 else
15258 scratch = gen_rtx_SCRATCH (SImode);
15259
15260 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15261 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
15262 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15263 }
15264 else
15265 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15266
15267 return cc_reg;
15268 }
15269
15270 /* Generate a sequence of insns that will generate the correct return
15271 address mask depending on the physical architecture that the program
15272 is running on. */
15273 rtx
15274 arm_gen_return_addr_mask (void)
15275 {
15276 rtx reg = gen_reg_rtx (Pmode);
15277
15278 emit_insn (gen_return_addr_mask (reg));
15279 return reg;
15280 }
15281
15282 void
15283 arm_reload_in_hi (rtx *operands)
15284 {
15285 rtx ref = operands[1];
15286 rtx base, scratch;
15287 HOST_WIDE_INT offset = 0;
15288
15289 if (GET_CODE (ref) == SUBREG)
15290 {
15291 offset = SUBREG_BYTE (ref);
15292 ref = SUBREG_REG (ref);
15293 }
15294
15295 if (REG_P (ref))
15296 {
15297 /* We have a pseudo which has been spilt onto the stack; there
15298 are two cases here: the first where there is a simple
15299 stack-slot replacement and a second where the stack-slot is
15300 out of range, or is used as a subreg. */
15301 if (reg_equiv_mem (REGNO (ref)))
15302 {
15303 ref = reg_equiv_mem (REGNO (ref));
15304 base = find_replacement (&XEXP (ref, 0));
15305 }
15306 else
15307 /* The slot is out of range, or was dressed up in a SUBREG. */
15308 base = reg_equiv_address (REGNO (ref));
15309 }
15310 else
15311 base = find_replacement (&XEXP (ref, 0));
15312
15313 /* Handle the case where the address is too complex to be offset by 1. */
15314 if (GET_CODE (base) == MINUS
15315 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15316 {
15317 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15318
15319 emit_set_insn (base_plus, base);
15320 base = base_plus;
15321 }
15322 else if (GET_CODE (base) == PLUS)
15323 {
15324 /* The addend must be CONST_INT, or we would have dealt with it above. */
15325 HOST_WIDE_INT hi, lo;
15326
15327 offset += INTVAL (XEXP (base, 1));
15328 base = XEXP (base, 0);
15329
15330 /* Rework the address into a legal sequence of insns. */
15331 /* Valid range for lo is -4095 -> 4095 */
15332 lo = (offset >= 0
15333 ? (offset & 0xfff)
15334 : -((-offset) & 0xfff));
15335
15336 /* Corner case, if lo is the max offset then we would be out of range
15337 once we have added the additional 1 below, so bump the msb into the
15338 pre-loading insn(s). */
15339 if (lo == 4095)
15340 lo &= 0x7ff;
15341
15342 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15343 ^ (HOST_WIDE_INT) 0x80000000)
15344 - (HOST_WIDE_INT) 0x80000000);
15345
15346 gcc_assert (hi + lo == offset);
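      /* Worked example: offset = 0x1234 gives lo = 0x234 and hi = 0x1000;
	 offset = -4200 gives lo = -104 and hi = -4096.  In the corner case
	 offset = 4095, lo is first reduced to 0x7ff so that the offset + 1
	 used below still fits in the 12-bit range, and hi picks up 0x800.  */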
15347
15348 if (hi != 0)
15349 {
15350 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15351
15352 /* Get the base address; addsi3 knows how to handle constants
15353 that require more than one insn. */
15354 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15355 base = base_plus;
15356 offset = lo;
15357 }
15358 }
15359
15360 /* Operands[2] may overlap operands[0] (though it won't overlap
15361 operands[1]), that's why we asked for a DImode reg -- so we can
15362 use the bit that does not overlap. */
15363 if (REGNO (operands[2]) == REGNO (operands[0]))
15364 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15365 else
15366 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15367
15368 emit_insn (gen_zero_extendqisi2 (scratch,
15369 gen_rtx_MEM (QImode,
15370 plus_constant (Pmode, base,
15371 offset))));
15372 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15373 gen_rtx_MEM (QImode,
15374 plus_constant (Pmode, base,
15375 offset + 1))));
15376 if (!BYTES_BIG_ENDIAN)
15377 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15378 gen_rtx_IOR (SImode,
15379 gen_rtx_ASHIFT
15380 (SImode,
15381 gen_rtx_SUBREG (SImode, operands[0], 0),
15382 GEN_INT (8)),
15383 scratch));
15384 else
15385 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15386 gen_rtx_IOR (SImode,
15387 gen_rtx_ASHIFT (SImode, scratch,
15388 GEN_INT (8)),
15389 gen_rtx_SUBREG (SImode, operands[0], 0)));
15390 }
15391
15392 /* Handle storing a half-word to memory during reload by synthesizing as two
15393 byte stores. Take care not to clobber the input values until after we
15394 have moved them somewhere safe. This code assumes that if the DImode
15395 scratch in operands[2] overlaps either the input value or output address
15396 in some way, then that value must die in this insn (we absolutely need
15397 two scratch registers for some corner cases). */
15398 void
15399 arm_reload_out_hi (rtx *operands)
15400 {
15401 rtx ref = operands[0];
15402 rtx outval = operands[1];
15403 rtx base, scratch;
15404 HOST_WIDE_INT offset = 0;
15405
15406 if (GET_CODE (ref) == SUBREG)
15407 {
15408 offset = SUBREG_BYTE (ref);
15409 ref = SUBREG_REG (ref);
15410 }
15411
15412 if (REG_P (ref))
15413 {
15414 /* We have a pseudo which has been spilt onto the stack; there
15415 are two cases here: the first where there is a simple
15416 stack-slot replacement and a second where the stack-slot is
15417 out of range, or is used as a subreg. */
15418 if (reg_equiv_mem (REGNO (ref)))
15419 {
15420 ref = reg_equiv_mem (REGNO (ref));
15421 base = find_replacement (&XEXP (ref, 0));
15422 }
15423 else
15424 /* The slot is out of range, or was dressed up in a SUBREG. */
15425 base = reg_equiv_address (REGNO (ref));
15426 }
15427 else
15428 base = find_replacement (&XEXP (ref, 0));
15429
15430 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15431
15432 /* Handle the case where the address is too complex to be offset by 1. */
15433 if (GET_CODE (base) == MINUS
15434 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15435 {
15436 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15437
15438 /* Be careful not to destroy OUTVAL. */
15439 if (reg_overlap_mentioned_p (base_plus, outval))
15440 {
15441 /* Updating base_plus might destroy outval, see if we can
15442 swap the scratch and base_plus. */
15443 if (!reg_overlap_mentioned_p (scratch, outval))
15444 std::swap (scratch, base_plus);
15445 else
15446 {
15447 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15448
15449 /* Be conservative and copy OUTVAL into the scratch now,
15450 this should only be necessary if outval is a subreg
15451 of something larger than a word. */
15452 /* XXX Might this clobber base? I can't see how it can,
15453 since scratch is known to overlap with OUTVAL, and
15454 must be wider than a word. */
15455 emit_insn (gen_movhi (scratch_hi, outval));
15456 outval = scratch_hi;
15457 }
15458 }
15459
15460 emit_set_insn (base_plus, base);
15461 base = base_plus;
15462 }
15463 else if (GET_CODE (base) == PLUS)
15464 {
15465 /* The addend must be CONST_INT, or we would have dealt with it above. */
15466 HOST_WIDE_INT hi, lo;
15467
15468 offset += INTVAL (XEXP (base, 1));
15469 base = XEXP (base, 0);
15470
15471 /* Rework the address into a legal sequence of insns. */
15472 /* Valid range for lo is -4095 -> 4095 */
15473 lo = (offset >= 0
15474 ? (offset & 0xfff)
15475 : -((-offset) & 0xfff));
15476
15477 /* Corner case, if lo is the max offset then we would be out of range
15478 once we have added the additional 1 below, so bump the msb into the
15479 pre-loading insn(s). */
15480 if (lo == 4095)
15481 lo &= 0x7ff;
15482
15483 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15484 ^ (HOST_WIDE_INT) 0x80000000)
15485 - (HOST_WIDE_INT) 0x80000000);
15486
15487 gcc_assert (hi + lo == offset);
15488
15489 if (hi != 0)
15490 {
15491 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15492
15493 /* Be careful not to destroy OUTVAL. */
15494 if (reg_overlap_mentioned_p (base_plus, outval))
15495 {
15496 /* Updating base_plus might destroy outval, see if we
15497 can swap the scratch and base_plus. */
15498 if (!reg_overlap_mentioned_p (scratch, outval))
15499 std::swap (scratch, base_plus);
15500 else
15501 {
15502 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15503
15504 /* Be conservative and copy outval into scratch now,
15505 this should only be necessary if outval is a
15506 subreg of something larger than a word. */
15507 /* XXX Might this clobber base? I can't see how it
15508 can, since scratch is known to overlap with
15509 outval. */
15510 emit_insn (gen_movhi (scratch_hi, outval));
15511 outval = scratch_hi;
15512 }
15513 }
15514
15515 /* Get the base address; addsi3 knows how to handle constants
15516 that require more than one insn. */
15517 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15518 base = base_plus;
15519 offset = lo;
15520 }
15521 }
15522
15523 if (BYTES_BIG_ENDIAN)
15524 {
15525 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15526 plus_constant (Pmode, base,
15527 offset + 1)),
15528 gen_lowpart (QImode, outval)));
15529 emit_insn (gen_lshrsi3 (scratch,
15530 gen_rtx_SUBREG (SImode, outval, 0),
15531 GEN_INT (8)));
15532 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15533 offset)),
15534 gen_lowpart (QImode, scratch)));
15535 }
15536 else
15537 {
15538 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15539 offset)),
15540 gen_lowpart (QImode, outval)));
15541 emit_insn (gen_lshrsi3 (scratch,
15542 gen_rtx_SUBREG (SImode, outval, 0),
15543 GEN_INT (8)));
15544 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15545 plus_constant (Pmode, base,
15546 offset + 1)),
15547 gen_lowpart (QImode, scratch)));
15548 }
15549 }
15550
15551 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15552 (padded to the size of a word) should be passed in a register. */
15553
15554 static bool
15555 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15556 {
15557 if (TARGET_AAPCS_BASED)
15558 return must_pass_in_stack_var_size (mode, type);
15559 else
15560 return must_pass_in_stack_var_size_or_pad (mode, type);
15561 }
15562
15563
15564 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15565 Return true if an argument passed on the stack should be padded upwards,
15566 i.e. if the least-significant byte has useful data.
15567 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15568 aggregate types are placed at the lowest memory address.  */
15569
15570 bool
15571 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15572 {
15573 if (!TARGET_AAPCS_BASED)
15574 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15575
15576 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15577 return false;
15578
15579 return true;
15580 }
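/* For example, on a big-endian AAPCS target a 'short' argument passed on
   the stack is padded downwards (false is returned), whereas a small
   structure is padded upwards (true is returned).  On a little-endian
   AAPCS target this function always returns true.  */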
15581
15582
15583 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15584 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15585 register has useful data, and return the opposite if the most
15586 significant byte does. */
15587
15588 bool
15589 arm_pad_reg_upward (machine_mode mode,
15590 tree type, int first ATTRIBUTE_UNUSED)
15591 {
15592 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15593 {
15594 /* For AAPCS, small aggregates, small fixed-point types,
15595 and small complex types are always padded upwards. */
15596 if (type)
15597 {
15598 if ((AGGREGATE_TYPE_P (type)
15599 || TREE_CODE (type) == COMPLEX_TYPE
15600 || FIXED_POINT_TYPE_P (type))
15601 && int_size_in_bytes (type) <= 4)
15602 return true;
15603 }
15604 else
15605 {
15606 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15607 && GET_MODE_SIZE (mode) <= 4)
15608 return true;
15609 }
15610 }
15611
15612 /* Otherwise, use default padding. */
15613 return !BYTES_BIG_ENDIAN;
15614 }
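/* For example, on a big-endian AAPCS target a 3-byte aggregate yields true
   (padded upwards), while a plain 'int' falls through to the default and
   yields !BYTES_BIG_ENDIAN, i.e. false.  */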
15615
15616 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15617 assuming that the address in the base register is word aligned. */
15618 bool
15619 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15620 {
15621 HOST_WIDE_INT max_offset;
15622
15623 /* Offset must be a multiple of 4 in Thumb mode. */
15624 if (TARGET_THUMB2 && ((offset & 3) != 0))
15625 return false;
15626
15627 if (TARGET_THUMB2)
15628 max_offset = 1020;
15629 else if (TARGET_ARM)
15630 max_offset = 255;
15631 else
15632 return false;
15633
15634 return ((offset <= max_offset) && (offset >= -max_offset));
15635 }
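/* For example, an offset of 252 is accepted in both ARM and Thumb-2 state,
   1020 only in Thumb-2 (ARM-state LDRD/STRD is limited to +/-255), and 255
   only in ARM state (it is not a multiple of 4).  In Thumb-1 the function
   always returns false.  */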
15636
15637 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15638    Assumes that RT, RT2, and RN are REG.  This is guaranteed by the patterns.
15639    Assumes that the address in the base register RN is word aligned.  The
15640    pattern guarantees that both memory accesses use the same base register,
15641    that the offsets are constants within range, and that the gap between them is 4.
15642    If reload is complete, also check that the registers are legal.  WBACK indicates
15643    whether the address is updated.  LOAD indicates whether the access is a load or a store.  */
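/* For example, in ARM state LDRD r4, r5, [r6] uses an acceptable register
   combination (even first register, consecutive pair), whereas
   LDRD r5, r6, [r6] does not, and in both ARM and Thumb-2 state a
   writeback form whose base register equals one of the destination
   registers is rejected.  */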
15644 bool
15645 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15646 bool wback, bool load)
15647 {
15648 unsigned int t, t2, n;
15649
15650 if (!reload_completed)
15651 return true;
15652
15653 if (!offset_ok_for_ldrd_strd (offset))
15654 return false;
15655
15656 t = REGNO (rt);
15657 t2 = REGNO (rt2);
15658 n = REGNO (rn);
15659
15660 if ((TARGET_THUMB2)
15661 && ((wback && (n == t || n == t2))
15662 || (t == SP_REGNUM)
15663 || (t == PC_REGNUM)
15664 || (t2 == SP_REGNUM)
15665 || (t2 == PC_REGNUM)
15666 || (!load && (n == PC_REGNUM))
15667 || (load && (t == t2))
15668 /* Triggers Cortex-M3 LDRD errata. */
15669 || (!wback && load && fix_cm3_ldrd && (n == t))))
15670 return false;
15671
15672 if ((TARGET_ARM)
15673 && ((wback && (n == t || n == t2))
15674 || (t2 == PC_REGNUM)
15675 || (t % 2 != 0) /* First destination register is not even. */
15676 || (t2 != t + 1)
15677 /* PC can be used as base register (for offset addressing only),
15678 but it is deprecated. */
15679 || (n == PC_REGNUM)))
15680 return false;
15681
15682 return true;
15683 }
15684
15685 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15686 operand MEM's address contains an immediate offset from the base
15687 register and has no side effects, in which case it sets BASE and
15688 OFFSET accordingly. */
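/* For example, (mem (plus (reg r1) (const_int 8))) yields *BASE = r1 and
   *OFFSET = 8, a plain (mem (reg r1)) yields *OFFSET = 0, and an
   auto-modify address such as (post_inc ...) is rejected because it has
   side effects.  */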
15689 static bool
15690 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15691 {
15692 rtx addr;
15693
15694 gcc_assert (base != NULL && offset != NULL);
15695
15696 /* TODO: Handle more general memory operand patterns, such as
15697 PRE_DEC and PRE_INC. */
15698
15699 if (side_effects_p (mem))
15700 return false;
15701
15702 /* Can't deal with subregs. */
15703 if (GET_CODE (mem) == SUBREG)
15704 return false;
15705
15706 gcc_assert (MEM_P (mem));
15707
15708 *offset = const0_rtx;
15709
15710 addr = XEXP (mem, 0);
15711
15712 /* If addr isn't valid for DImode, then we can't handle it. */
15713 if (!arm_legitimate_address_p (DImode, addr,
15714 reload_in_progress || reload_completed))
15715 return false;
15716
15717 if (REG_P (addr))
15718 {
15719 *base = addr;
15720 return true;
15721 }
15722 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15723 {
15724 *base = XEXP (addr, 0);
15725 *offset = XEXP (addr, 1);
15726 return (REG_P (*base) && CONST_INT_P (*offset));
15727 }
15728
15729 return false;
15730 }
15731
15732 /* Called from a peephole2 to replace two word-size accesses with a
15733 single LDRD/STRD instruction. Returns true iff we can generate a
15734 new instruction sequence. That is, both accesses use the same base
15735 register and the gap between constant offsets is 4. This function
15736 may reorder its operands to match ldrd/strd RTL templates.
15737 OPERANDS are the operands found by the peephole matcher;
15738 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15739    corresponding memory operands.  LOAD indicates whether the access
15740    is a load or a store.  CONST_STORE indicates a store of constant
15741    integer values held in OPERANDS[4,5]; in that case the pattern is
15742    assumed to be four insns long for the purpose of checking dead
15743    registers.  COMMUTE indicates that register operands may be reordered.  */
15744 bool
15745 gen_operands_ldrd_strd (rtx *operands, bool load,
15746 bool const_store, bool commute)
15747 {
15748 int nops = 2;
15749 HOST_WIDE_INT offsets[2], offset;
15750 rtx base = NULL_RTX;
15751 rtx cur_base, cur_offset, tmp;
15752 int i, gap;
15753 HARD_REG_SET regset;
15754
15755 gcc_assert (!const_store || !load);
15756 /* Check that the memory references are immediate offsets from the
15757 same base register. Extract the base register, the destination
15758 registers, and the corresponding memory offsets. */
15759 for (i = 0; i < nops; i++)
15760 {
15761 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15762 return false;
15763
15764 if (i == 0)
15765 base = cur_base;
15766 else if (REGNO (base) != REGNO (cur_base))
15767 return false;
15768
15769 offsets[i] = INTVAL (cur_offset);
15770 if (GET_CODE (operands[i]) == SUBREG)
15771 {
15772 tmp = SUBREG_REG (operands[i]);
15773 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15774 operands[i] = tmp;
15775 }
15776 }
15777
15778 /* Make sure there is no dependency between the individual loads. */
15779 if (load && REGNO (operands[0]) == REGNO (base))
15780 return false; /* RAW */
15781
15782 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15783 return false; /* WAW */
15784
15785 /* If the same input register is used in both stores
15786 when storing different constants, try to find a free register.
15787 For example, the code
15788 mov r0, 0
15789 str r0, [r2]
15790 mov r0, 1
15791 str r0, [r2, #4]
15792 can be transformed into
15793 mov r1, 0
15794 strd r1, r0, [r2]
15795 in Thumb mode assuming that r1 is free. */
15796 if (const_store
15797 && REGNO (operands[0]) == REGNO (operands[1])
15798 && INTVAL (operands[4]) != INTVAL (operands[5]))
15799 {
15800 if (TARGET_THUMB2)
15801 {
15802 CLEAR_HARD_REG_SET (regset);
15803 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15804 if (tmp == NULL_RTX)
15805 return false;
15806
15807 /* Use the new register in the first load to ensure that
15808 if the original input register is not dead after peephole,
15809 then it will have the correct constant value. */
15810 operands[0] = tmp;
15811 }
15812 else if (TARGET_ARM)
15813 {
15814 return false;
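	  /* Note that the early return above leaves the ARM-state handling
	     below unreachable; only the Thumb-2 path currently tries to find
	     a replacement register for this case.  */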
15815 int regno = REGNO (operands[0]);
15816 if (!peep2_reg_dead_p (4, operands[0]))
15817 {
15818 /* When the input register is even and is not dead after the
15819 pattern, it has to hold the second constant but we cannot
15820 form a legal STRD in ARM mode with this register as the second
15821 register. */
15822 if (regno % 2 == 0)
15823 return false;
15824
15825 /* Is regno-1 free? */
15826 SET_HARD_REG_SET (regset);
15827 CLEAR_HARD_REG_BIT(regset, regno - 1);
15828 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15829 if (tmp == NULL_RTX)
15830 return false;
15831
15832 operands[0] = tmp;
15833 }
15834 else
15835 {
15836 /* Find a DImode register. */
15837 CLEAR_HARD_REG_SET (regset);
15838 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15839 if (tmp != NULL_RTX)
15840 {
15841 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15842 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15843 }
15844 else
15845 {
15846 /* Can we use the input register to form a DI register? */
15847 SET_HARD_REG_SET (regset);
15848 CLEAR_HARD_REG_BIT(regset,
15849 regno % 2 == 0 ? regno + 1 : regno - 1);
15850 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15851 if (tmp == NULL_RTX)
15852 return false;
15853 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15854 }
15855 }
15856
15857 gcc_assert (operands[0] != NULL_RTX);
15858 gcc_assert (operands[1] != NULL_RTX);
15859 gcc_assert (REGNO (operands[0]) % 2 == 0);
15860 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15861 }
15862 }
15863
15864 /* Make sure the instructions are ordered with lower memory access first. */
15865 if (offsets[0] > offsets[1])
15866 {
15867 gap = offsets[0] - offsets[1];
15868 offset = offsets[1];
15869
15870 /* Swap the instructions such that lower memory is accessed first. */
15871 std::swap (operands[0], operands[1]);
15872 std::swap (operands[2], operands[3]);
15873 if (const_store)
15874 std::swap (operands[4], operands[5]);
15875 }
15876 else
15877 {
15878 gap = offsets[1] - offsets[0];
15879 offset = offsets[0];
15880 }
15881
15882 /* Make sure accesses are to consecutive memory locations. */
15883 if (gap != 4)
15884 return false;
15885
15886 /* Make sure we generate legal instructions. */
15887 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15888 false, load))
15889 return true;
15890
15891 /* In Thumb state the registers are almost unconstrained, so if the check
15892    above failed there is little hope of fixing it by choosing different registers. */
15893 if (TARGET_THUMB2)
15894 return false;
15895
15896 if (load && commute)
15897 {
15898 /* Try reordering registers. */
15899 std::swap (operands[0], operands[1]);
15900 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15901 false, load))
15902 return true;
15903 }
15904
15905 if (const_store)
15906 {
15907 /* If input registers are dead after this pattern, they can be
15908 reordered or replaced by other registers that are free in the
15909 current pattern. */
15910 if (!peep2_reg_dead_p (4, operands[0])
15911 || !peep2_reg_dead_p (4, operands[1]))
15912 return false;
15913
15914 /* Try to reorder the input registers. */
15915 /* For example, the code
15916 mov r0, 0
15917 mov r1, 1
15918 str r1, [r2]
15919 str r0, [r2, #4]
15920 can be transformed into
15921 mov r1, 0
15922 mov r0, 1
15923 strd r0, [r2]
15924 */
15925 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15926 false, false))
15927 {
15928 std::swap (operands[0], operands[1]);
15929 return true;
15930 }
15931
15932 /* Try to find a free DI register. */
15933 CLEAR_HARD_REG_SET (regset);
15934 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15935 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15936 while (true)
15937 {
15938 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15939 if (tmp == NULL_RTX)
15940 return false;
15941
15942 /* DREG must be an even-numbered register in DImode.
15943 Split it into SI registers. */
15944 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15945 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15946 gcc_assert (operands[0] != NULL_RTX);
15947 gcc_assert (operands[1] != NULL_RTX);
15948 gcc_assert (REGNO (operands[0]) % 2 == 0);
15949 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15950
15951 return (operands_ok_ldrd_strd (operands[0], operands[1],
15952 base, offset,
15953 false, load));
15954 }
15955 }
15956
15957 return false;
15958 }
15959
15960
15961
15962 \f
15963 /* Print a symbolic form of X to the debug file, F. */
15964 static void
15965 arm_print_value (FILE *f, rtx x)
15966 {
15967 switch (GET_CODE (x))
15968 {
15969 case CONST_INT:
15970 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15971 return;
15972
15973 case CONST_DOUBLE:
15974 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15975 return;
15976
15977 case CONST_VECTOR:
15978 {
15979 int i;
15980
15981 fprintf (f, "<");
15982 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15983 {
15984 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15985 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15986 fputc (',', f);
15987 }
15988 fprintf (f, ">");
15989 }
15990 return;
15991
15992 case CONST_STRING:
15993 fprintf (f, "\"%s\"", XSTR (x, 0));
15994 return;
15995
15996 case SYMBOL_REF:
15997 fprintf (f, "`%s'", XSTR (x, 0));
15998 return;
15999
16000 case LABEL_REF:
16001 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
16002 return;
16003
16004 case CONST:
16005 arm_print_value (f, XEXP (x, 0));
16006 return;
16007
16008 case PLUS:
16009 arm_print_value (f, XEXP (x, 0));
16010 fprintf (f, "+");
16011 arm_print_value (f, XEXP (x, 1));
16012 return;
16013
16014 case PC:
16015 fprintf (f, "pc");
16016 return;
16017
16018 default:
16019 fprintf (f, "????");
16020 return;
16021 }
16022 }
16023 \f
16024 /* Routines for manipulation of the constant pool. */
16025
16026 /* Arm instructions cannot load a large constant directly into a
16027 register; they have to come from a pc relative load. The constant
16028 must therefore be placed in the addressable range of the pc
16029 relative load. Depending on the precise pc relative load
16030 instruction the range is somewhere between 256 bytes and 4k. This
16031 means that we often have to dump a constant inside a function, and
16032 generate code to branch around it.
16033
16034 It is important to minimize this, since the branches will slow
16035 things down and make the code larger.
16036
16037 Normally we can hide the table after an existing unconditional
16038 branch so that there is no interruption of the flow, but in the
16039 worst case the code looks like this:
16040
16041 ldr rn, L1
16042 ...
16043 b L2
16044 align
16045 L1: .long value
16046 L2:
16047 ...
16048
16049 ldr rn, L3
16050 ...
16051 b L4
16052 align
16053 L3: .long value
16054 L4:
16055 ...
16056
16057 We fix this by performing a scan after scheduling, which notices
16058 which instructions need to have their operands fetched from the
16059 constant table and builds the table.
16060
16061 The algorithm starts by building a table of all the constants that
16062 need fixing up and all the natural barriers in the function (places
16063 where a constant table can be dropped without breaking the flow).
16064 For each fixup we note how far the pc-relative replacement will be
16065 able to reach and the offset of the instruction into the function.
16066
16067 Having built the table we then group the fixes together to form
16068 tables that are as large as possible (subject to addressing
16069 constraints) and emit each table of constants after the last
16070 barrier that is within range of all the instructions in the group.
16071 If a group does not contain a barrier, then we forcibly create one
16072 by inserting a jump instruction into the flow. Once the table has
16073 been inserted, the insns are then modified to reference the
16074 relevant entry in the pool.
16075
16076 Possible enhancements to the algorithm (not implemented) are:
16077
16078 1) For some processors and object formats, there may be benefit in
16079 aligning the pools to the start of cache lines; this alignment
16080 would need to be taken into account when calculating addressability
16081 of a pool. */
16082
16083 /* These typedefs are located at the start of this file, so that
16084 they can be used in the prototypes there. This comment is to
16085 remind readers of that fact so that the following structures
16086 can be understood more easily.
16087
16088 typedef struct minipool_node Mnode;
16089 typedef struct minipool_fixup Mfix; */
16090
16091 struct minipool_node
16092 {
16093 /* Doubly linked chain of entries. */
16094 Mnode * next;
16095 Mnode * prev;
16096 /* The maximum offset into the code at which this entry can be placed.  While
16097    pushing fixes for forward references, all entries are sorted in order
16098    of increasing max_address.  */
16099 HOST_WIDE_INT max_address;
16100 /* Similarly for an entry inserted for a backwards ref. */
16101 HOST_WIDE_INT min_address;
16102 /* The number of fixes referencing this entry. This can become zero
16103 if we "unpush" an entry. In this case we ignore the entry when we
16104 come to emit the code. */
16105 int refcount;
16106 /* The offset from the start of the minipool. */
16107 HOST_WIDE_INT offset;
16108 /* The value in table. */
16109 rtx value;
16110 /* The mode of value. */
16111 machine_mode mode;
16112 /* The size of the value. With iWMMXt enabled
16113 sizes > 4 also imply an alignment of 8 bytes.  */
16114 int fix_size;
16115 };
16116
16117 struct minipool_fixup
16118 {
16119 Mfix * next;
16120 rtx_insn * insn;
16121 HOST_WIDE_INT address;
16122 rtx * loc;
16123 machine_mode mode;
16124 int fix_size;
16125 rtx value;
16126 Mnode * minipool;
16127 HOST_WIDE_INT forwards;
16128 HOST_WIDE_INT backwards;
16129 };
16130
16131 /* Fixes less than a word need padding out to a word boundary. */
16132 #define MINIPOOL_FIX_SIZE(mode) \
16133 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
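/* For example, an HImode fix (2 bytes) is padded to 4 bytes, while SImode
   and DImode fixes keep their natural sizes of 4 and 8 bytes.  */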
16134
16135 static Mnode * minipool_vector_head;
16136 static Mnode * minipool_vector_tail;
16137 static rtx_code_label *minipool_vector_label;
16138 static int minipool_pad;
16139
16140 /* The linked list of all minipool fixes required for this function. */
16141 Mfix * minipool_fix_head;
16142 Mfix * minipool_fix_tail;
16143 /* The fix entry for the current minipool, once it has been placed. */
16144 Mfix * minipool_barrier;
16145
16146 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16147 #define JUMP_TABLES_IN_TEXT_SECTION 0
16148 #endif
16149
16150 static HOST_WIDE_INT
16151 get_jump_table_size (rtx_jump_table_data *insn)
16152 {
16153 /* ADDR_VECs only take room if read-only data goes into the text
16154    section.  */
16155 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16156 {
16157 rtx body = PATTERN (insn);
16158 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16159 HOST_WIDE_INT size;
16160 HOST_WIDE_INT modesize;
16161
16162 modesize = GET_MODE_SIZE (GET_MODE (body));
16163 size = modesize * XVECLEN (body, elt);
16164 switch (modesize)
16165 {
16166 case 1:
16167 /* Round up size of TBB table to a halfword boundary. */
16168 size = (size + 1) & ~(HOST_WIDE_INT)1;
16169 break;
16170 case 2:
16171 /* No padding necessary for TBH. */
16172 break;
16173 case 4:
16174 /* Add two bytes for alignment on Thumb. */
16175 if (TARGET_THUMB)
16176 size += 2;
16177 break;
16178 default:
16179 gcc_unreachable ();
16180 }
16181 return size;
16182 }
16183
16184 return 0;
16185 }
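/* For example, when jump tables live in the text section, a byte-sized
   (TBB-style) ADDR_DIFF_VEC with five entries occupies 5 bytes, rounded up
   to 6; a word-sized table with three entries occupies 12 bytes, plus 2
   bytes of Thumb alignment padding.  */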
16186
16187 /* Return the maximum amount of padding that will be inserted before
16188 label LABEL. */
16189
16190 static HOST_WIDE_INT
16191 get_label_padding (rtx label)
16192 {
16193 HOST_WIDE_INT align, min_insn_size;
16194
16195 align = 1 << label_to_alignment (label);
16196 min_insn_size = TARGET_THUMB ? 2 : 4;
16197 return align > min_insn_size ? align - min_insn_size : 0;
16198 }
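/* For example, a label aligned to 8 bytes (label_to_alignment == 3) can be
   preceded by up to 8 - 2 = 6 bytes of padding on Thumb, or 8 - 4 = 4 bytes
   on ARM.  */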
16199
16200 /* Move a minipool fix MP from its current location to before MAX_MP.
16201 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16202 constraints may need updating. */
16203 static Mnode *
16204 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16205 HOST_WIDE_INT max_address)
16206 {
16207 /* The code below assumes these are different. */
16208 gcc_assert (mp != max_mp);
16209
16210 if (max_mp == NULL)
16211 {
16212 if (max_address < mp->max_address)
16213 mp->max_address = max_address;
16214 }
16215 else
16216 {
16217 if (max_address > max_mp->max_address - mp->fix_size)
16218 mp->max_address = max_mp->max_address - mp->fix_size;
16219 else
16220 mp->max_address = max_address;
16221
16222 /* Unlink MP from its current position. Since max_mp is non-null,
16223 mp->prev must be non-null. */
16224 mp->prev->next = mp->next;
16225 if (mp->next != NULL)
16226 mp->next->prev = mp->prev;
16227 else
16228 minipool_vector_tail = mp->prev;
16229
16230 /* Re-insert it before MAX_MP. */
16231 mp->next = max_mp;
16232 mp->prev = max_mp->prev;
16233 max_mp->prev = mp;
16234
16235 if (mp->prev != NULL)
16236 mp->prev->next = mp;
16237 else
16238 minipool_vector_head = mp;
16239 }
16240
16241 /* Save the new entry. */
16242 max_mp = mp;
16243
16244 /* Scan over the preceding entries and adjust their addresses as
16245 required. */
16246 while (mp->prev != NULL
16247 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16248 {
16249 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16250 mp = mp->prev;
16251 }
16252
16253 return max_mp;
16254 }
16255
16256 /* Add a constant to the minipool for a forward reference. Returns the
16257 node added or NULL if the constant will not fit in this pool. */
16258 static Mnode *
16259 add_minipool_forward_ref (Mfix *fix)
16260 {
16261 /* If set, max_mp is the first pool_entry that has a lower
16262 constraint than the one we are trying to add. */
16263 Mnode * max_mp = NULL;
16264 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16265 Mnode * mp;
16266
16267 /* If the minipool starts before the end of FIX->INSN then this FIX
16268 can not be placed into the current pool. Furthermore, adding the
16269 new constant pool entry may cause the pool to start FIX_SIZE bytes
16270 earlier. */
16271 if (minipool_vector_head &&
16272 (fix->address + get_attr_length (fix->insn)
16273 >= minipool_vector_head->max_address - fix->fix_size))
16274 return NULL;
16275
16276 /* Scan the pool to see if a constant with the same value has
16277 already been added. While we are doing this, also note the
16278 location where we must insert the constant if it doesn't already
16279 exist. */
16280 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16281 {
16282 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16283 && fix->mode == mp->mode
16284 && (!LABEL_P (fix->value)
16285 || (CODE_LABEL_NUMBER (fix->value)
16286 == CODE_LABEL_NUMBER (mp->value)))
16287 && rtx_equal_p (fix->value, mp->value))
16288 {
16289 /* More than one fix references this entry. */
16290 mp->refcount++;
16291 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16292 }
16293
16294 /* Note the insertion point if necessary. */
16295 if (max_mp == NULL
16296 && mp->max_address > max_address)
16297 max_mp = mp;
16298
16299 /* If we are inserting an 8-byte aligned quantity and
16300 we have not already found an insertion point, then
16301 make sure that all such 8-byte aligned quantities are
16302 placed at the start of the pool. */
16303 if (ARM_DOUBLEWORD_ALIGN
16304 && max_mp == NULL
16305 && fix->fix_size >= 8
16306 && mp->fix_size < 8)
16307 {
16308 max_mp = mp;
16309 max_address = mp->max_address;
16310 }
16311 }
16312
16313 /* The value is not currently in the minipool, so we need to create
16314 a new entry for it. If MAX_MP is NULL, the entry will be put on
16315 the end of the list since the placement is less constrained than
16316 any existing entry. Otherwise, we insert the new fix before
16317 MAX_MP and, if necessary, adjust the constraints on the other
16318 entries. */
16319 mp = XNEW (Mnode);
16320 mp->fix_size = fix->fix_size;
16321 mp->mode = fix->mode;
16322 mp->value = fix->value;
16323 mp->refcount = 1;
16324 /* Not yet required for a backwards ref. */
16325 mp->min_address = -65536;
16326
16327 if (max_mp == NULL)
16328 {
16329 mp->max_address = max_address;
16330 mp->next = NULL;
16331 mp->prev = minipool_vector_tail;
16332
16333 if (mp->prev == NULL)
16334 {
16335 minipool_vector_head = mp;
16336 minipool_vector_label = gen_label_rtx ();
16337 }
16338 else
16339 mp->prev->next = mp;
16340
16341 minipool_vector_tail = mp;
16342 }
16343 else
16344 {
16345 if (max_address > max_mp->max_address - mp->fix_size)
16346 mp->max_address = max_mp->max_address - mp->fix_size;
16347 else
16348 mp->max_address = max_address;
16349
16350 mp->next = max_mp;
16351 mp->prev = max_mp->prev;
16352 max_mp->prev = mp;
16353 if (mp->prev != NULL)
16354 mp->prev->next = mp;
16355 else
16356 minipool_vector_head = mp;
16357 }
16358
16359 /* Save the new entry. */
16360 max_mp = mp;
16361
16362 /* Scan over the preceding entries and adjust their addresses as
16363 required. */
16364 while (mp->prev != NULL
16365 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16366 {
16367 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16368 mp = mp->prev;
16369 }
16370
16371 return max_mp;
16372 }
16373
16374 static Mnode *
16375 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16376 HOST_WIDE_INT min_address)
16377 {
16378 HOST_WIDE_INT offset;
16379
16380 /* The code below assumes these are different. */
16381 gcc_assert (mp != min_mp);
16382
16383 if (min_mp == NULL)
16384 {
16385 if (min_address > mp->min_address)
16386 mp->min_address = min_address;
16387 }
16388 else
16389 {
16390 /* We will adjust this below if it is too loose. */
16391 mp->min_address = min_address;
16392
16393 /* Unlink MP from its current position. Since min_mp is non-null,
16394 mp->next must be non-null. */
16395 mp->next->prev = mp->prev;
16396 if (mp->prev != NULL)
16397 mp->prev->next = mp->next;
16398 else
16399 minipool_vector_head = mp->next;
16400
16401 /* Reinsert it after MIN_MP. */
16402 mp->prev = min_mp;
16403 mp->next = min_mp->next;
16404 min_mp->next = mp;
16405 if (mp->next != NULL)
16406 mp->next->prev = mp;
16407 else
16408 minipool_vector_tail = mp;
16409 }
16410
16411 min_mp = mp;
16412
16413 offset = 0;
16414 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16415 {
16416 mp->offset = offset;
16417 if (mp->refcount > 0)
16418 offset += mp->fix_size;
16419
16420 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16421 mp->next->min_address = mp->min_address + mp->fix_size;
16422 }
16423
16424 return min_mp;
16425 }
16426
16427 /* Add a constant to the minipool for a backward reference. Returns the
16428 node added or NULL if the constant will not fit in this pool.
16429
16430 Note that the code for insertion of a backwards reference can be
16431 somewhat confusing because the calculated offsets for each fix do
16432 not take into account the size of the pool (which is still under
16433 construction). */
16434 static Mnode *
16435 add_minipool_backward_ref (Mfix *fix)
16436 {
16437 /* If set, min_mp is the last pool_entry that has a lower constraint
16438 than the one we are trying to add. */
16439 Mnode *min_mp = NULL;
16440 /* This can be negative, since it is only a constraint. */
16441 HOST_WIDE_INT min_address = fix->address - fix->backwards;
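  /* MIN_ADDRESS is the lowest address at which the new pool entry could be
     placed while still being reachable backwards from FIX's insn.  */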
16442 Mnode *mp;
16443
16444 /* If we can't reach the current pool from this insn, or if we can't
16445 insert this entry at the end of the pool without pushing other
16446 fixes out of range, then we don't try. This ensures that we
16447 can't fail later on. */
16448 if (min_address >= minipool_barrier->address
16449 || (minipool_vector_tail->min_address + fix->fix_size
16450 >= minipool_barrier->address))
16451 return NULL;
16452
16453 /* Scan the pool to see if a constant with the same value has
16454 already been added. While we are doing this, also note the
16455 location where we must insert the constant if it doesn't already
16456 exist. */
16457 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16458 {
16459 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16460 && fix->mode == mp->mode
16461 && (!LABEL_P (fix->value)
16462 || (CODE_LABEL_NUMBER (fix->value)
16463 == CODE_LABEL_NUMBER (mp->value)))
16464 && rtx_equal_p (fix->value, mp->value)
16465 /* Check that there is enough slack to move this entry to the
16466 end of the table (this is conservative). */
16467 && (mp->max_address
16468 > (minipool_barrier->address
16469 + minipool_vector_tail->offset
16470 + minipool_vector_tail->fix_size)))
16471 {
16472 mp->refcount++;
16473 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16474 }
16475
16476 if (min_mp != NULL)
16477 mp->min_address += fix->fix_size;
16478 else
16479 {
16480 /* Note the insertion point if necessary. */
16481 if (mp->min_address < min_address)
16482 {
16483 /* For now, we do not allow the insertion of 8-byte alignment
16484 requiring nodes anywhere but at the start of the pool. */
16485 if (ARM_DOUBLEWORD_ALIGN
16486 && fix->fix_size >= 8 && mp->fix_size < 8)
16487 return NULL;
16488 else
16489 min_mp = mp;
16490 }
16491 else if (mp->max_address
16492 < minipool_barrier->address + mp->offset + fix->fix_size)
16493 {
16494 /* Inserting before this entry would push the fix beyond
16495 its maximum address (which can happen if we have
16496 re-located a forwards fix); force the new fix to come
16497 after it. */
16498 if (ARM_DOUBLEWORD_ALIGN
16499 && fix->fix_size >= 8 && mp->fix_size < 8)
16500 return NULL;
16501 else
16502 {
16503 min_mp = mp;
16504 min_address = mp->min_address + fix->fix_size;
16505 }
16506 }
16507 /* Do not insert a non-8-byte aligned quantity before 8-byte
16508 aligned quantities. */
16509 else if (ARM_DOUBLEWORD_ALIGN
16510 && fix->fix_size < 8
16511 && mp->fix_size >= 8)
16512 {
16513 min_mp = mp;
16514 min_address = mp->min_address + fix->fix_size;
16515 }
16516 }
16517 }
16518
16519 /* We need to create a new entry. */
16520 mp = XNEW (Mnode);
16521 mp->fix_size = fix->fix_size;
16522 mp->mode = fix->mode;
16523 mp->value = fix->value;
16524 mp->refcount = 1;
16525 mp->max_address = minipool_barrier->address + 65536;
16526
16527 mp->min_address = min_address;
16528
16529 if (min_mp == NULL)
16530 {
16531 mp->prev = NULL;
16532 mp->next = minipool_vector_head;
16533
16534 if (mp->next == NULL)
16535 {
16536 minipool_vector_tail = mp;
16537 minipool_vector_label = gen_label_rtx ();
16538 }
16539 else
16540 mp->next->prev = mp;
16541
16542 minipool_vector_head = mp;
16543 }
16544 else
16545 {
16546 mp->next = min_mp->next;
16547 mp->prev = min_mp;
16548 min_mp->next = mp;
16549
16550 if (mp->next != NULL)
16551 mp->next->prev = mp;
16552 else
16553 minipool_vector_tail = mp;
16554 }
16555
16556 /* Save the new entry. */
16557 min_mp = mp;
16558
16559 if (mp->prev)
16560 mp = mp->prev;
16561 else
16562 mp->offset = 0;
16563
16564 /* Scan over the following entries and adjust their offsets. */
16565 while (mp->next != NULL)
16566 {
16567 if (mp->next->min_address < mp->min_address + mp->fix_size)
16568 mp->next->min_address = mp->min_address + mp->fix_size;
16569
16570 if (mp->refcount)
16571 mp->next->offset = mp->offset + mp->fix_size;
16572 else
16573 mp->next->offset = mp->offset;
16574
16575 mp = mp->next;
16576 }
16577
16578 return min_mp;
16579 }
16580
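/* Assign an offset within the pool that ends at BARRIER to every entry
   that is still referenced, accumulating the entries' sizes in list order.  */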
16581 static void
16582 assign_minipool_offsets (Mfix *barrier)
16583 {
16584 HOST_WIDE_INT offset = 0;
16585 Mnode *mp;
16586
16587 minipool_barrier = barrier;
16588
16589 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16590 {
16591 mp->offset = offset;
16592
16593 if (mp->refcount > 0)
16594 offset += mp->fix_size;
16595 }
16596 }
16597
16598 /* Output the literal table.  */
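/* The pool is emitted as: a fresh label, an alignment directive (8- or
   4-byte), the pool's own label, one consttable_<N> entry for every node
   that is still referenced, a consttable_end marker and finally a barrier.  */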
16599 static void
16600 dump_minipool (rtx_insn *scan)
16601 {
16602 Mnode * mp;
16603 Mnode * nmp;
16604 int align64 = 0;
16605
16606 if (ARM_DOUBLEWORD_ALIGN)
16607 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16608 if (mp->refcount > 0 && mp->fix_size >= 8)
16609 {
16610 align64 = 1;
16611 break;
16612 }
16613
16614 if (dump_file)
16615 fprintf (dump_file,
16616 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16617 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16618
16619 scan = emit_label_after (gen_label_rtx (), scan);
16620 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16621 scan = emit_label_after (minipool_vector_label, scan);
16622
16623 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16624 {
16625 if (mp->refcount > 0)
16626 {
16627 if (dump_file)
16628 {
16629 fprintf (dump_file,
16630 ";; Offset %u, min %ld, max %ld ",
16631 (unsigned) mp->offset, (unsigned long) mp->min_address,
16632 (unsigned long) mp->max_address);
16633 arm_print_value (dump_file, mp->value);
16634 fputc ('\n', dump_file);
16635 }
16636
16637 switch (GET_MODE_SIZE (mp->mode))
16638 {
16639 #ifdef HAVE_consttable_1
16640 case 1:
16641 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16642 break;
16643
16644 #endif
16645 #ifdef HAVE_consttable_2
16646 case 2:
16647 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16648 break;
16649
16650 #endif
16651 #ifdef HAVE_consttable_4
16652 case 4:
16653 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16654 break;
16655
16656 #endif
16657 #ifdef HAVE_consttable_8
16658 case 8:
16659 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16660 break;
16661
16662 #endif
16663 #ifdef HAVE_consttable_16
16664 case 16:
16665 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16666 break;
16667
16668 #endif
16669 default:
16670 gcc_unreachable ();
16671 }
16672 }
16673
16674 nmp = mp->next;
16675 free (mp);
16676 }
16677
16678 minipool_vector_head = minipool_vector_tail = NULL;
16679 scan = emit_insn_after (gen_consttable_end (), scan);
16680 scan = emit_barrier_after (scan);
16681 }
16682
16683 /* Return the cost of forcibly inserting a barrier after INSN. */
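/* Lower values indicate more attractive locations; create_fix_barrier keeps
   the latest insn scanned whose cost is no worse than the best seen so far.  */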
16684 static int
16685 arm_barrier_cost (rtx insn)
16686 {
16687 /* Basing the location of the pool on the loop depth is preferable,
16688 but at the moment, the basic block information seems to be
16689 corrupted by this stage of the compilation. */
16690 int base_cost = 50;
16691 rtx next = next_nonnote_insn (insn);
16692
16693 if (next != NULL && LABEL_P (next))
16694 base_cost -= 20;
16695
16696 switch (GET_CODE (insn))
16697 {
16698 case CODE_LABEL:
16699 /* It will always be better to place the table before the label, rather
16700 than after it. */
16701 return 50;
16702
16703 case INSN:
16704 case CALL_INSN:
16705 return base_cost;
16706
16707 case JUMP_INSN:
16708 return base_cost - 10;
16709
16710 default:
16711 return base_cost + 10;
16712 }
16713 }
16714
16715 /* Find the best place in the insn stream in the range
16716 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16717 Create the barrier by inserting a jump and add a new fix entry for
16718 it. */
16719 static Mfix *
16720 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16721 {
16722 HOST_WIDE_INT count = 0;
16723 rtx_barrier *barrier;
16724 rtx_insn *from = fix->insn;
16725 /* The instruction after which we will insert the jump. */
16726 rtx_insn *selected = NULL;
16727 int selected_cost;
16728 /* The address at which the jump instruction will be placed. */
16729 HOST_WIDE_INT selected_address;
16730 Mfix * new_fix;
16731 HOST_WIDE_INT max_count = max_address - fix->address;
16732 rtx_code_label *label = gen_label_rtx ();
16733
16734 selected_cost = arm_barrier_cost (from);
16735 selected_address = fix->address;
16736
16737 while (from && count < max_count)
16738 {
16739 rtx_jump_table_data *tmp;
16740 int new_cost;
16741
16742 /* This code shouldn't have been called if there was a natural barrier
16743 within range. */
16744 gcc_assert (!BARRIER_P (from));
16745
16746 /* Count the length of this insn. This must stay in sync with the
16747 code that pushes minipool fixes. */
16748 if (LABEL_P (from))
16749 count += get_label_padding (from);
16750 else
16751 count += get_attr_length (from);
16752
16753 /* If there is a jump table, add its length. */
16754 if (tablejump_p (from, NULL, &tmp))
16755 {
16756 count += get_jump_table_size (tmp);
16757
16758 /* Jump tables aren't in a basic block, so base the cost on
16759 the dispatch insn. If we select this location, we will
16760 still put the pool after the table. */
16761 new_cost = arm_barrier_cost (from);
16762
16763 if (count < max_count
16764 && (!selected || new_cost <= selected_cost))
16765 {
16766 selected = tmp;
16767 selected_cost = new_cost;
16768 selected_address = fix->address + count;
16769 }
16770
16771 /* Continue after the dispatch table. */
16772 from = NEXT_INSN (tmp);
16773 continue;
16774 }
16775
16776 new_cost = arm_barrier_cost (from);
16777
16778 if (count < max_count
16779 && (!selected || new_cost <= selected_cost))
16780 {
16781 selected = from;
16782 selected_cost = new_cost;
16783 selected_address = fix->address + count;
16784 }
16785
16786 from = NEXT_INSN (from);
16787 }
16788
16789 /* Make sure that we found a place to insert the jump. */
16790 gcc_assert (selected);
16791
16792 /* Make sure we do not split a call and its corresponding
16793 CALL_ARG_LOCATION note. */
16794 if (CALL_P (selected))
16795 {
16796 rtx_insn *next = NEXT_INSN (selected);
16797 if (next && NOTE_P (next)
16798 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16799 selected = next;
16800 }
16801
16802 /* Create a new JUMP_INSN that branches around a barrier. */
16803 from = emit_jump_insn_after (gen_jump (label), selected);
16804 JUMP_LABEL (from) = label;
16805 barrier = emit_barrier_after (from);
16806 emit_label_after (label, barrier);
16807
16808 /* Create a minipool barrier entry for the new barrier. */
16809 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16810 new_fix->insn = barrier;
16811 new_fix->address = selected_address;
16812 new_fix->next = fix->next;
16813 fix->next = new_fix;
16814
16815 return new_fix;
16816 }
16817
16818 /* Record that there is a natural barrier in the insn stream at
16819 ADDRESS. */
16820 static void
16821 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16822 {
16823 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16824
16825 fix->insn = insn;
16826 fix->address = address;
16827
16828 fix->next = NULL;
16829 if (minipool_fix_head != NULL)
16830 minipool_fix_tail->next = fix;
16831 else
16832 minipool_fix_head = fix;
16833
16834 minipool_fix_tail = fix;
16835 }
16836
16837 /* Record INSN, which will need fixing up to load a value from the
16838 minipool. ADDRESS is the offset of the insn since the start of the
16839 function; LOC is a pointer to the part of the insn which requires
16840 fixing; VALUE is the constant that must be loaded, which is of type
16841 MODE. */
16842 static void
16843 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16844 machine_mode mode, rtx value)
16845 {
16846 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16847
16848 fix->insn = insn;
16849 fix->address = address;
16850 fix->loc = loc;
16851 fix->mode = mode;
16852 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16853 fix->value = value;
16854 fix->forwards = get_attr_pool_range (insn);
16855 fix->backwards = get_attr_neg_pool_range (insn);
16856 fix->minipool = NULL;
16857
16858 /* If an insn doesn't have a range defined for it, then it isn't
16859 expecting to be reworked by this code. Better to stop now than
16860 to generate duff assembly code. */
16861 gcc_assert (fix->forwards || fix->backwards);
16862
16863 /* If an entry requires 8-byte alignment then assume all constant pools
16864 require 4 bytes of padding. Trying to do this later on a per-pool
16865 basis is awkward because existing pool entries have to be modified. */
16866 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16867 minipool_pad = 4;
16868
16869 if (dump_file)
16870 {
16871 fprintf (dump_file,
16872 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16873 GET_MODE_NAME (mode),
16874 INSN_UID (insn), (unsigned long) address,
16875 -1 * (long)fix->backwards, (long)fix->forwards);
16876 arm_print_value (dump_file, fix->value);
16877 fprintf (dump_file, "\n");
16878 }
16879
16880 /* Add it to the chain of fixes. */
16881 fix->next = NULL;
16882
16883 if (minipool_fix_head != NULL)
16884 minipool_fix_tail->next = fix;
16885 else
16886 minipool_fix_head = fix;
16887
16888 minipool_fix_tail = fix;
16889 }
16890
16891 /* Return the maximum allowed cost, in insns, of synthesizing a 64-bit
16892 constant inline. Returns 99 if we always want the value synthesized
16893 rather than placed in a literal pool. */
16894 int
16895 arm_max_const_double_inline_cost ()
16896 {
16897 /* Let the value get synthesized to avoid the use of literal pools. */
16898 if (arm_disable_literal_pool)
16899 return 99;
16900
16901 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16902 }
16903
16904 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16905 Returns the number of insns needed, or 99 if we don't know how to
16906 do it. */
16907 int
16908 arm_const_double_inline_cost (rtx val)
16909 {
16910 rtx lowpart, highpart;
16911 machine_mode mode;
16912
16913 mode = GET_MODE (val);
16914
16915 if (mode == VOIDmode)
16916 mode = DImode;
16917
16918 gcc_assert (GET_MODE_SIZE (mode) == 8);
16919
16920 lowpart = gen_lowpart (SImode, val);
16921 highpart = gen_highpart_mode (SImode, mode, val);
16922
16923 gcc_assert (CONST_INT_P (lowpart));
16924 gcc_assert (CONST_INT_P (highpart));
16925
16926 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16927 NULL_RTX, NULL_RTX, 0, 0)
16928 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16929 NULL_RTX, NULL_RTX, 0, 0));
16930 }
16931
16932 /* Cost of loading a SImode constant. */
16933 static inline int
16934 arm_const_inline_cost (enum rtx_code code, rtx val)
16935 {
16936 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16937 NULL_RTX, NULL_RTX, 1, 0);
16938 }
16939
16940 /* Return true if it is worthwhile to split a 64-bit constant into two
16941 32-bit operations. This is the case if optimizing for size, or
16942 if we have load delay slots, or if one 32-bit part can be done with
16943 a single data operation. */
16944 bool
16945 arm_const_double_by_parts (rtx val)
16946 {
16947 machine_mode mode = GET_MODE (val);
16948 rtx part;
16949
16950 if (optimize_size || arm_ld_sched)
16951 return true;
16952
16953 if (mode == VOIDmode)
16954 mode = DImode;
16955
16956 part = gen_highpart_mode (SImode, mode, val);
16957
16958 gcc_assert (CONST_INT_P (part));
16959
16960 if (const_ok_for_arm (INTVAL (part))
16961 || const_ok_for_arm (~INTVAL (part)))
16962 return true;
16963
16964 part = gen_lowpart (SImode, val);
16965
16966 gcc_assert (CONST_INT_P (part));
16967
16968 if (const_ok_for_arm (INTVAL (part))
16969 || const_ok_for_arm (~INTVAL (part)))
16970 return true;
16971
16972 return false;
16973 }
16974
16975 /* Return true if it is possible to inline both the high and low parts
16976 of a 64-bit constant into 32-bit data processing instructions. */
16977 bool
16978 arm_const_double_by_immediates (rtx val)
16979 {
16980 machine_mode mode = GET_MODE (val);
16981 rtx part;
16982
16983 if (mode == VOIDmode)
16984 mode = DImode;
16985
16986 part = gen_highpart_mode (SImode, mode, val);
16987
16988 gcc_assert (CONST_INT_P (part));
16989
16990 if (!const_ok_for_arm (INTVAL (part)))
16991 return false;
16992
16993 part = gen_lowpart (SImode, val);
16994
16995 gcc_assert (CONST_INT_P (part));
16996
16997 if (!const_ok_for_arm (INTVAL (part)))
16998 return false;
16999
17000 return true;
17001 }
17002
17003 /* Scan INSN and note any of its operands that need fixing.
17004 If DO_PUSHES is false we do not actually push any of the fixups
17005 needed. */
17006 static void
17007 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
17008 {
17009 int opno;
17010
17011 extract_constrain_insn (insn);
17012
17013 if (recog_data.n_alternatives == 0)
17014 return;
17015
17016 /* Fill in recog_op_alt with information about the constraints of
17017 this insn. */
17018 preprocess_constraints (insn);
17019
17020 const operand_alternative *op_alt = which_op_alt ();
17021 for (opno = 0; opno < recog_data.n_operands; opno++)
17022 {
17023 /* Things we need to fix can only occur in inputs. */
17024 if (recog_data.operand_type[opno] != OP_IN)
17025 continue;
17026
17027 /* If this alternative is a memory reference, then any mention
17028 of constants in this alternative is really to fool reload
17029 into allowing us to accept one there. We need to fix them up
17030 now so that we output the right code. */
17031 if (op_alt[opno].memory_ok)
17032 {
17033 rtx op = recog_data.operand[opno];
17034
17035 if (CONSTANT_P (op))
17036 {
17037 if (do_pushes)
17038 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17039 recog_data.operand_mode[opno], op);
17040 }
17041 else if (MEM_P (op)
17042 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17043 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17044 {
17045 if (do_pushes)
17046 {
17047 rtx cop = avoid_constant_pool_reference (op);
17048
17049 /* Casting the address of something to a mode narrower
17050 than a word can cause avoid_constant_pool_reference()
17051 to return the pool reference itself. That's no good to
17052 us here.  Let's just hope that we can use the
17053 constant pool value directly. */
17054 if (op == cop)
17055 cop = get_pool_constant (XEXP (op, 0));
17056
17057 push_minipool_fix (insn, address,
17058 recog_data.operand_loc[opno],
17059 recog_data.operand_mode[opno], cop);
17060 }
17061
17062 }
17063 }
17064 }
17065
17066 return;
17067 }
17068
17069 /* Rewrite a move insn into a subtract of 0 if the condition codes will
17070 be useful in the next conditional jump insn. */
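/* Concretely, a 'mov rd, rs' whose source or destination is the register
   tested by the block's final cbranchsi4_insn is rewritten as
   'subs rd, rs, #0', and the branch is updated to test rd instead.  */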
17071
17072 static void
17073 thumb1_reorg (void)
17074 {
17075 basic_block bb;
17076
17077 FOR_EACH_BB_FN (bb, cfun)
17078 {
17079 rtx dest, src;
17080 rtx pat, op0, set = NULL;
17081 rtx_insn *prev, *insn = BB_END (bb);
17082 bool insn_clobbered = false;
17083
17084 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17085 insn = PREV_INSN (insn);
17086
17087 /* Find the last cbranchsi4_insn in basic block BB. */
17088 if (insn == BB_HEAD (bb)
17089 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17090 continue;
17091
17092 /* Get the register with which we are comparing. */
17093 pat = PATTERN (insn);
17094 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17095
17096 /* Find the first flag setting insn before INSN in basic block BB. */
17097 gcc_assert (insn != BB_HEAD (bb));
17098 for (prev = PREV_INSN (insn);
17099 (!insn_clobbered
17100 && prev != BB_HEAD (bb)
17101 && (NOTE_P (prev)
17102 || DEBUG_INSN_P (prev)
17103 || ((set = single_set (prev)) != NULL
17104 && get_attr_conds (prev) == CONDS_NOCOND)));
17105 prev = PREV_INSN (prev))
17106 {
17107 if (reg_set_p (op0, prev))
17108 insn_clobbered = true;
17109 }
17110
17111 /* Skip if op0 is clobbered by insn other than prev. */
17112 if (insn_clobbered)
17113 continue;
17114
17115 if (!set)
17116 continue;
17117
17118 dest = SET_DEST (set);
17119 src = SET_SRC (set);
17120 if (!low_register_operand (dest, SImode)
17121 || !low_register_operand (src, SImode))
17122 continue;
17123
17124 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17125 in INSN. Both src and dest of the move insn are checked. */
17126 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17127 {
17128 dest = copy_rtx (dest);
17129 src = copy_rtx (src);
17130 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17131 PATTERN (prev) = gen_rtx_SET (VOIDmode, dest, src);
17132 INSN_CODE (prev) = -1;
17133 /* Set test register in INSN to dest. */
17134 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17135 INSN_CODE (insn) = -1;
17136 }
17137 }
17138 }
17139
17140 /* Convert instructions to their cc-clobbering variant if possible, since
17141 that allows us to use smaller encodings. */
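/* For example, once the condition codes are known to be dead, a plain
   'add r0, r0, r1' can be turned into the flag-setting 'adds r0, r0, r1',
   which has a 16-bit encoding.  */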
17142
17143 static void
17144 thumb2_reorg (void)
17145 {
17146 basic_block bb;
17147 regset_head live;
17148
17149 INIT_REG_SET (&live);
17150
17151 /* We are freeing block_for_insn in the toplev to keep compatibility
17152 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17153 compute_bb_for_insn ();
17154 df_analyze ();
17155
17156 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17157
17158 FOR_EACH_BB_FN (bb, cfun)
17159 {
17160 if (current_tune->disparage_flag_setting_t16_encodings
17161 && optimize_bb_for_speed_p (bb))
17162 continue;
17163
17164 rtx_insn *insn;
17165 Convert_Action action = SKIP;
17166 Convert_Action action_for_partial_flag_setting
17167 = (current_tune->disparage_partial_flag_setting_t16_encodings
17168 && optimize_bb_for_speed_p (bb))
17169 ? SKIP : CONV;
17170
17171 COPY_REG_SET (&live, DF_LR_OUT (bb));
17172 df_simulate_initialize_backwards (bb, &live);
17173 FOR_BB_INSNS_REVERSE (bb, insn)
17174 {
17175 if (NONJUMP_INSN_P (insn)
17176 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17177 && GET_CODE (PATTERN (insn)) == SET)
17178 {
17179 action = SKIP;
17180 rtx pat = PATTERN (insn);
17181 rtx dst = XEXP (pat, 0);
17182 rtx src = XEXP (pat, 1);
17183 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17184
17185 if (!OBJECT_P (src))
17186 op0 = XEXP (src, 0);
17187
17188 if (BINARY_P (src))
17189 op1 = XEXP (src, 1);
17190
17191 if (low_register_operand (dst, SImode))
17192 {
17193 switch (GET_CODE (src))
17194 {
17195 case PLUS:
17196 /* Adding two registers and storing the result
17197 in the first source is already a 16-bit
17198 operation. */
17199 if (rtx_equal_p (dst, op0)
17200 && register_operand (op1, SImode))
17201 break;
17202
17203 if (low_register_operand (op0, SImode))
17204 {
17205 /* ADDS <Rd>,<Rn>,<Rm> */
17206 if (low_register_operand (op1, SImode))
17207 action = CONV;
17208 /* ADDS <Rdn>,#<imm8> */
17209 /* SUBS <Rdn>,#<imm8> */
17210 else if (rtx_equal_p (dst, op0)
17211 && CONST_INT_P (op1)
17212 && IN_RANGE (INTVAL (op1), -255, 255))
17213 action = CONV;
17214 /* ADDS <Rd>,<Rn>,#<imm3> */
17215 /* SUBS <Rd>,<Rn>,#<imm3> */
17216 else if (CONST_INT_P (op1)
17217 && IN_RANGE (INTVAL (op1), -7, 7))
17218 action = CONV;
17219 }
17220 /* ADCS <Rd>, <Rn> */
17221 else if (GET_CODE (XEXP (src, 0)) == PLUS
17222 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17223 && low_register_operand (XEXP (XEXP (src, 0), 1),
17224 SImode)
17225 && COMPARISON_P (op1)
17226 && cc_register (XEXP (op1, 0), VOIDmode)
17227 && maybe_get_arm_condition_code (op1) == ARM_CS
17228 && XEXP (op1, 1) == const0_rtx)
17229 action = CONV;
17230 break;
17231
17232 case MINUS:
17233 /* RSBS <Rd>,<Rn>,#0
17234 Not handled here: see NEG below. */
17235 /* SUBS <Rd>,<Rn>,#<imm3>
17236 SUBS <Rdn>,#<imm8>
17237 Not handled here: see PLUS above. */
17238 /* SUBS <Rd>,<Rn>,<Rm> */
17239 if (low_register_operand (op0, SImode)
17240 && low_register_operand (op1, SImode))
17241 action = CONV;
17242 break;
17243
17244 case MULT:
17245 /* MULS <Rdm>,<Rn>,<Rdm>
17246 As an exception to the rule, this is only used
17247 when optimizing for size since MULS is slow on all
17248 known implementations. We do not even want to use
17249 MULS in cold code, if optimizing for speed, so we
17250 test the global flag here. */
17251 if (!optimize_size)
17252 break;
17253 /* else fall through. */
17254 case AND:
17255 case IOR:
17256 case XOR:
17257 /* ANDS <Rdn>,<Rm> */
17258 if (rtx_equal_p (dst, op0)
17259 && low_register_operand (op1, SImode))
17260 action = action_for_partial_flag_setting;
17261 else if (rtx_equal_p (dst, op1)
17262 && low_register_operand (op0, SImode))
17263 action = action_for_partial_flag_setting == SKIP
17264 ? SKIP : SWAP_CONV;
17265 break;
17266
17267 case ASHIFTRT:
17268 case ASHIFT:
17269 case LSHIFTRT:
17270 /* ASRS <Rdn>,<Rm> */
17271 /* LSRS <Rdn>,<Rm> */
17272 /* LSLS <Rdn>,<Rm> */
17273 if (rtx_equal_p (dst, op0)
17274 && low_register_operand (op1, SImode))
17275 action = action_for_partial_flag_setting;
17276 /* ASRS <Rd>,<Rm>,#<imm5> */
17277 /* LSRS <Rd>,<Rm>,#<imm5> */
17278 /* LSLS <Rd>,<Rm>,#<imm5> */
17279 else if (low_register_operand (op0, SImode)
17280 && CONST_INT_P (op1)
17281 && IN_RANGE (INTVAL (op1), 0, 31))
17282 action = action_for_partial_flag_setting;
17283 break;
17284
17285 case ROTATERT:
17286 /* RORS <Rdn>,<Rm> */
17287 if (rtx_equal_p (dst, op0)
17288 && low_register_operand (op1, SImode))
17289 action = action_for_partial_flag_setting;
17290 break;
17291
17292 case NOT:
17293 /* MVNS <Rd>,<Rm> */
17294 if (low_register_operand (op0, SImode))
17295 action = action_for_partial_flag_setting;
17296 break;
17297
17298 case NEG:
17299 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17300 if (low_register_operand (op0, SImode))
17301 action = CONV;
17302 break;
17303
17304 case CONST_INT:
17305 /* MOVS <Rd>,#<imm8> */
17306 if (CONST_INT_P (src)
17307 && IN_RANGE (INTVAL (src), 0, 255))
17308 action = action_for_partial_flag_setting;
17309 break;
17310
17311 case REG:
17312 /* MOVS and MOV<c> with registers have different
17313 encodings, so are not relevant here. */
17314 break;
17315
17316 default:
17317 break;
17318 }
17319 }
17320
17321 if (action != SKIP)
17322 {
17323 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17324 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17325 rtvec vec;
17326
17327 if (action == SWAP_CONV)
17328 {
17329 src = copy_rtx (src);
17330 XEXP (src, 0) = op1;
17331 XEXP (src, 1) = op0;
17332 pat = gen_rtx_SET (VOIDmode, dst, src);
17333 vec = gen_rtvec (2, pat, clobber);
17334 }
17335 else /* action == CONV */
17336 vec = gen_rtvec (2, pat, clobber);
17337
17338 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17339 INSN_CODE (insn) = -1;
17340 }
17341 }
17342
17343 if (NONDEBUG_INSN_P (insn))
17344 df_simulate_one_insn_backwards (bb, insn, &live);
17345 }
17346 }
17347
17348 CLEAR_REG_SET (&live);
17349 }
17350
17351 /* GCC puts the pool in the wrong place for ARM, since we can only
17352 load addresses a limited distance around the PC.  We do some
17353 special munging to move the constant pool values to the correct
17354 point in the code. */
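/* The pass proceeds in three steps: scan the insn stream recording every
   constant that must come from a pool (and every existing barrier), group
   those fixes into pools that all of their users can reach, and finally
   dump each pool after a real or newly created barrier, rewriting the
   loads to address the pool entries.  */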
17355 static void
17356 arm_reorg (void)
17357 {
17358 rtx_insn *insn;
17359 HOST_WIDE_INT address = 0;
17360 Mfix * fix;
17361
17362 if (TARGET_THUMB1)
17363 thumb1_reorg ();
17364 else if (TARGET_THUMB2)
17365 thumb2_reorg ();
17366
17367 /* Ensure all insns that must be split have been split at this point.
17368 Otherwise, the pool placement code below may compute incorrect
17369 insn lengths. Note that when optimizing, all insns have already
17370 been split at this point. */
17371 if (!optimize)
17372 split_all_insns_noflow ();
17373
17374 minipool_fix_head = minipool_fix_tail = NULL;
17375
17376 /* The first insn must always be a note, or the code below won't
17377 scan it properly. */
17378 insn = get_insns ();
17379 gcc_assert (NOTE_P (insn));
17380 minipool_pad = 0;
17381
17382 /* Scan all the insns and record the operands that will need fixing. */
17383 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17384 {
17385 if (BARRIER_P (insn))
17386 push_minipool_barrier (insn, address);
17387 else if (INSN_P (insn))
17388 {
17389 rtx_jump_table_data *table;
17390
17391 note_invalid_constants (insn, address, true);
17392 address += get_attr_length (insn);
17393
17394 /* If the insn is a vector jump, add the size of the table
17395 and skip the table. */
17396 if (tablejump_p (insn, NULL, &table))
17397 {
17398 address += get_jump_table_size (table);
17399 insn = table;
17400 }
17401 }
17402 else if (LABEL_P (insn))
17403 /* Add the worst-case padding due to alignment. We don't add
17404 the _current_ padding because the minipool insertions
17405 themselves might change it. */
17406 address += get_label_padding (insn);
17407 }
17408
17409 fix = minipool_fix_head;
17410
17411 /* Now scan the fixups and perform the required changes. */
17412 while (fix)
17413 {
17414 Mfix * ftmp;
17415 Mfix * fdel;
17416 Mfix * last_added_fix;
17417 Mfix * last_barrier = NULL;
17418 Mfix * this_fix;
17419
17420 /* Skip any further barriers before the next fix. */
17421 while (fix && BARRIER_P (fix->insn))
17422 fix = fix->next;
17423
17424 /* No more fixes. */
17425 if (fix == NULL)
17426 break;
17427
17428 last_added_fix = NULL;
17429
17430 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17431 {
17432 if (BARRIER_P (ftmp->insn))
17433 {
17434 if (ftmp->address >= minipool_vector_head->max_address)
17435 break;
17436
17437 last_barrier = ftmp;
17438 }
17439 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17440 break;
17441
17442 last_added_fix = ftmp; /* Keep track of the last fix added. */
17443 }
17444
17445 /* If we found a barrier, drop back to that; any fixes that we
17446 could have reached but come after the barrier will now go in
17447 the next mini-pool. */
17448 if (last_barrier != NULL)
17449 {
17450 /* Reduce the refcount for those fixes that won't go into this
17451 pool after all. */
17452 for (fdel = last_barrier->next;
17453 fdel && fdel != ftmp;
17454 fdel = fdel->next)
17455 {
17456 fdel->minipool->refcount--;
17457 fdel->minipool = NULL;
17458 }
17459
17460 ftmp = last_barrier;
17461 }
17462 else
17463 {
17464 /* ftmp is the first fix that we can't fit into this pool and
17465 there are no natural barriers that we could use.  Insert a
17466 new barrier in the code somewhere between the previous
17467 fix and this one, and arrange to jump around it. */
17468 HOST_WIDE_INT max_address;
17469
17470 /* The last item on the list of fixes must be a barrier, so
17471 we can never run off the end of the list of fixes without
17472 last_barrier being set. */
17473 gcc_assert (ftmp);
17474
17475 max_address = minipool_vector_head->max_address;
17476 /* Check that there isn't another fix that is in range that
17477 we couldn't fit into this pool because the pool was
17478 already too large: we need to put the pool before such an
17479 instruction. The pool itself may come just after the
17480 fix because create_fix_barrier also allows space for a
17481 jump instruction. */
17482 if (ftmp->address < max_address)
17483 max_address = ftmp->address + 1;
17484
17485 last_barrier = create_fix_barrier (last_added_fix, max_address);
17486 }
17487
17488 assign_minipool_offsets (last_barrier);
17489
17490 while (ftmp)
17491 {
17492 if (!BARRIER_P (ftmp->insn)
17493 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17494 == NULL))
17495 break;
17496
17497 ftmp = ftmp->next;
17498 }
17499
17500 /* Scan over the fixes we have identified for this pool, fixing them
17501 up and adding the constants to the pool itself. */
17502 for (this_fix = fix; this_fix && ftmp != this_fix;
17503 this_fix = this_fix->next)
17504 if (!BARRIER_P (this_fix->insn))
17505 {
17506 rtx addr
17507 = plus_constant (Pmode,
17508 gen_rtx_LABEL_REF (VOIDmode,
17509 minipool_vector_label),
17510 this_fix->minipool->offset);
17511 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17512 }
17513
17514 dump_minipool (last_barrier->insn);
17515 fix = ftmp;
17516 }
17517
17518 /* From now on we must synthesize any constants that we can't handle
17519 directly. This can happen if the RTL gets split during final
17520 instruction generation. */
17521 cfun->machine->after_arm_reorg = 1;
17522
17523 /* Free the minipool memory. */
17524 obstack_free (&minipool_obstack, minipool_startobj);
17525 }
17526 \f
17527 /* Routines to output assembly language. */
17528
17529 /* Return string representation of passed in real value. */
17530 static const char *
17531 fp_const_from_val (REAL_VALUE_TYPE *r)
17532 {
17533 if (!fp_consts_inited)
17534 init_fp_table ();
17535
17536 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17537 return "0";
17538 }
17539
17540 /* OPERANDS[0] is the entire list of insns that constitute pop,
17541 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17542 is in the list, UPDATE is true iff the list contains explicit
17543 update of base register. */
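/* Typical output is 'pop {r4, r5, pc}' when popping from SP with update in
   unified syntax, or an 'ldmia'/'ldmfd' form such as 'ldmfd sp!, {r4, r5, pc}'
   otherwise.  */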
17544 void
17545 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17546 bool update)
17547 {
17548 int i;
17549 char pattern[100];
17550 int offset;
17551 const char *conditional;
17552 int num_saves = XVECLEN (operands[0], 0);
17553 unsigned int regno;
17554 unsigned int regno_base = REGNO (operands[1]);
17555
17556 offset = 0;
17557 offset += update ? 1 : 0;
17558 offset += return_pc ? 1 : 0;
17559
17560 /* Is the base register in the list? */
17561 for (i = offset; i < num_saves; i++)
17562 {
17563 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17564 /* If SP is in the list, then the base register must be SP. */
17565 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17566 /* If base register is in the list, there must be no explicit update. */
17567 if (regno == regno_base)
17568 gcc_assert (!update);
17569 }
17570
17571 conditional = reverse ? "%?%D0" : "%?%d0";
17572 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17573 {
17574 /* Output pop (not stmfd) because it has a shorter encoding. */
17575 gcc_assert (update);
17576 sprintf (pattern, "pop%s\t{", conditional);
17577 }
17578 else
17579 {
17580 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17581 It's just a convention; their semantics are identical. */
17582 if (regno_base == SP_REGNUM)
17583 sprintf (pattern, "ldm%sfd\t", conditional);
17584 else if (TARGET_UNIFIED_ASM)
17585 sprintf (pattern, "ldmia%s\t", conditional);
17586 else
17587 sprintf (pattern, "ldm%sia\t", conditional);
17588
17589 strcat (pattern, reg_names[regno_base]);
17590 if (update)
17591 strcat (pattern, "!, {");
17592 else
17593 strcat (pattern, ", {");
17594 }
17595
17596 /* Output the first destination register. */
17597 strcat (pattern,
17598 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17599
17600 /* Output the rest of the destination registers. */
17601 for (i = offset + 1; i < num_saves; i++)
17602 {
17603 strcat (pattern, ", ");
17604 strcat (pattern,
17605 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17606 }
17607
17608 strcat (pattern, "}");
17609
17610 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17611 strcat (pattern, "^");
17612
17613 output_asm_insn (pattern, &cond);
17614 }
17615
17616
17617 /* Output the assembly for a VFP store multiple of double registers. */
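/* This emits either 'vpush%?.64 {dN, ...}' when the address register is SP,
   or 'vstmdb%?.64 rM!, {dN, ...}' otherwise.  */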
17618
17619 const char *
17620 vfp_output_vstmd (rtx * operands)
17621 {
17622 char pattern[100];
17623 int p;
17624 int base;
17625 int i;
17626 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17627 ? XEXP (operands[0], 0)
17628 : XEXP (XEXP (operands[0], 0), 0);
17629 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17630
17631 if (push_p)
17632 strcpy (pattern, "vpush%?.64\t{%P1");
17633 else
17634 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17635
17636 p = strlen (pattern);
17637
17638 gcc_assert (REG_P (operands[1]));
17639
17640 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17641 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17642 {
17643 p += sprintf (&pattern[p], ", d%d", base + i);
17644 }
17645 strcpy (&pattern[p], "}");
17646
17647 output_asm_insn (pattern, operands);
17648 return "";
17649 }
17650
17651
17652 /* Emit RTL to save a block of VFP register pairs to the stack.  Returns the
17653 number of bytes pushed. */
17654
17655 static int
17656 vfp_emit_fstmd (int base_reg, int count)
17657 {
17658 rtx par;
17659 rtx dwarf;
17660 rtx tmp, reg;
17661 int i;
17662
17663 /* Work around the ARM10 VFPr1 bug.  Data corruption can occur when exactly two
17664 register pairs are stored by a store multiple insn. We avoid this
17665 by pushing an extra pair. */
17666 if (count == 2 && !arm_arch6)
17667 {
17668 if (base_reg == LAST_VFP_REGNUM - 3)
17669 base_reg -= 2;
17670 count++;
17671 }
17672
17673 /* FSTMD may not store more than 16 doubleword registers at once. Split
17674 larger stores into multiple parts (up to a maximum of two, in
17675 practice). */
17676 if (count > 16)
17677 {
17678 int saved;
17679 /* NOTE: base_reg is an internal register number, so each D register
17680 counts as 2. */
17681 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17682 saved += vfp_emit_fstmd (base_reg, 16);
17683 return saved;
17684 }
17685
17686 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17687 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
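  /* PAR describes the store-multiple itself; DWARF holds the equivalent
     individual sets, attached below as a REG_FRAME_RELATED_EXPR so the
     unwinder sees each register's save slot.  */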
17688
17689 reg = gen_rtx_REG (DFmode, base_reg);
17690 base_reg += 2;
17691
17692 XVECEXP (par, 0, 0)
17693 = gen_rtx_SET (VOIDmode,
17694 gen_frame_mem
17695 (BLKmode,
17696 gen_rtx_PRE_MODIFY (Pmode,
17697 stack_pointer_rtx,
17698 plus_constant
17699 (Pmode, stack_pointer_rtx,
17700 - (count * 8)))
17701 ),
17702 gen_rtx_UNSPEC (BLKmode,
17703 gen_rtvec (1, reg),
17704 UNSPEC_PUSH_MULT));
17705
17706 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17707 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17708 RTX_FRAME_RELATED_P (tmp) = 1;
17709 XVECEXP (dwarf, 0, 0) = tmp;
17710
17711 tmp = gen_rtx_SET (VOIDmode,
17712 gen_frame_mem (DFmode, stack_pointer_rtx),
17713 reg);
17714 RTX_FRAME_RELATED_P (tmp) = 1;
17715 XVECEXP (dwarf, 0, 1) = tmp;
17716
17717 for (i = 1; i < count; i++)
17718 {
17719 reg = gen_rtx_REG (DFmode, base_reg);
17720 base_reg += 2;
17721 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17722
17723 tmp = gen_rtx_SET (VOIDmode,
17724 gen_frame_mem (DFmode,
17725 plus_constant (Pmode,
17726 stack_pointer_rtx,
17727 i * 8)),
17728 reg);
17729 RTX_FRAME_RELATED_P (tmp) = 1;
17730 XVECEXP (dwarf, 0, i + 1) = tmp;
17731 }
17732
17733 par = emit_insn (par);
17734 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17735 RTX_FRAME_RELATED_P (par) = 1;
17736
17737 return count * 8;
17738 }
17739
17740 /* Emit a call instruction with pattern PAT. ADDR is the address of
17741 the call target. */
17742
17743 void
17744 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17745 {
17746 rtx insn;
17747
17748 insn = emit_call_insn (pat);
17749
17750 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17751 If the call might use such an entry, add a use of the PIC register
17752 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17753 if (TARGET_VXWORKS_RTP
17754 && flag_pic
17755 && !sibcall
17756 && GET_CODE (addr) == SYMBOL_REF
17757 && (SYMBOL_REF_DECL (addr)
17758 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17759 : !SYMBOL_REF_LOCAL_P (addr)))
17760 {
17761 require_pic_register ();
17762 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17763 }
17764
17765 if (TARGET_AAPCS_BASED)
17766 {
17767 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17768 linker. We need to add an IP clobber to allow setting
17769 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17770 is not needed since it's a fixed register. */
17771 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17772 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17773 }
17774 }
17775
17776 /* Output a 'call' insn. */
17777 const char *
17778 output_call (rtx *operands)
17779 {
17780 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17781
17782 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17783 if (REGNO (operands[0]) == LR_REGNUM)
17784 {
17785 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17786 output_asm_insn ("mov%?\t%0, %|lr", operands);
17787 }
17788
17789 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17790
17791 if (TARGET_INTERWORK || arm_arch4t)
17792 output_asm_insn ("bx%?\t%0", operands);
17793 else
17794 output_asm_insn ("mov%?\t%|pc, %0", operands);
17795
17796 return "";
17797 }
17798
17799 /* Output a 'call' insn whose target is a reference in memory.  This is
17800 not used for ARMv5 and later, where we prefer a blx instead because
17801 otherwise there's a significant performance overhead. */
17802 const char *
17803 output_call_mem (rtx *operands)
17804 {
17805 gcc_assert (!arm_arch5);
17806 if (TARGET_INTERWORK)
17807 {
17808 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17809 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17810 output_asm_insn ("bx%?\t%|ip", operands);
17811 }
17812 else if (regno_use_in (LR_REGNUM, operands[0]))
17813 {
17814 /* LR is used in the memory address. We load the address in the
17815 first instruction. It's safe to use IP as the target of the
17816 load since the call will kill it anyway. */
17817 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17818 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17819 if (arm_arch4t)
17820 output_asm_insn ("bx%?\t%|ip", operands);
17821 else
17822 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17823 }
17824 else
17825 {
17826 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17827 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17828 }
17829
17830 return "";
17831 }
17832
17833
17834 /* Output a move from arm registers to arm registers of a long double
17835 OPERANDS[0] is the destination.
17836 OPERANDS[1] is the source. */
17837 const char *
17838 output_mov_long_double_arm_from_arm (rtx *operands)
17839 {
17840 /* We have to be careful here because the two might overlap. */
17841 int dest_start = REGNO (operands[0]);
17842 int src_start = REGNO (operands[1]);
17843 rtx ops[2];
17844 int i;
17845
17846 if (dest_start < src_start)
17847 {
17848 for (i = 0; i < 3; i++)
17849 {
17850 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17851 ops[1] = gen_rtx_REG (SImode, src_start + i);
17852 output_asm_insn ("mov%?\t%0, %1", ops);
17853 }
17854 }
17855 else
17856 {
17857 for (i = 2; i >= 0; i--)
17858 {
17859 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17860 ops[1] = gen_rtx_REG (SImode, src_start + i);
17861 output_asm_insn ("mov%?\t%0, %1", ops);
17862 }
17863 }
17864
17865 return "";
17866 }
17867
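/* Load SRC into DEST as a pair of 16-bit operations: for a constant, set the
   low 16 bits and then insert the high 16 bits (skipped when they are zero);
   otherwise emit HIGH/LO_SUM sets, which correspond to movw/movt for symbolic
   values.  */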
17868 void
17869 arm_emit_movpair (rtx dest, rtx src)
17870 {
17871 /* If the src is an immediate, simplify it. */
17872 if (CONST_INT_P (src))
17873 {
17874 HOST_WIDE_INT val = INTVAL (src);
17875 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17876 if ((val >> 16) & 0x0000ffff)
17877 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17878 GEN_INT (16)),
17879 GEN_INT ((val >> 16) & 0x0000ffff));
17880 return;
17881 }
17882 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17883 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17884 }
17885
17886 /* Output a move between double words. It must be REG<-MEM
17887 or MEM<-REG. */
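/* If EMIT is false, no assembly is output; only *COUNT is set to the number
   of insns (1 or 2) that would have been emitted.  */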
17888 const char *
17889 output_move_double (rtx *operands, bool emit, int *count)
17890 {
17891 enum rtx_code code0 = GET_CODE (operands[0]);
17892 enum rtx_code code1 = GET_CODE (operands[1]);
17893 rtx otherops[3];
17894 if (count)
17895 *count = 1;
17896
17897 /* The only case when this might happen is when
17898 you are looking at the length of a DImode instruction
17899 that has an invalid constant in it. */
17900 if (code0 == REG && code1 != MEM)
17901 {
17902 gcc_assert (!emit);
17903 *count = 2;
17904 return "";
17905 }
17906
17907 if (code0 == REG)
17908 {
17909 unsigned int reg0 = REGNO (operands[0]);
17910
17911 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17912
17913 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17914
17915 switch (GET_CODE (XEXP (operands[1], 0)))
17916 {
17917 case REG:
17918
17919 if (emit)
17920 {
17921 if (TARGET_LDRD
17922 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17923 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17924 else
17925 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17926 }
17927 break;
17928
17929 case PRE_INC:
17930 gcc_assert (TARGET_LDRD);
17931 if (emit)
17932 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17933 break;
17934
17935 case PRE_DEC:
17936 if (emit)
17937 {
17938 if (TARGET_LDRD)
17939 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17940 else
17941 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17942 }
17943 break;
17944
17945 case POST_INC:
17946 if (emit)
17947 {
17948 if (TARGET_LDRD)
17949 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17950 else
17951 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17952 }
17953 break;
17954
17955 case POST_DEC:
17956 gcc_assert (TARGET_LDRD);
17957 if (emit)
17958 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17959 break;
17960
17961 case PRE_MODIFY:
17962 case POST_MODIFY:
17963 /* Autoincrement addressing modes should never have overlapping
17964 base and destination registers, and overlapping index registers
17965 are already prohibited, so this doesn't need to worry about
17966 fix_cm3_ldrd. */
17967 otherops[0] = operands[0];
17968 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17969 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17970
17971 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17972 {
17973 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17974 {
17975 /* Registers overlap so split out the increment. */
17976 if (emit)
17977 {
17978 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17979 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17980 }
17981 if (count)
17982 *count = 2;
17983 }
17984 else
17985 {
17986 /* Use a single insn if we can.
17987 FIXME: IWMMXT allows offsets larger than ldrd can
17988 handle, fix these up with a pair of ldr. */
17989 if (TARGET_THUMB2
17990 || !CONST_INT_P (otherops[2])
17991 || (INTVAL (otherops[2]) > -256
17992 && INTVAL (otherops[2]) < 256))
17993 {
17994 if (emit)
17995 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
17996 }
17997 else
17998 {
17999 if (emit)
18000 {
18001 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18002 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18003 }
18004 if (count)
18005 *count = 2;
18006
18007 }
18008 }
18009 }
18010 else
18011 {
18012 /* Use a single insn if we can.
18013 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18014 fix these up with a pair of ldr. */
18015 if (TARGET_THUMB2
18016 || !CONST_INT_P (otherops[2])
18017 || (INTVAL (otherops[2]) > -256
18018 && INTVAL (otherops[2]) < 256))
18019 {
18020 if (emit)
18021 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
18022 }
18023 else
18024 {
18025 if (emit)
18026 {
18027 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18028 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18029 }
18030 if (count)
18031 *count = 2;
18032 }
18033 }
18034 break;
18035
18036 case LABEL_REF:
18037 case CONST:
18038 /* We might be able to use ldrd %0, %1 here.  However, the range is
18039 different from that of ldr/adr, and it is broken on some ARMv7-M
18040 implementations. */
18041 /* Use the second register of the pair to avoid problematic
18042 overlap. */
18043 otherops[1] = operands[1];
18044 if (emit)
18045 output_asm_insn ("adr%?\t%0, %1", otherops);
18046 operands[1] = otherops[0];
18047 if (emit)
18048 {
18049 if (TARGET_LDRD)
18050 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18051 else
18052 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18053 }
18054
18055 if (count)
18056 *count = 2;
18057 break;
18058
18059 /* ??? This needs checking for thumb2. */
18060 default:
18061 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18062 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18063 {
18064 otherops[0] = operands[0];
18065 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18066 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18067
18068 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18069 {
18070 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18071 {
18072 switch ((int) INTVAL (otherops[2]))
18073 {
18074 case -8:
18075 if (emit)
18076 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18077 return "";
18078 case -4:
18079 if (TARGET_THUMB2)
18080 break;
18081 if (emit)
18082 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18083 return "";
18084 case 4:
18085 if (TARGET_THUMB2)
18086 break;
18087 if (emit)
18088 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18089 return "";
18090 }
18091 }
18092 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18093 operands[1] = otherops[0];
18094 if (TARGET_LDRD
18095 && (REG_P (otherops[2])
18096 || TARGET_THUMB2
18097 || (CONST_INT_P (otherops[2])
18098 && INTVAL (otherops[2]) > -256
18099 && INTVAL (otherops[2]) < 256)))
18100 {
18101 if (reg_overlap_mentioned_p (operands[0],
18102 otherops[2]))
18103 {
18104 /* Swap base and index registers over to
18105 avoid a conflict. */
18106 std::swap (otherops[1], otherops[2]);
18107 }
18108 /* If both registers conflict, it will usually
18109 have been fixed by a splitter. */
18110 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18111 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18112 {
18113 if (emit)
18114 {
18115 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18116 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18117 }
18118 if (count)
18119 *count = 2;
18120 }
18121 else
18122 {
18123 otherops[0] = operands[0];
18124 if (emit)
18125 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18126 }
18127 return "";
18128 }
18129
18130 if (CONST_INT_P (otherops[2]))
18131 {
18132 if (emit)
18133 {
18134 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18135 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18136 else
18137 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18138 }
18139 }
18140 else
18141 {
18142 if (emit)
18143 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18144 }
18145 }
18146 else
18147 {
18148 if (emit)
18149 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18150 }
18151
18152 if (count)
18153 *count = 2;
18154
18155 if (TARGET_LDRD)
18156 return "ldr%(d%)\t%0, [%1]";
18157
18158 return "ldm%(ia%)\t%1, %M0";
18159 }
18160 else
18161 {
18162 otherops[1] = adjust_address (operands[1], SImode, 4);
18163 /* Take care of overlapping base/data reg. */
18164 if (reg_mentioned_p (operands[0], operands[1]))
18165 {
18166 if (emit)
18167 {
18168 output_asm_insn ("ldr%?\t%0, %1", otherops);
18169 output_asm_insn ("ldr%?\t%0, %1", operands);
18170 }
18171 if (count)
18172 *count = 2;
18173
18174 }
18175 else
18176 {
18177 if (emit)
18178 {
18179 output_asm_insn ("ldr%?\t%0, %1", operands);
18180 output_asm_insn ("ldr%?\t%0, %1", otherops);
18181 }
18182 if (count)
18183 *count = 2;
18184 }
18185 }
18186 }
18187 }
18188 else
18189 {
18190 /* Constraints should ensure this. */
18191 gcc_assert (code0 == MEM && code1 == REG);
18192 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18193 || (TARGET_ARM && TARGET_LDRD));
18194
18195 switch (GET_CODE (XEXP (operands[0], 0)))
18196 {
18197 case REG:
18198 if (emit)
18199 {
18200 if (TARGET_LDRD)
18201 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18202 else
18203 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18204 }
18205 break;
18206
18207 case PRE_INC:
18208 gcc_assert (TARGET_LDRD);
18209 if (emit)
18210 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18211 break;
18212
18213 case PRE_DEC:
18214 if (emit)
18215 {
18216 if (TARGET_LDRD)
18217 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18218 else
18219 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18220 }
18221 break;
18222
18223 case POST_INC:
18224 if (emit)
18225 {
18226 if (TARGET_LDRD)
18227 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18228 else
18229 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18230 }
18231 break;
18232
18233 case POST_DEC:
18234 gcc_assert (TARGET_LDRD);
18235 if (emit)
18236 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18237 break;
18238
18239 case PRE_MODIFY:
18240 case POST_MODIFY:
18241 otherops[0] = operands[1];
18242 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18243 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18244
18245 /* IWMMXT allows offsets larger than ldrd can handle,
18246 fix these up with a pair of ldr. */
18247 if (!TARGET_THUMB2
18248 && CONST_INT_P (otherops[2])
18249 && (INTVAL(otherops[2]) <= -256
18250 || INTVAL(otherops[2]) >= 256))
18251 {
18252 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18253 {
18254 if (emit)
18255 {
18256 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18257 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18258 }
18259 if (count)
18260 *count = 2;
18261 }
18262 else
18263 {
18264 if (emit)
18265 {
18266 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18267 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18268 }
18269 if (count)
18270 *count = 2;
18271 }
18272 }
18273 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18274 {
18275 if (emit)
18276 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18277 }
18278 else
18279 {
18280 if (emit)
18281 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18282 }
18283 break;
18284
18285 case PLUS:
18286 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18287 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18288 {
18289 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18290 {
18291 case -8:
18292 if (emit)
18293 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18294 return "";
18295
18296 case -4:
18297 if (TARGET_THUMB2)
18298 break;
18299 if (emit)
18300 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18301 return "";
18302
18303 case 4:
18304 if (TARGET_THUMB2)
18305 break;
18306 if (emit)
18307 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18308 return "";
18309 }
18310 }
18311 if (TARGET_LDRD
18312 && (REG_P (otherops[2])
18313 || TARGET_THUMB2
18314 || (CONST_INT_P (otherops[2])
18315 && INTVAL (otherops[2]) > -256
18316 && INTVAL (otherops[2]) < 256)))
18317 {
18318 otherops[0] = operands[1];
18319 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18320 if (emit)
18321 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18322 return "";
18323 }
18324 /* Fall through */
18325
18326 default:
18327 otherops[0] = adjust_address (operands[0], SImode, 4);
18328 otherops[1] = operands[1];
18329 if (emit)
18330 {
18331 output_asm_insn ("str%?\t%1, %0", operands);
18332 output_asm_insn ("str%?\t%H1, %0", otherops);
18333 }
18334 if (count)
18335 *count = 2;
18336 }
18337 }
18338
18339 return "";
18340 }
18341
18342 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18343 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18344
18345 const char *
18346 output_move_quad (rtx *operands)
18347 {
18348 if (REG_P (operands[0]))
18349 {
18350 /* Load, or reg->reg move. */
18351
18352 if (MEM_P (operands[1]))
18353 {
18354 switch (GET_CODE (XEXP (operands[1], 0)))
18355 {
18356 case REG:
18357 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18358 break;
18359
18360 case LABEL_REF:
18361 case CONST:
18362 output_asm_insn ("adr%?\t%0, %1", operands);
18363 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18364 break;
18365
18366 default:
18367 gcc_unreachable ();
18368 }
18369 }
18370 else
18371 {
18372 rtx ops[2];
18373 int dest, src, i;
18374
18375 gcc_assert (REG_P (operands[1]));
18376
18377 dest = REGNO (operands[0]);
18378 src = REGNO (operands[1]);
18379
18380 /* This seems pretty dumb, but hopefully GCC won't try to do it
18381 very often. */
18382 if (dest < src)
18383 for (i = 0; i < 4; i++)
18384 {
18385 ops[0] = gen_rtx_REG (SImode, dest + i);
18386 ops[1] = gen_rtx_REG (SImode, src + i);
18387 output_asm_insn ("mov%?\t%0, %1", ops);
18388 }
18389 else
18390 for (i = 3; i >= 0; i--)
18391 {
18392 ops[0] = gen_rtx_REG (SImode, dest + i);
18393 ops[1] = gen_rtx_REG (SImode, src + i);
18394 output_asm_insn ("mov%?\t%0, %1", ops);
18395 }
18396 }
18397 }
18398 else
18399 {
18400 gcc_assert (MEM_P (operands[0]));
18401 gcc_assert (REG_P (operands[1]));
18402 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18403
18404 switch (GET_CODE (XEXP (operands[0], 0)))
18405 {
18406 case REG:
18407 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18408 break;
18409
18410 default:
18411 gcc_unreachable ();
18412 }
18413 }
18414
18415 return "";
18416 }
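
/* Worked example for the register-to-register path above (illustrative
   only): moving a quad value from r2-r5 into r4-r7 has dest (4) > src (2),
   so the loop runs downwards and emits roughly

	mov	r7, r5
	mov	r6, r4
	mov	r5, r3
	mov	r4, r2

   reading the overlapping registers r4 and r5 before they are overwritten;
   copying upwards instead would clobber r4 before the "mov r6, r4".  */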
18417
18418 /* Output a VFP load or store instruction. */
18419
18420 const char *
18421 output_move_vfp (rtx *operands)
18422 {
18423 rtx reg, mem, addr, ops[2];
18424 int load = REG_P (operands[0]);
18425 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18426 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18427 const char *templ;
18428 char buff[50];
18429 machine_mode mode;
18430
18431 reg = operands[!load];
18432 mem = operands[load];
18433
18434 mode = GET_MODE (reg);
18435
18436 gcc_assert (REG_P (reg));
18437 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18438 gcc_assert (mode == SFmode
18439 || mode == DFmode
18440 || mode == SImode
18441 || mode == DImode
18442 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18443 gcc_assert (MEM_P (mem));
18444
18445 addr = XEXP (mem, 0);
18446
18447 switch (GET_CODE (addr))
18448 {
18449 case PRE_DEC:
18450 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18451 ops[0] = XEXP (addr, 0);
18452 ops[1] = reg;
18453 break;
18454
18455 case POST_INC:
18456 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18457 ops[0] = XEXP (addr, 0);
18458 ops[1] = reg;
18459 break;
18460
18461 default:
18462 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18463 ops[0] = reg;
18464 ops[1] = mem;
18465 break;
18466 }
18467
18468 sprintf (buff, templ,
18469 load ? "ld" : "st",
18470 dp ? "64" : "32",
18471 dp ? "P" : "",
18472 integer_p ? "\t%@ int" : "");
18473 output_asm_insn (buff, ops);
18474
18475 return "";
18476 }
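
/* As an illustration of the templates above (register numbers arbitrary):
   a DFmode load from [r0] is printed as

	vldr.64	d1, [r0]

   while the POST_INC and PRE_DEC forms become

	vldmia.64	r0!, {d1}
	vstmdb.64	r0!, {d1}

   SFmode and SImode accesses use the ".32" variants, and integer modes
   additionally get a trailing "@ int" comment in the assembly output.  */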
18477
18478 /* Output a Neon double-word or quad-word load or store, or a load
18479 or store for larger structure modes.
18480
18481 WARNING: The ordering of elements is weird in big-endian mode,
18482 because the EABI requires that vectors stored in memory appear
18483 as though they were stored by a VSTM instruction.
18484 GCC RTL defines element ordering based on in-memory order.
18485 This can be different from the architectural ordering of elements
18486 within a NEON register. The intrinsics defined in arm_neon.h use the
18487 NEON register element ordering, not the GCC RTL element ordering.
18488
18489 For example, the in-memory ordering of a big-endian quadword
18490 vector with 16-bit elements when stored from register pair {d0,d1}
18491 will be (lowest address first, d0[N] is NEON register element N):
18492
18493 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18494
18495 When necessary, quadword registers (dN, dN+1) are moved to ARM
18496 registers from rN in the order:
18497
18498 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18499
18500 This ensures that STM/LDM can be used on vectors in ARM registers,
18501 and that the same memory layout results as if VSTM/VLDM were used.
18502
18503 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18504 possible, which allows use of appropriate alignment tags.
18505 Note that the choice of "64" is independent of the actual vector
18506 element size; this size simply ensures that the behavior is
18507 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18508
18509 Due to limitations of those instructions, use of VST1.64/VLD1.64
18510 is not possible if:
18511 - the address contains PRE_DEC, or
18512 - the mode refers to more than 4 double-word registers
18513
18514 In those cases, it would be possible to replace VSTM/VLDM by a
18515 sequence of instructions; this is not currently implemented since
18516 this is not certain to actually improve performance. */
18517
18518 const char *
18519 output_move_neon (rtx *operands)
18520 {
18521 rtx reg, mem, addr, ops[2];
18522 int regno, nregs, load = REG_P (operands[0]);
18523 const char *templ;
18524 char buff[50];
18525 machine_mode mode;
18526
18527 reg = operands[!load];
18528 mem = operands[load];
18529
18530 mode = GET_MODE (reg);
18531
18532 gcc_assert (REG_P (reg));
18533 regno = REGNO (reg);
18534 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18535 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18536 || NEON_REGNO_OK_FOR_QUAD (regno));
18537 gcc_assert (VALID_NEON_DREG_MODE (mode)
18538 || VALID_NEON_QREG_MODE (mode)
18539 || VALID_NEON_STRUCT_MODE (mode));
18540 gcc_assert (MEM_P (mem));
18541
18542 addr = XEXP (mem, 0);
18543
18544 /* Strip off const from addresses like (const (plus (...))). */
18545 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18546 addr = XEXP (addr, 0);
18547
18548 switch (GET_CODE (addr))
18549 {
18550 case POST_INC:
18551 /* We have to use vldm / vstm for too-large modes. */
18552 if (nregs > 4)
18553 {
18554 templ = "v%smia%%?\t%%0!, %%h1";
18555 ops[0] = XEXP (addr, 0);
18556 }
18557 else
18558 {
18559 templ = "v%s1.64\t%%h1, %%A0";
18560 ops[0] = mem;
18561 }
18562 ops[1] = reg;
18563 break;
18564
18565 case PRE_DEC:
18566 /* We have to use vldm / vstm in this case, since there is no
18567 pre-decrement form of the vld1 / vst1 instructions. */
18568 templ = "v%smdb%%?\t%%0!, %%h1";
18569 ops[0] = XEXP (addr, 0);
18570 ops[1] = reg;
18571 break;
18572
18573 case POST_MODIFY:
18574 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18575 gcc_unreachable ();
18576
18577 case REG:
18578 /* We have to use vldm / vstm for too-large modes. */
18579 if (nregs > 1)
18580 {
18581 if (nregs > 4)
18582 templ = "v%smia%%?\t%%m0, %%h1";
18583 else
18584 templ = "v%s1.64\t%%h1, %%A0";
18585
18586 ops[0] = mem;
18587 ops[1] = reg;
18588 break;
18589 }
18590 /* Fall through. */
18591 case LABEL_REF:
18592 case PLUS:
18593 {
18594 int i;
18595 int overlap = -1;
18596 for (i = 0; i < nregs; i++)
18597 {
18598 /* We're only using DImode here because it's a convenient size. */
18599 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18600 ops[1] = adjust_address (mem, DImode, 8 * i);
18601 if (reg_overlap_mentioned_p (ops[0], mem))
18602 {
18603 gcc_assert (overlap == -1);
18604 overlap = i;
18605 }
18606 else
18607 {
18608 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18609 output_asm_insn (buff, ops);
18610 }
18611 }
18612 if (overlap != -1)
18613 {
18614 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18615 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18616 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18617 output_asm_insn (buff, ops);
18618 }
18619
18620 return "";
18621 }
18622
18623 default:
18624 gcc_unreachable ();
18625 }
18626
18627 sprintf (buff, templ, load ? "ld" : "st");
18628 output_asm_insn (buff, ops);
18629
18630 return "";
18631 }
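
/* Illustrative only: a quad-word load whose address is
   (plus (reg r0) (const_int 16)) falls through to the per-doubleword loop
   above and is printed roughly as

	vldr	d0, [r0, #16]
	vldr	d1, [r0, #24]

   whereas a POST_INC access of at most four D registers uses the single
   vld1.64/vst1.64 form, and larger structure modes fall back to
   vldmia/vstmia with writeback.  */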
18632
18633 /* Compute and return the length of neon_mov<mode>, where <mode> is
18634 one of VSTRUCT modes: EI, OI, CI or XI. */
18635 int
18636 arm_attr_length_move_neon (rtx_insn *insn)
18637 {
18638 rtx reg, mem, addr;
18639 int load;
18640 machine_mode mode;
18641
18642 extract_insn_cached (insn);
18643
18644 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18645 {
18646 mode = GET_MODE (recog_data.operand[0]);
18647 switch (mode)
18648 {
18649 case EImode:
18650 case OImode:
18651 return 8;
18652 case CImode:
18653 return 12;
18654 case XImode:
18655 return 16;
18656 default:
18657 gcc_unreachable ();
18658 }
18659 }
18660
18661 load = REG_P (recog_data.operand[0]);
18662 reg = recog_data.operand[!load];
18663 mem = recog_data.operand[load];
18664
18665 gcc_assert (MEM_P (mem));
18666
18667 mode = GET_MODE (reg);
18668 addr = XEXP (mem, 0);
18669
18670 /* Strip off const from addresses like (const (plus (...))). */
18671 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18672 addr = XEXP (addr, 0);
18673
18674 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18675 {
18676 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18677 return insns * 4;
18678 }
18679 else
18680 return 4;
18681 }
18682
18683 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18684 return zero. */
18685
18686 int
18687 arm_address_offset_is_imm (rtx_insn *insn)
18688 {
18689 rtx mem, addr;
18690
18691 extract_insn_cached (insn);
18692
18693 if (REG_P (recog_data.operand[0]))
18694 return 0;
18695
18696 mem = recog_data.operand[0];
18697
18698 gcc_assert (MEM_P (mem));
18699
18700 addr = XEXP (mem, 0);
18701
18702 if (REG_P (addr)
18703 || (GET_CODE (addr) == PLUS
18704 && REG_P (XEXP (addr, 0))
18705 && CONST_INT_P (XEXP (addr, 1))))
18706 return 1;
18707 else
18708 return 0;
18709 }
18710
18711 /* Output an ADD r, s, #n where n may be too big for one instruction.
18712 If adding zero to one register, output nothing. */
18713 const char *
18714 output_add_immediate (rtx *operands)
18715 {
18716 HOST_WIDE_INT n = INTVAL (operands[2]);
18717
18718 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18719 {
18720 if (n < 0)
18721 output_multi_immediate (operands,
18722 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18723 -n);
18724 else
18725 output_multi_immediate (operands,
18726 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18727 n);
18728 }
18729
18730 return "";
18731 }
18732
18733 /* Output a multiple immediate operation.
18734 OPERANDS is the vector of operands referred to in the output patterns.
18735 INSTR1 is the output pattern to use for the first constant.
18736 INSTR2 is the output pattern to use for subsequent constants.
18737 IMMED_OP is the index of the constant slot in OPERANDS.
18738 N is the constant value. */
18739 static const char *
18740 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18741 int immed_op, HOST_WIDE_INT n)
18742 {
18743 #if HOST_BITS_PER_WIDE_INT > 32
18744 n &= 0xffffffff;
18745 #endif
18746
18747 if (n == 0)
18748 {
18749 /* Quick and easy output. */
18750 operands[immed_op] = const0_rtx;
18751 output_asm_insn (instr1, operands);
18752 }
18753 else
18754 {
18755 int i;
18756 const char * instr = instr1;
18757
18758 /* Note that n is never zero here (which would give no output). */
18759 for (i = 0; i < 32; i += 2)
18760 {
18761 if (n & (3 << i))
18762 {
18763 operands[immed_op] = GEN_INT (n & (255 << i));
18764 output_asm_insn (instr, operands);
18765 instr = instr2;
18766 i += 6;
18767 }
18768 }
18769 }
18770
18771 return "";
18772 }
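
/* Worked example (not used by the code itself): splitting the constant
   0x12340 for an add.  The loop above scans bit pairs and carves out
   byte-sized chunks that start on even bit positions -- exactly the
   values ARM can encode as an 8-bit immediate rotated by an even
   amount -- giving 0x2340 and 0x10000, so the output is roughly

	add	r0, r1, #0x2340
	add	r0, r0, #0x10000

   with the first instruction using INSTR1 and the rest using INSTR2.  */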
18773
18774 /* Return the name of a shifter operation. */
18775 static const char *
18776 arm_shift_nmem (enum rtx_code code)
18777 {
18778 switch (code)
18779 {
18780 case ASHIFT:
18781 return ARM_LSL_NAME;
18782
18783 case ASHIFTRT:
18784 return "asr";
18785
18786 case LSHIFTRT:
18787 return "lsr";
18788
18789 case ROTATERT:
18790 return "ror";
18791
18792 default:
18793 abort ();
18794 }
18795 }
18796
18797 /* Return the appropriate ARM instruction for the operation code.
18798 The returned result should not be overwritten. OP is the rtx of the
18799 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18800 was shifted. */
18801 const char *
18802 arithmetic_instr (rtx op, int shift_first_arg)
18803 {
18804 switch (GET_CODE (op))
18805 {
18806 case PLUS:
18807 return "add";
18808
18809 case MINUS:
18810 return shift_first_arg ? "rsb" : "sub";
18811
18812 case IOR:
18813 return "orr";
18814
18815 case XOR:
18816 return "eor";
18817
18818 case AND:
18819 return "and";
18820
18821 case ASHIFT:
18822 case ASHIFTRT:
18823 case LSHIFTRT:
18824 case ROTATERT:
18825 return arm_shift_nmem (GET_CODE (op));
18826
18827 default:
18828 gcc_unreachable ();
18829 }
18830 }
18831
18832 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18833 for the operation code. The returned result should not be overwritten.
18834 OP is the rtx code of the shift.
18835 OP is the rtx of the shift.
18836 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18837 constant amount of the shift if the shift is by a constant. */
18838 shift_op (rtx op, HOST_WIDE_INT *amountp)
18839 {
18840 const char * mnem;
18841 enum rtx_code code = GET_CODE (op);
18842
18843 switch (code)
18844 {
18845 case ROTATE:
18846 if (!CONST_INT_P (XEXP (op, 1)))
18847 {
18848 output_operand_lossage ("invalid shift operand");
18849 return NULL;
18850 }
18851
18852 code = ROTATERT;
18853 *amountp = 32 - INTVAL (XEXP (op, 1));
18854 mnem = "ror";
18855 break;
18856
18857 case ASHIFT:
18858 case ASHIFTRT:
18859 case LSHIFTRT:
18860 case ROTATERT:
18861 mnem = arm_shift_nmem (code);
18862 if (CONST_INT_P (XEXP (op, 1)))
18863 {
18864 *amountp = INTVAL (XEXP (op, 1));
18865 }
18866 else if (REG_P (XEXP (op, 1)))
18867 {
18868 *amountp = -1;
18869 return mnem;
18870 }
18871 else
18872 {
18873 output_operand_lossage ("invalid shift operand");
18874 return NULL;
18875 }
18876 break;
18877
18878 case MULT:
18879 /* We never have to worry about the amount being other than a
18880 power of 2, since this case can never be reloaded from a reg. */
18881 if (!CONST_INT_P (XEXP (op, 1)))
18882 {
18883 output_operand_lossage ("invalid shift operand");
18884 return NULL;
18885 }
18886
18887 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18888
18889 /* Amount must be a power of two. */
18890 if (*amountp & (*amountp - 1))
18891 {
18892 output_operand_lossage ("invalid shift operand");
18893 return NULL;
18894 }
18895
18896 *amountp = int_log2 (*amountp);
18897 return ARM_LSL_NAME;
18898
18899 default:
18900 output_operand_lossage ("invalid shift operand");
18901 return NULL;
18902 }
18903
18904 /* This is not 100% correct, but follows from the desire to merge
18905 multiplication by a power of 2 with the recognizer for a
18906 shift. >=32 is not a valid shift for "lsl", so we must try and
18907 output a shift that produces the correct arithmetical result.
18908 Using lsr #32 is identical except for the fact that the carry bit
18909 is not set correctly if we set the flags; but we never use the
18910 carry bit from such an operation, so we can ignore that. */
18911 if (code == ROTATERT)
18912 /* Rotate is just modulo 32. */
18913 *amountp &= 31;
18914 else if (*amountp != (*amountp & 31))
18915 {
18916 if (code == ASHIFT)
18917 mnem = "lsr";
18918 *amountp = 32;
18919 }
18920
18921 /* Shifts of 0 are no-ops. */
18922 if (*amountp == 0)
18923 return NULL;
18924
18925 return mnem;
18926 }
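
/* Two small examples of the mapping above (illustrative only): for
   (mult (reg) (const_int 8)) this returns ARM_LSL_NAME ("lsl" under
   unified syntax) with *AMOUNTP set to 3, so the caller prints an
   "lsl #3" operand; for (rotate (reg) (const_int 8)) it canonicalizes
   to the right-rotate form and returns "ror" with *AMOUNTP set to 24.  */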
18927
18928 /* Return the shift count corresponding to POWER, which must be a power of two. */
18929
18930 static HOST_WIDE_INT
18931 int_log2 (HOST_WIDE_INT power)
18932 {
18933 HOST_WIDE_INT shift = 0;
18934
18935 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18936 {
18937 gcc_assert (shift <= 31);
18938 shift++;
18939 }
18940
18941 return shift;
18942 }
18943
18944 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18945 because /bin/as is horribly restrictive. The judgement about
18946 whether or not each character is 'printable' (and can be output as
18947 is) or not (and must be printed with an octal escape) must be made
18948 with reference to the *host* character set -- the situation is
18949 similar to that discussed in the comments above pp_c_char in
18950 c-pretty-print.c. */
18951
18952 #define MAX_ASCII_LEN 51
18953
18954 void
18955 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18956 {
18957 int i;
18958 int len_so_far = 0;
18959
18960 fputs ("\t.ascii\t\"", stream);
18961
18962 for (i = 0; i < len; i++)
18963 {
18964 int c = p[i];
18965
18966 if (len_so_far >= MAX_ASCII_LEN)
18967 {
18968 fputs ("\"\n\t.ascii\t\"", stream);
18969 len_so_far = 0;
18970 }
18971
18972 if (ISPRINT (c))
18973 {
18974 if (c == '\\' || c == '\"')
18975 {
18976 putc ('\\', stream);
18977 len_so_far++;
18978 }
18979 putc (c, stream);
18980 len_so_far++;
18981 }
18982 else
18983 {
18984 fprintf (stream, "\\%03o", c);
18985 len_so_far += 4;
18986 }
18987 }
18988
18989 fputs ("\"\n", stream);
18990 }
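
/* For example (illustrative only), the four bytes 'a', '"', 'b', '\n'
   would be emitted as

	.ascii	"a\"b\012"

   Printable characters are passed through (with backslash and double
   quote escaped), anything else becomes a three-digit octal escape, and
   a fresh .ascii directive is started once the current one has grown
   past MAX_ASCII_LEN output characters.  */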
18991 \f
18992 /* Compute the register save mask for registers 0 through 12
18993 inclusive. This code is used by arm_compute_save_reg_mask. */
18994
18995 static unsigned long
18996 arm_compute_save_reg0_reg12_mask (void)
18997 {
18998 unsigned long func_type = arm_current_func_type ();
18999 unsigned long save_reg_mask = 0;
19000 unsigned int reg;
19001
19002 if (IS_INTERRUPT (func_type))
19003 {
19004 unsigned int max_reg;
19005 /* Interrupt functions must not corrupt any registers,
19006 even call clobbered ones. If this is a leaf function
19007 we can just examine the registers used by the RTL, but
19008 otherwise we have to assume that whatever function is
19009 called might clobber anything, and so we have to save
19010 all the call-clobbered registers as well. */
19011 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19012 /* FIQ handlers have registers r8 - r12 banked, so
19013 we only need to check r0 - r7. Normal ISRs only
19014 bank r14 and r15, so we must check up to r12.
19015 r13 is the stack pointer which is always preserved,
19016 so we do not need to consider it here. */
19017 max_reg = 7;
19018 else
19019 max_reg = 12;
19020
19021 for (reg = 0; reg <= max_reg; reg++)
19022 if (df_regs_ever_live_p (reg)
19023 || (! crtl->is_leaf && call_used_regs[reg]))
19024 save_reg_mask |= (1 << reg);
19025
19026 /* Also save the pic base register if necessary. */
19027 if (flag_pic
19028 && !TARGET_SINGLE_PIC_BASE
19029 && arm_pic_register != INVALID_REGNUM
19030 && crtl->uses_pic_offset_table)
19031 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19032 }
19033 else if (IS_VOLATILE (func_type))
19034 {
19035 /* For noreturn functions we historically omitted register saves
19036 altogether. However this really messes up debugging. As a
19037 compromise save just the frame pointers. Combined with the link
19038 register saved elsewhere this should be sufficient to get
19039 a backtrace. */
19040 if (frame_pointer_needed)
19041 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19042 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19043 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19044 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19045 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19046 }
19047 else
19048 {
19049 /* In the normal case we only need to save those registers
19050 which are call saved and which are used by this function. */
19051 for (reg = 0; reg <= 11; reg++)
19052 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
19053 save_reg_mask |= (1 << reg);
19054
19055 /* Handle the frame pointer as a special case. */
19056 if (frame_pointer_needed)
19057 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19058
19059 /* If we aren't loading the PIC register,
19060 don't stack it even though it may be live. */
19061 if (flag_pic
19062 && !TARGET_SINGLE_PIC_BASE
19063 && arm_pic_register != INVALID_REGNUM
19064 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19065 || crtl->uses_pic_offset_table))
19066 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19067
19068 /* The prologue will copy SP into R0, so save it. */
19069 if (IS_STACKALIGN (func_type))
19070 save_reg_mask |= 1;
19071 }
19072
19073 /* Save registers so the exception handler can modify them. */
19074 if (crtl->calls_eh_return)
19075 {
19076 unsigned int i;
19077
19078 for (i = 0; ; i++)
19079 {
19080 reg = EH_RETURN_DATA_REGNO (i);
19081 if (reg == INVALID_REGNUM)
19082 break;
19083 save_reg_mask |= 1 << reg;
19084 }
19085 }
19086
19087 return save_reg_mask;
19088 }
19089
19090 /* Return true if r3 is live at the start of the function. */
19091
19092 static bool
19093 arm_r3_live_at_start_p (void)
19094 {
19095 /* Just look at cfg info, which is still close enough to correct at this
19096 point. This gives false positives for broken functions that might use
19097 uninitialized data that happens to be allocated in r3, but who cares? */
19098 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19099 }
19100
19101 /* Compute the number of bytes used to store the static chain register on the
19102 stack, above the stack frame. We need to know this accurately to get the
19103 alignment of the rest of the stack frame correct. */
19104
19105 static int
19106 arm_compute_static_chain_stack_bytes (void)
19107 {
19108 /* See the defining assertion in arm_expand_prologue. */
19109 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19110 && IS_NESTED (arm_current_func_type ())
19111 && arm_r3_live_at_start_p ()
19112 && crtl->args.pretend_args_size == 0)
19113 return 4;
19114
19115 return 0;
19116 }
19117
19118 /* Compute a bit mask of which registers need to be
19119 saved on the stack for the current function.
19120 This is used by arm_get_frame_offsets, which may add extra registers. */
19121
19122 static unsigned long
19123 arm_compute_save_reg_mask (void)
19124 {
19125 unsigned int save_reg_mask = 0;
19126 unsigned long func_type = arm_current_func_type ();
19127 unsigned int reg;
19128
19129 if (IS_NAKED (func_type))
19130 /* This should never really happen. */
19131 return 0;
19132
19133 /* If we are creating a stack frame, then we must save the frame pointer,
19134 IP (which will hold the old stack pointer), LR and the PC. */
19135 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19136 save_reg_mask |=
19137 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19138 | (1 << IP_REGNUM)
19139 | (1 << LR_REGNUM)
19140 | (1 << PC_REGNUM);
19141
19142 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19143
19144 /* Decide if we need to save the link register.
19145 Interrupt routines have their own banked link register,
19146 so they never need to save it.
19147 Otherwise if we do not use the link register we do not need to save
19148 it. If we are pushing other registers onto the stack however, we
19149 can save an instruction in the epilogue by pushing the link register
19150 now and then popping it back into the PC. This incurs extra memory
19151 accesses though, so we only do it when optimizing for size, and only
19152 if we know that we will not need a fancy return sequence. */
19153 if (df_regs_ever_live_p (LR_REGNUM)
19154 || (save_reg_mask
19155 && optimize_size
19156 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19157 && !crtl->calls_eh_return))
19158 save_reg_mask |= 1 << LR_REGNUM;
19159
19160 if (cfun->machine->lr_save_eliminated)
19161 save_reg_mask &= ~ (1 << LR_REGNUM);
19162
19163 if (TARGET_REALLY_IWMMXT
19164 && ((bit_count (save_reg_mask)
19165 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19166 arm_compute_static_chain_stack_bytes ())
19167 ) % 2) != 0)
19168 {
19169 /* The total number of registers that are going to be pushed
19170 onto the stack is odd. We need to ensure that the stack
19171 is 64-bit aligned before we start to save iWMMXt registers,
19172 and also before we start to create locals. (A local variable
19173 might be a double or long long which we will load/store using
19174 an iWMMXt instruction). Therefore we need to push another
19175 ARM register, so that the stack will be 64-bit aligned. We
19176 try to avoid using the arg registers (r0 - r3) as they might be
19177 used to pass values in a tail call. */
19178 for (reg = 4; reg <= 12; reg++)
19179 if ((save_reg_mask & (1 << reg)) == 0)
19180 break;
19181
19182 if (reg <= 12)
19183 save_reg_mask |= (1 << reg);
19184 else
19185 {
19186 cfun->machine->sibcall_blocked = 1;
19187 save_reg_mask |= (1 << 3);
19188 }
19189 }
19190
19191 /* We may need to push an additional register for use initializing the
19192 PIC base register. */
19193 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19194 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19195 {
19196 reg = thumb_find_work_register (1 << 4);
19197 if (!call_used_regs[reg])
19198 save_reg_mask |= (1 << reg);
19199 }
19200
19201 return save_reg_mask;
19202 }
19203
19204
19205 /* Compute a bit mask of which registers need to be
19206 saved on the stack for the current function. */
19207 static unsigned long
19208 thumb1_compute_save_reg_mask (void)
19209 {
19210 unsigned long mask;
19211 unsigned reg;
19212
19213 mask = 0;
19214 for (reg = 0; reg < 12; reg ++)
19215 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
19216 mask |= 1 << reg;
19217
19218 if (flag_pic
19219 && !TARGET_SINGLE_PIC_BASE
19220 && arm_pic_register != INVALID_REGNUM
19221 && crtl->uses_pic_offset_table)
19222 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19223
19224 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19225 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19226 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19227
19228 /* LR will also be pushed if any lo regs are pushed. */
19229 if (mask & 0xff || thumb_force_lr_save ())
19230 mask |= (1 << LR_REGNUM);
19231
19232 /* Make sure we have a low work register if we need one.
19233 We will need one if we are going to push a high register,
19234 but we are not currently intending to push a low register. */
19235 if ((mask & 0xff) == 0
19236 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19237 {
19238 /* Use thumb_find_work_register to choose which register
19239 we will use. If the register is live then we will
19240 have to push it. Use LAST_LO_REGNUM as our fallback
19241 choice for the register to select. */
19242 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19243 /* Make sure the register returned by thumb_find_work_register is
19244 not part of the return value. */
19245 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19246 reg = LAST_LO_REGNUM;
19247
19248 if (! call_used_regs[reg])
19249 mask |= 1 << reg;
19250 }
19251
19252 /* The 504 below is 8 bytes less than 512 because there are two possible
19253 alignment words. We can't tell here if they will be present or not, so we
19254 have to play it safe and assume that they are. */
19255 if ((CALLER_INTERWORKING_SLOT_SIZE +
19256 ROUND_UP_WORD (get_frame_size ()) +
19257 crtl->outgoing_args_size) >= 504)
19258 {
19259 /* This is the same as the code in thumb1_expand_prologue() which
19260 determines which register to use for stack decrement. */
19261 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19262 if (mask & (1 << reg))
19263 break;
19264
19265 if (reg > LAST_LO_REGNUM)
19266 {
19267 /* Make sure we have a register available for stack decrement. */
19268 mask |= 1 << LAST_LO_REGNUM;
19269 }
19270 }
19271
19272 return mask;
19273 }
19274
19275
19276 /* Return the number of bytes required to save VFP registers. */
19277 static int
19278 arm_get_vfp_saved_size (void)
19279 {
19280 unsigned int regno;
19281 int count;
19282 int saved;
19283
19284 saved = 0;
19285 /* Space for saved VFP registers. */
19286 if (TARGET_HARD_FLOAT && TARGET_VFP)
19287 {
19288 count = 0;
19289 for (regno = FIRST_VFP_REGNUM;
19290 regno < LAST_VFP_REGNUM;
19291 regno += 2)
19292 {
19293 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19294 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19295 {
19296 if (count > 0)
19297 {
19298 /* Workaround ARM10 VFPr1 bug. */
19299 if (count == 2 && !arm_arch6)
19300 count++;
19301 saved += count * 8;
19302 }
19303 count = 0;
19304 }
19305 else
19306 count++;
19307 }
19308 if (count > 0)
19309 {
19310 if (count == 2 && !arm_arch6)
19311 count++;
19312 saved += count * 8;
19313 }
19314 }
19315 return saved;
19316 }
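
/* A quick sanity check of the arithmetic above (illustrative only): a
   contiguous run of four live D registers contributes 4 * 8 = 32 bytes,
   while a run of exactly two D registers on a pre-ARMv6 core is padded
   to three (24 bytes) because of the ARM10 VFPr1 workaround.  */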
19317
19318
19319 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19320 everything bar the final return instruction. If SIMPLE_RETURN is true, then
19321 do not output the epilogue, because it has already been emitted in RTL. */
19322 const char *
19323 output_return_instruction (rtx operand, bool really_return, bool reverse,
19324 bool simple_return)
19325 {
19326 char conditional[10];
19327 char instr[100];
19328 unsigned reg;
19329 unsigned long live_regs_mask;
19330 unsigned long func_type;
19331 arm_stack_offsets *offsets;
19332
19333 func_type = arm_current_func_type ();
19334
19335 if (IS_NAKED (func_type))
19336 return "";
19337
19338 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19339 {
19340 /* If this function was declared non-returning, and we have
19341 found a tail call, then we have to trust that the called
19342 function won't return. */
19343 if (really_return)
19344 {
19345 rtx ops[2];
19346
19347 /* Otherwise, trap an attempted return by aborting. */
19348 ops[0] = operand;
19349 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19350 : "abort");
19351 assemble_external_libcall (ops[1]);
19352 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19353 }
19354
19355 return "";
19356 }
19357
19358 gcc_assert (!cfun->calls_alloca || really_return);
19359
19360 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19361
19362 cfun->machine->return_used_this_function = 1;
19363
19364 offsets = arm_get_frame_offsets ();
19365 live_regs_mask = offsets->saved_regs_mask;
19366
19367 if (!simple_return && live_regs_mask)
19368 {
19369 const char * return_reg;
19370
19371 /* If we do not have any special requirements for function exit
19372 (e.g. interworking) then we can load the return address
19373 directly into the PC. Otherwise we must load it into LR. */
19374 if (really_return
19375 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19376 return_reg = reg_names[PC_REGNUM];
19377 else
19378 return_reg = reg_names[LR_REGNUM];
19379
19380 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19381 {
19382 /* There are three possible reasons for the IP register
19383 being saved: 1) a stack frame was created, in which case
19384 IP contains the old stack pointer, or 2) an ISR routine
19385 corrupted it, or 3) it was saved to align the stack on
19386 iWMMXt. In case 1, restore IP into SP, otherwise just
19387 restore IP. */
19388 if (frame_pointer_needed)
19389 {
19390 live_regs_mask &= ~ (1 << IP_REGNUM);
19391 live_regs_mask |= (1 << SP_REGNUM);
19392 }
19393 else
19394 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19395 }
19396
19397 /* On some ARM architectures it is faster to use LDR rather than
19398 LDM to load a single register. On other architectures, the
19399 cost is the same. In 26 bit mode, or for exception handlers,
19400 we have to use LDM to load the PC so that the CPSR is also
19401 restored. */
19402 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19403 if (live_regs_mask == (1U << reg))
19404 break;
19405
19406 if (reg <= LAST_ARM_REGNUM
19407 && (reg != LR_REGNUM
19408 || ! really_return
19409 || ! IS_INTERRUPT (func_type)))
19410 {
19411 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19412 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19413 }
19414 else
19415 {
19416 char *p;
19417 int first = 1;
19418
19419 /* Generate the load multiple instruction to restore the
19420 registers. Note we can get here, even if
19421 frame_pointer_needed is true, but only if sp already
19422 points to the base of the saved core registers. */
19423 if (live_regs_mask & (1 << SP_REGNUM))
19424 {
19425 unsigned HOST_WIDE_INT stack_adjust;
19426
19427 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19428 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19429
19430 if (stack_adjust && arm_arch5 && TARGET_ARM)
19431 if (TARGET_UNIFIED_ASM)
19432 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19433 else
19434 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19435 else
19436 {
19437 /* If we can't use ldmib (SA110 bug),
19438 then try to pop r3 instead. */
19439 if (stack_adjust)
19440 live_regs_mask |= 1 << 3;
19441
19442 if (TARGET_UNIFIED_ASM)
19443 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19444 else
19445 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19446 }
19447 }
19448 else
19449 if (TARGET_UNIFIED_ASM)
19450 sprintf (instr, "pop%s\t{", conditional);
19451 else
19452 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19453
19454 p = instr + strlen (instr);
19455
19456 for (reg = 0; reg <= SP_REGNUM; reg++)
19457 if (live_regs_mask & (1 << reg))
19458 {
19459 int l = strlen (reg_names[reg]);
19460
19461 if (first)
19462 first = 0;
19463 else
19464 {
19465 memcpy (p, ", ", 2);
19466 p += 2;
19467 }
19468
19469 memcpy (p, "%|", 2);
19470 memcpy (p + 2, reg_names[reg], l);
19471 p += l + 2;
19472 }
19473
19474 if (live_regs_mask & (1 << LR_REGNUM))
19475 {
19476 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19477 /* If returning from an interrupt, restore the CPSR. */
19478 if (IS_INTERRUPT (func_type))
19479 strcat (p, "^");
19480 }
19481 else
19482 strcpy (p, "}");
19483 }
19484
19485 output_asm_insn (instr, & operand);
19486
19487 /* See if we need to generate an extra instruction to
19488 perform the actual function return. */
19489 if (really_return
19490 && func_type != ARM_FT_INTERWORKED
19491 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19492 {
19493 /* The return has already been handled
19494 by loading the LR into the PC. */
19495 return "";
19496 }
19497 }
19498
19499 if (really_return)
19500 {
19501 switch ((int) ARM_FUNC_TYPE (func_type))
19502 {
19503 case ARM_FT_ISR:
19504 case ARM_FT_FIQ:
19505 /* ??? This is wrong for unified assembly syntax. */
19506 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19507 break;
19508
19509 case ARM_FT_INTERWORKED:
19510 sprintf (instr, "bx%s\t%%|lr", conditional);
19511 break;
19512
19513 case ARM_FT_EXCEPTION:
19514 /* ??? This is wrong for unified assembly syntax. */
19515 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19516 break;
19517
19518 default:
19519 /* Use bx if it's available. */
19520 if (arm_arch5 || arm_arch4t)
19521 sprintf (instr, "bx%s\t%%|lr", conditional);
19522 else
19523 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19524 break;
19525 }
19526
19527 output_asm_insn (instr, & operand);
19528 }
19529
19530 return "";
19531 }
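
/* Typical output of the function above (illustrative, assuming unified
   syntax and no interworking requirement): a function that saved
   {r4, lr} returns with

	pop	{r4, pc}

   while a function that saved only lr takes the single-register path and
   returns with

	ldr	pc, [sp], #4

   In both cases the return address goes straight into the PC, so no
   separate return instruction is needed.  */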
19532
19533 /* Write the function name into the code section, directly preceding
19534 the function prologue.
19535
19536 Code will be output similar to this:
19537 t0
19538 .ascii "arm_poke_function_name", 0
19539 .align
19540 t1
19541 .word 0xff000000 + (t1 - t0)
19542 arm_poke_function_name
19543 mov ip, sp
19544 stmfd sp!, {fp, ip, lr, pc}
19545 sub fp, ip, #4
19546
19547 When performing a stack backtrace, code can inspect the value
19548 of 'pc' stored at 'fp' + 0. If the trace function then looks
19549 at location pc - 12 and the top 8 bits are set, then we know
19550 that there is a function name embedded immediately preceding this
19551 location, whose length is given by ((pc[-3]) & ~0xff000000).
19552
19553 We assume that pc is declared as a pointer to an unsigned long.
19554
19555 It is of no benefit to output the function name if we are assembling
19556 a leaf function. These function types will not contain a stack
19557 backtrace structure, therefore it is not possible to determine the
19558 function name. */
19559 void
19560 arm_poke_function_name (FILE *stream, const char *name)
19561 {
19562 unsigned long alignlength;
19563 unsigned long length;
19564 rtx x;
19565
19566 length = strlen (name) + 1;
19567 alignlength = ROUND_UP_WORD (length);
19568
19569 ASM_OUTPUT_ASCII (stream, name, length);
19570 ASM_OUTPUT_ALIGN (stream, 2);
19571 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19572 assemble_aligned_integer (UNITS_PER_WORD, x);
19573 }
19574
19575 /* Place some comments into the assembler stream
19576 describing the current function. */
19577 static void
19578 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19579 {
19580 unsigned long func_type;
19581
19582 /* ??? Do we want to print some of the below anyway? */
19583 if (TARGET_THUMB1)
19584 return;
19585
19586 /* Sanity check. */
19587 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19588
19589 func_type = arm_current_func_type ();
19590
19591 switch ((int) ARM_FUNC_TYPE (func_type))
19592 {
19593 default:
19594 case ARM_FT_NORMAL:
19595 break;
19596 case ARM_FT_INTERWORKED:
19597 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19598 break;
19599 case ARM_FT_ISR:
19600 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19601 break;
19602 case ARM_FT_FIQ:
19603 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19604 break;
19605 case ARM_FT_EXCEPTION:
19606 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19607 break;
19608 }
19609
19610 if (IS_NAKED (func_type))
19611 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19612
19613 if (IS_VOLATILE (func_type))
19614 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19615
19616 if (IS_NESTED (func_type))
19617 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19618 if (IS_STACKALIGN (func_type))
19619 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19620
19621 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19622 crtl->args.size,
19623 crtl->args.pretend_args_size, frame_size);
19624
19625 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19626 frame_pointer_needed,
19627 cfun->machine->uses_anonymous_args);
19628
19629 if (cfun->machine->lr_save_eliminated)
19630 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19631
19632 if (crtl->calls_eh_return)
19633 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19634
19635 }
19636
19637 static void
19638 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19639 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19640 {
19641 arm_stack_offsets *offsets;
19642
19643 if (TARGET_THUMB1)
19644 {
19645 int regno;
19646
19647 /* Emit any call-via-reg trampolines that are needed for v4t support
19648 of call_reg and call_value_reg type insns. */
19649 for (regno = 0; regno < LR_REGNUM; regno++)
19650 {
19651 rtx label = cfun->machine->call_via[regno];
19652
19653 if (label != NULL)
19654 {
19655 switch_to_section (function_section (current_function_decl));
19656 targetm.asm_out.internal_label (asm_out_file, "L",
19657 CODE_LABEL_NUMBER (label));
19658 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19659 }
19660 }
19661
19662 /* ??? Probably not safe to set this here, since it assumes that a
19663 function will be emitted as assembly immediately after we generate
19664 RTL for it. This does not happen for inline functions. */
19665 cfun->machine->return_used_this_function = 0;
19666 }
19667 else /* TARGET_32BIT */
19668 {
19669 /* We need to take into account any stack-frame rounding. */
19670 offsets = arm_get_frame_offsets ();
19671
19672 gcc_assert (!use_return_insn (FALSE, NULL)
19673 || (cfun->machine->return_used_this_function != 0)
19674 || offsets->saved_regs == offsets->outgoing_args
19675 || frame_pointer_needed);
19676 }
19677 }
19678
19679 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19680 STR and STRD. If an even number of registers are being pushed, one
19681 or more STRD patterns are created for each register pair. If an
19682 odd number of registers are pushed, emit an initial STR followed by
19683 as many STRD instructions as are needed. This works best when the
19684 stack is initially 64-bit aligned (the normal case), since it
19685 ensures that each STRD is also 64-bit aligned. */
19686 static void
19687 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19688 {
19689 int num_regs = 0;
19690 int i;
19691 int regno;
19692 rtx par = NULL_RTX;
19693 rtx dwarf = NULL_RTX;
19694 rtx tmp;
19695 bool first = true;
19696
19697 num_regs = bit_count (saved_regs_mask);
19698
19699 /* Must be at least one register to save, and can't save SP or PC. */
19700 gcc_assert (num_regs > 0 && num_regs <= 14);
19701 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19702 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19703
19704 /* Create sequence for DWARF info. All the frame-related data for
19705 debugging is held in this wrapper. */
19706 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19707
19708 /* Describe the stack adjustment. */
19709 tmp = gen_rtx_SET (VOIDmode,
19710 stack_pointer_rtx,
19711 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19712 RTX_FRAME_RELATED_P (tmp) = 1;
19713 XVECEXP (dwarf, 0, 0) = tmp;
19714
19715 /* Find the first register. */
19716 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19717 ;
19718
19719 i = 0;
19720
19721 /* If there's an odd number of registers to push, start off by
19722 pushing a single register. This ensures that subsequent strd
19723 operations are dword aligned (assuming that SP was originally
19724 64-bit aligned). */
19725 if ((num_regs & 1) != 0)
19726 {
19727 rtx reg, mem, insn;
19728
19729 reg = gen_rtx_REG (SImode, regno);
19730 if (num_regs == 1)
19731 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19732 stack_pointer_rtx));
19733 else
19734 mem = gen_frame_mem (Pmode,
19735 gen_rtx_PRE_MODIFY
19736 (Pmode, stack_pointer_rtx,
19737 plus_constant (Pmode, stack_pointer_rtx,
19738 -4 * num_regs)));
19739
19740 tmp = gen_rtx_SET (VOIDmode, mem, reg);
19741 RTX_FRAME_RELATED_P (tmp) = 1;
19742 insn = emit_insn (tmp);
19743 RTX_FRAME_RELATED_P (insn) = 1;
19744 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19745 tmp = gen_rtx_SET (VOIDmode, gen_frame_mem (Pmode, stack_pointer_rtx),
19746 reg);
19747 RTX_FRAME_RELATED_P (tmp) = 1;
19748 i++;
19749 regno++;
19750 XVECEXP (dwarf, 0, i) = tmp;
19751 first = false;
19752 }
19753
19754 while (i < num_regs)
19755 if (saved_regs_mask & (1 << regno))
19756 {
19757 rtx reg1, reg2, mem1, mem2;
19758 rtx tmp0, tmp1, tmp2;
19759 int regno2;
19760
19761 /* Find the register to pair with this one. */
19762 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19763 regno2++)
19764 ;
19765
19766 reg1 = gen_rtx_REG (SImode, regno);
19767 reg2 = gen_rtx_REG (SImode, regno2);
19768
19769 if (first)
19770 {
19771 rtx insn;
19772
19773 first = false;
19774 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19775 stack_pointer_rtx,
19776 -4 * num_regs));
19777 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19778 stack_pointer_rtx,
19779 -4 * (num_regs - 1)));
19780 tmp0 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19781 plus_constant (Pmode, stack_pointer_rtx,
19782 -4 * (num_regs)));
19783 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19784 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19785 RTX_FRAME_RELATED_P (tmp0) = 1;
19786 RTX_FRAME_RELATED_P (tmp1) = 1;
19787 RTX_FRAME_RELATED_P (tmp2) = 1;
19788 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19789 XVECEXP (par, 0, 0) = tmp0;
19790 XVECEXP (par, 0, 1) = tmp1;
19791 XVECEXP (par, 0, 2) = tmp2;
19792 insn = emit_insn (par);
19793 RTX_FRAME_RELATED_P (insn) = 1;
19794 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19795 }
19796 else
19797 {
19798 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19799 stack_pointer_rtx,
19800 4 * i));
19801 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19802 stack_pointer_rtx,
19803 4 * (i + 1)));
19804 tmp1 = gen_rtx_SET (VOIDmode, mem1, reg1);
19805 tmp2 = gen_rtx_SET (VOIDmode, mem2, reg2);
19806 RTX_FRAME_RELATED_P (tmp1) = 1;
19807 RTX_FRAME_RELATED_P (tmp2) = 1;
19808 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19809 XVECEXP (par, 0, 0) = tmp1;
19810 XVECEXP (par, 0, 1) = tmp2;
19811 emit_insn (par);
19812 }
19813
19814 /* Create unwind information. This is an approximation. */
19815 tmp1 = gen_rtx_SET (VOIDmode,
19816 gen_frame_mem (Pmode,
19817 plus_constant (Pmode,
19818 stack_pointer_rtx,
19819 4 * i)),
19820 reg1);
19821 tmp2 = gen_rtx_SET (VOIDmode,
19822 gen_frame_mem (Pmode,
19823 plus_constant (Pmode,
19824 stack_pointer_rtx,
19825 4 * (i + 1))),
19826 reg2);
19827
19828 RTX_FRAME_RELATED_P (tmp1) = 1;
19829 RTX_FRAME_RELATED_P (tmp2) = 1;
19830 XVECEXP (dwarf, 0, i + 1) = tmp1;
19831 XVECEXP (dwarf, 0, i + 2) = tmp2;
19832 i += 2;
19833 regno = regno2 + 1;
19834 }
19835 else
19836 regno++;
19837
19838 return;
19839 }
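
/* For example (illustrative only), pushing the odd set {r4, r5, r6}
   is expected to come out as

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

   while the even set {r4, r5, r6, r7} starts with a write-back STRD:

	strd	r4, r5, [sp, #-16]!
	strd	r6, r7, [sp, #8]
*/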
19840
19841 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19842 whenever possible; otherwise it emits single-word stores. The first store
19843 also allocates stack space for all saved registers, using writeback with
19844 pre-indexed addressing. All other stores use offset addressing. If no STRD
19845 can be emitted, this function emits a sequence of single-word stores,
19846 and not an STM as before, because single-word stores provide more freedom
19847 in scheduling and can be turned into an STM by peephole optimizations. */
19848 static void
19849 arm_emit_strd_push (unsigned long saved_regs_mask)
19850 {
19851 int num_regs = 0;
19852 int i, j, dwarf_index = 0;
19853 int offset = 0;
19854 rtx dwarf = NULL_RTX;
19855 rtx insn = NULL_RTX;
19856 rtx tmp, mem;
19857
19858 /* TODO: More efficient code could be emitted by changing the
19859 layout, e.g., first push all pairs that can use STRD to keep the
19860 stack aligned, and then push all other registers. */
19861 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19862 if (saved_regs_mask & (1 << i))
19863 num_regs++;
19864
19865 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19866 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19867 gcc_assert (num_regs > 0);
19868
19869 /* Create sequence for DWARF info. */
19870 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19871
19872 /* For dwarf info, we generate explicit stack update. */
19873 tmp = gen_rtx_SET (VOIDmode,
19874 stack_pointer_rtx,
19875 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19876 RTX_FRAME_RELATED_P (tmp) = 1;
19877 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19878
19879 /* Save registers. */
19880 offset = - 4 * num_regs;
19881 j = 0;
19882 while (j <= LAST_ARM_REGNUM)
19883 if (saved_regs_mask & (1 << j))
19884 {
19885 if ((j % 2 == 0)
19886 && (saved_regs_mask & (1 << (j + 1))))
19887 {
19888 /* The current register and the next register form a register pair
19889 for which an STRD can be generated. */
19890 if (offset < 0)
19891 {
19892 /* Allocate stack space for all saved registers. */
19893 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19894 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19895 mem = gen_frame_mem (DImode, tmp);
19896 offset = 0;
19897 }
19898 else if (offset > 0)
19899 mem = gen_frame_mem (DImode,
19900 plus_constant (Pmode,
19901 stack_pointer_rtx,
19902 offset));
19903 else
19904 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19905
19906 tmp = gen_rtx_SET (DImode, mem, gen_rtx_REG (DImode, j));
19907 RTX_FRAME_RELATED_P (tmp) = 1;
19908 tmp = emit_insn (tmp);
19909
19910 /* Record the first store insn. */
19911 if (dwarf_index == 1)
19912 insn = tmp;
19913
19914 /* Generate dwarf info. */
19915 mem = gen_frame_mem (SImode,
19916 plus_constant (Pmode,
19917 stack_pointer_rtx,
19918 offset));
19919 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19920 RTX_FRAME_RELATED_P (tmp) = 1;
19921 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19922
19923 mem = gen_frame_mem (SImode,
19924 plus_constant (Pmode,
19925 stack_pointer_rtx,
19926 offset + 4));
19927 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j + 1));
19928 RTX_FRAME_RELATED_P (tmp) = 1;
19929 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19930
19931 offset += 8;
19932 j += 2;
19933 }
19934 else
19935 {
19936 /* Emit a single word store. */
19937 if (offset < 0)
19938 {
19939 /* Allocate stack space for all saved registers. */
19940 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19941 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19942 mem = gen_frame_mem (SImode, tmp);
19943 offset = 0;
19944 }
19945 else if (offset > 0)
19946 mem = gen_frame_mem (SImode,
19947 plus_constant (Pmode,
19948 stack_pointer_rtx,
19949 offset));
19950 else
19951 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19952
19953 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19954 RTX_FRAME_RELATED_P (tmp) = 1;
19955 tmp = emit_insn (tmp);
19956
19957 /* Record the first store insn. */
19958 if (dwarf_index == 1)
19959 insn = tmp;
19960
19961 /* Generate dwarf info. */
19962 mem = gen_frame_mem (SImode,
19963 plus_constant (Pmode,
19964 stack_pointer_rtx,
19965 offset));
19966 tmp = gen_rtx_SET (SImode, mem, gen_rtx_REG (SImode, j));
19967 RTX_FRAME_RELATED_P (tmp) = 1;
19968 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19969
19970 offset += 4;
19971 j += 1;
19972 }
19973 }
19974 else
19975 j++;
19976
19977 /* Attach dwarf info to the first insn we generate. */
19978 gcc_assert (insn != NULL_RTX);
19979 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19980 RTX_FRAME_RELATED_P (insn) = 1;
19981 }
19982
19983 /* Generate and emit an insn that we will recognize as a push_multi.
19984 Unfortunately, since this insn does not reflect very well the actual
19985 semantics of the operation, we need to annotate the insn for the benefit
19986 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19987 MASK for registers that should be annotated for DWARF2 frame unwind
19988 information. */
19989 static rtx
19990 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
19991 {
19992 int num_regs = 0;
19993 int num_dwarf_regs = 0;
19994 int i, j;
19995 rtx par;
19996 rtx dwarf;
19997 int dwarf_par_index;
19998 rtx tmp, reg;
19999
20000 /* We don't record the PC in the dwarf frame information. */
20001 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20002
20003 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20004 {
20005 if (mask & (1 << i))
20006 num_regs++;
20007 if (dwarf_regs_mask & (1 << i))
20008 num_dwarf_regs++;
20009 }
20010
20011 gcc_assert (num_regs && num_regs <= 16);
20012 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20013
20014 /* For the body of the insn we are going to generate an UNSPEC in
20015 parallel with several USEs. This allows the insn to be recognized
20016 by the push_multi pattern in the arm.md file.
20017
20018 The body of the insn looks something like this:
20019
20020 (parallel [
20021 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20022 (const_int:SI <num>)))
20023 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20024 (use (reg:SI XX))
20025 (use (reg:SI YY))
20026 ...
20027 ])
20028
20029 For the frame note however, we try to be more explicit and actually
20030 show each register being stored into the stack frame, plus a (single)
20031 decrement of the stack pointer. We do it this way in order to be
20032 friendly to the stack unwinding code, which only wants to see a single
20033 stack decrement per instruction. The RTL we generate for the note looks
20034 something like this:
20035
20036 (sequence [
20037 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20038 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20039 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20040 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20041 ...
20042 ])
20043
20044 FIXME: In an ideal world the PRE_MODIFY would not exist and
20045 instead we'd have a parallel expression detailing all
20046 the stores to the various memory addresses so that debug
20047 information is more up-to-date. Remember however while writing
20048 this to take care of the constraints with the push instruction.
20049
20050 Note also that this has to be taken care of for the VFP registers.
20051
20052 For more see PR43399. */
20053
20054 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20055 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20056 dwarf_par_index = 1;
20057
20058 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20059 {
20060 if (mask & (1 << i))
20061 {
20062 reg = gen_rtx_REG (SImode, i);
20063
20064 XVECEXP (par, 0, 0)
20065 = gen_rtx_SET (VOIDmode,
20066 gen_frame_mem
20067 (BLKmode,
20068 gen_rtx_PRE_MODIFY (Pmode,
20069 stack_pointer_rtx,
20070 plus_constant
20071 (Pmode, stack_pointer_rtx,
20072 -4 * num_regs))
20073 ),
20074 gen_rtx_UNSPEC (BLKmode,
20075 gen_rtvec (1, reg),
20076 UNSPEC_PUSH_MULT));
20077
20078 if (dwarf_regs_mask & (1 << i))
20079 {
20080 tmp = gen_rtx_SET (VOIDmode,
20081 gen_frame_mem (SImode, stack_pointer_rtx),
20082 reg);
20083 RTX_FRAME_RELATED_P (tmp) = 1;
20084 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20085 }
20086
20087 break;
20088 }
20089 }
20090
20091 for (j = 1, i++; j < num_regs; i++)
20092 {
20093 if (mask & (1 << i))
20094 {
20095 reg = gen_rtx_REG (SImode, i);
20096
20097 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20098
20099 if (dwarf_regs_mask & (1 << i))
20100 {
20101 tmp
20102 = gen_rtx_SET (VOIDmode,
20103 gen_frame_mem
20104 (SImode,
20105 plus_constant (Pmode, stack_pointer_rtx,
20106 4 * j)),
20107 reg);
20108 RTX_FRAME_RELATED_P (tmp) = 1;
20109 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20110 }
20111
20112 j++;
20113 }
20114 }
20115
20116 par = emit_insn (par);
20117
20118 tmp = gen_rtx_SET (VOIDmode,
20119 stack_pointer_rtx,
20120 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20121 RTX_FRAME_RELATED_P (tmp) = 1;
20122 XVECEXP (dwarf, 0, 0) = tmp;
20123
20124 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20125
20126 return par;
20127 }
20128
20129 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20130 SIZE is the offset to be adjusted.
20131 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20132 static void
20133 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20134 {
20135 rtx dwarf;
20136
20137 RTX_FRAME_RELATED_P (insn) = 1;
20138 dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
20139 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20140 }
20141
20142 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20143 SAVED_REGS_MASK shows which registers need to be restored.
20144
20145 Unfortunately, since this insn does not reflect very well the actual
20146 semantics of the operation, we need to annotate the insn for the benefit
20147 of DWARF2 frame unwind information. */
20148 static void
20149 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20150 {
20151 int num_regs = 0;
20152 int i, j;
20153 rtx par;
20154 rtx dwarf = NULL_RTX;
20155 rtx tmp, reg;
20156 bool return_in_pc;
20157 int offset_adj;
20158 int emit_update;
20159
20160 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20161 offset_adj = return_in_pc ? 1 : 0;
20162 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20163 if (saved_regs_mask & (1 << i))
20164 num_regs++;
20165
20166 gcc_assert (num_regs && num_regs <= 16);
20167
20168 /* If SP is in the reglist, then we don't emit an SP update insn. */
20169 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20170
20171 /* The parallel needs to hold num_regs SETs
20172 and one SET for the stack update. */
20173 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20174
20175 if (return_in_pc)
20176 {
20177 tmp = ret_rtx;
20178 XVECEXP (par, 0, 0) = tmp;
20179 }
20180
20181 if (emit_update)
20182 {
20183 /* Increment the stack pointer, based on there being
20184 num_regs 4-byte registers to restore. */
20185 tmp = gen_rtx_SET (VOIDmode,
20186 stack_pointer_rtx,
20187 plus_constant (Pmode,
20188 stack_pointer_rtx,
20189 4 * num_regs));
20190 RTX_FRAME_RELATED_P (tmp) = 1;
20191 XVECEXP (par, 0, offset_adj) = tmp;
20192 }
20193
20194 /* Now restore every reg, which may include PC. */
20195 for (j = 0, i = 0; j < num_regs; i++)
20196 if (saved_regs_mask & (1 << i))
20197 {
20198 reg = gen_rtx_REG (SImode, i);
20199 if ((num_regs == 1) && emit_update && !return_in_pc)
20200 {
20201 /* Emit single load with writeback. */
20202 tmp = gen_frame_mem (SImode,
20203 gen_rtx_POST_INC (Pmode,
20204 stack_pointer_rtx));
20205 tmp = emit_insn (gen_rtx_SET (VOIDmode, reg, tmp));
20206 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20207 return;
20208 }
20209
20210 tmp = gen_rtx_SET (VOIDmode,
20211 reg,
20212 gen_frame_mem
20213 (SImode,
20214 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20215 RTX_FRAME_RELATED_P (tmp) = 1;
20216 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20217
20218 /* We need to maintain a sequence for DWARF info too. Since the dwarf
20219 info should not include PC, skip it. */
20220 if (i != PC_REGNUM)
20221 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20222
20223 j++;
20224 }
20225
20226 if (return_in_pc)
20227 par = emit_jump_insn (par);
20228 else
20229 par = emit_insn (par);
20230
20231 REG_NOTES (par) = dwarf;
20232 if (!return_in_pc)
20233 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20234 stack_pointer_rtx, stack_pointer_rtx);
20235 }
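
/* As an illustration of the RTL generated above (in the same spirit as
   the push_multi note earlier): popping {r4, r5, pc} produces a jump
   insn whose body is roughly

     (parallel [
	(return)
	(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 12)))
	(set (reg:SI r4) (mem:SI (reg:SI sp)))
	(set (reg:SI r5) (mem:SI (plus:SI (reg:SI sp) (const_int 4))))
	(set (reg:SI pc) (mem:SI (plus:SI (reg:SI sp) (const_int 8))))
     ])

   with REG_CFA_RESTORE notes attached for r4 and r5 only, since PC is
   not described in the dwarf info.  */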
20236
20237 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20238 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20239
20240 Unfortunately, since this insn does not reflect the actual semantics of
20241 the operation very well, we need to annotate the insn for the benefit
20242 of DWARF2 frame unwind information. */
20243 static void
20244 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20245 {
20246 int i, j;
20247 rtx par;
20248 rtx dwarf = NULL_RTX;
20249 rtx tmp, reg;
20250
20251 gcc_assert (num_regs && num_regs <= 32);
20252
20253 /* Work around the ARM10 VFPr1 bug. */
20254 if (num_regs == 2 && !arm_arch6)
20255 {
20256 if (first_reg == 15)
20257 first_reg--;
20258
20259 num_regs++;
20260 }
20261
20262 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20263 there could be up to 32 D-registers to restore.
20264 If there are more than 16 D-registers, make two recursive calls,
20265 each of which emits one pop_multi instruction. */
20266 if (num_regs > 16)
20267 {
20268 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20269 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20270 return;
20271 }
20272
20273 /* The parallel needs to hold num_regs SETs
20274 and one SET for the stack update. */
20275 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20276
20277 /* Increment the stack pointer, based on there being
20278 num_regs 8-byte registers to restore. */
20279 tmp = gen_rtx_SET (VOIDmode,
20280 base_reg,
20281 plus_constant (Pmode, base_reg, 8 * num_regs));
20282 RTX_FRAME_RELATED_P (tmp) = 1;
20283 XVECEXP (par, 0, 0) = tmp;
20284
20285 /* Now show every reg that will be restored, using a SET for each. */
20286 for (j = 0, i=first_reg; j < num_regs; i += 2)
20287 {
20288 reg = gen_rtx_REG (DFmode, i);
20289
20290 tmp = gen_rtx_SET (VOIDmode,
20291 reg,
20292 gen_frame_mem
20293 (DFmode,
20294 plus_constant (Pmode, base_reg, 8 * j)));
20295 RTX_FRAME_RELATED_P (tmp) = 1;
20296 XVECEXP (par, 0, j + 1) = tmp;
20297
20298 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20299
20300 j++;
20301 }
20302
20303 par = emit_insn (par);
20304 REG_NOTES (par) = dwarf;
20305
20306 /* Make sure the CFA is not left based on IP_REGNUM, so that unwinding from FP works. */
20307 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20308 {
20309 RTX_FRAME_RELATED_P (par) = 1;
20310 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20311 }
20312 else
20313 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20314 base_reg, base_reg);
20315 }
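
/* Illustrative sketch, not part of the compiler: a standalone model (under
   "#if 0") of the chunking above, where a pop of more than 16 D-registers is
   split into two recursive calls of at most 16 each.  Plain D-register
   indices are used for simplicity instead of GCC's internal VFP register
   numbers; the function name and the example range are hypothetical.  */
#if 0
#include <stdio.h>

static void
example_emit_vfp_pop (int first_reg, int num_regs)
{
  if (num_regs > 16)
    {
      /* Same split as above: the first 16 registers, then the rest.  */
      example_emit_vfp_pop (first_reg, 16);
      example_emit_vfp_pop (first_reg + 16, num_regs - 16);
      return;
    }
  printf ("vldm covering d%d-d%d (%d bytes of stack)\n",
          first_reg, first_reg + num_regs - 1, 8 * num_regs);
}

int
main (void)
{
  /* Restoring d0-d23 prints two ranges: d0-d15 and d16-d23.  */
  example_emit_vfp_pop (0, 24);
  return 0;
}
#endif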
20316
20317 /* Generate and emit a pattern that will be recognized as an LDRD pattern.
20318 If an even number of registers is being popped, multiple LDRD patterns are
20319 created for all register pairs. If an odd number of registers is popped,
20320 the last register is loaded using an LDR pattern. */
20321 static void
20322 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20323 {
20324 int num_regs = 0;
20325 int i, j;
20326 rtx par = NULL_RTX;
20327 rtx dwarf = NULL_RTX;
20328 rtx tmp, reg, tmp1;
20329 bool return_in_pc;
20330
20331 return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
20332 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20333 if (saved_regs_mask & (1 << i))
20334 num_regs++;
20335
20336 gcc_assert (num_regs && num_regs <= 16);
20337
20338 /* We cannot generate an ldrd for PC, so reduce the count if PC is
20339 to be popped. If num_regs was even, it now becomes odd and a pop
20340 including PC can be generated; if it was odd, it is now even and
20341 an ldr with return can be generated for PC. */
20342 if (return_in_pc)
20343 num_regs--;
20344
20345 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20346
20347 /* Var j iterates over all the register numbers to gather the registers in
20348 saved_regs_mask. Var i gives the index of a saved register in the stack
20349 frame. A PARALLEL RTX holding a register pair is created here, so that
20350 the pattern for LDRD can be matched. As PC is always the last register to
20351 be popped, and we have already decremented num_regs if PC is present, we
20352 don't have to worry about PC in this loop. */
20353 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20354 if (saved_regs_mask & (1 << j))
20355 {
20356 /* Create RTX for memory load. */
20357 reg = gen_rtx_REG (SImode, j);
20358 tmp = gen_rtx_SET (SImode,
20359 reg,
20360 gen_frame_mem (SImode,
20361 plus_constant (Pmode,
20362 stack_pointer_rtx, 4 * i)));
20363 RTX_FRAME_RELATED_P (tmp) = 1;
20364
20365 if (i % 2 == 0)
20366 {
20367 /* When saved-register index (i) is even, the RTX to be emitted is
20368 yet to be created. Hence create it first. The LDRD pattern we
20369 are generating is:
20370 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20371 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20372 where target registers need not be consecutive. */
20373 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20374 dwarf = NULL_RTX;
20375 }
20376
20377 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
20378 added as the 0th element; if i is odd, reg_i is added as the 1st element
20379 of the LDRD pattern shown above. */
20380 XVECEXP (par, 0, (i % 2)) = tmp;
20381 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20382
20383 if ((i % 2) == 1)
20384 {
20385 /* When the saved-register index (i) is odd, RTXs for both registers of
20386 the LDRD pattern given above have been generated, and the pattern can
20387 be emitted now. */
20388 par = emit_insn (par);
20389 REG_NOTES (par) = dwarf;
20390 RTX_FRAME_RELATED_P (par) = 1;
20391 }
20392
20393 i++;
20394 }
20395
20396 /* If the number of registers pushed is odd AND return_in_pc is false, OR
20397 the number of registers is even AND return_in_pc is true, the last register
20398 is popped using LDR. It can be PC as well. Hence, adjust the stack first
20399 and then emit an LDR with post-increment. */
20400
20401 /* Increment the stack pointer, based on there being
20402 num_regs 4-byte registers to restore. */
20403 tmp = gen_rtx_SET (VOIDmode,
20404 stack_pointer_rtx,
20405 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20406 RTX_FRAME_RELATED_P (tmp) = 1;
20407 tmp = emit_insn (tmp);
20408 if (!return_in_pc)
20409 {
20410 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20411 stack_pointer_rtx, stack_pointer_rtx);
20412 }
20413
20414 dwarf = NULL_RTX;
20415
20416 if (((num_regs % 2) == 1 && !return_in_pc)
20417 || ((num_regs % 2) == 0 && return_in_pc))
20418 {
20419 /* Scan for the single register to be popped. Skip until the saved
20420 register is found. */
20421 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20422
20423 /* Gen LDR with post increment here. */
20424 tmp1 = gen_rtx_MEM (SImode,
20425 gen_rtx_POST_INC (SImode,
20426 stack_pointer_rtx));
20427 set_mem_alias_set (tmp1, get_frame_alias_set ());
20428
20429 reg = gen_rtx_REG (SImode, j);
20430 tmp = gen_rtx_SET (SImode, reg, tmp1);
20431 RTX_FRAME_RELATED_P (tmp) = 1;
20432 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20433
20434 if (return_in_pc)
20435 {
20436 /* If return_in_pc, j must be PC_REGNUM. */
20437 gcc_assert (j == PC_REGNUM);
20438 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20439 XVECEXP (par, 0, 0) = ret_rtx;
20440 XVECEXP (par, 0, 1) = tmp;
20441 par = emit_jump_insn (par);
20442 }
20443 else
20444 {
20445 par = emit_insn (tmp);
20446 REG_NOTES (par) = dwarf;
20447 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20448 stack_pointer_rtx, stack_pointer_rtx);
20449 }
20450
20451 }
20452 else if ((num_regs % 2) == 1 && return_in_pc)
20453 {
20454 /* There are 2 registers left to be popped. So, generate the pattern
20455 pop_multiple_with_stack_update_and_return to pop into PC. */
20456 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20457 }
20458
20459 return;
20460 }
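
/* Illustrative sketch, not part of the compiler: the parity rule used above
   to decide how the last one or two registers leave the stack once all full
   LDRD pairs have been emitted.  The helper name example_ldrd_pop_tail and
   the example counts are hypothetical; the code is kept under "#if 0" so it
   is never built.  */
#if 0
#include <stdbool.h>
#include <stdio.h>

/* NUM_REGS is the register count after PC has already been removed from
   it, exactly as in thumb2_emit_ldrd_pop above.  */
static const char *
example_ldrd_pop_tail (int num_regs, bool return_in_pc)
{
  if ((num_regs % 2 == 1 && !return_in_pc)
      || (num_regs % 2 == 0 && return_in_pc))
    return "single LDR with post-increment (possibly loading PC)";
  if (num_regs % 2 == 1 && return_in_pc)
    return "pop_multi of the final register together with PC";
  return "nothing left: every register was covered by an LDRD pair";
}

int
main (void)
{
  printf ("%s\n", example_ldrd_pop_tail (3, false)); /* odd count, no PC  */
  printf ("%s\n", example_ldrd_pop_tail (2, true));  /* even count, + PC  */
  printf ("%s\n", example_ldrd_pop_tail (3, true));  /* odd count, + PC   */
  printf ("%s\n", example_ldrd_pop_tail (4, false)); /* everything paired */
  return 0;
}
#endif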
20461
20462 /* LDRD in ARM mode needs consecutive registers as operands. This function
20463 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20464 offset addressing and then generates one separate stack update. This provides
20465 more scheduling freedom, compared to writeback on every load. However,
20466 if the function returns using load into PC directly
20467 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20468 before the last load. TODO: Add a peephole optimization to recognize
20469 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20470 peephole optimization to merge the load at stack-offset zero
20471 with the stack update instruction using load with writeback
20472 in post-index addressing mode. */
20473 static void
20474 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20475 {
20476 int j = 0;
20477 int offset = 0;
20478 rtx par = NULL_RTX;
20479 rtx dwarf = NULL_RTX;
20480 rtx tmp, mem;
20481
20482 /* Restore saved registers. */
20483 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20484 j = 0;
20485 while (j <= LAST_ARM_REGNUM)
20486 if (saved_regs_mask & (1 << j))
20487 {
20488 if ((j % 2) == 0
20489 && (saved_regs_mask & (1 << (j + 1)))
20490 && (j + 1) != PC_REGNUM)
20491 {
20492 /* The current register and the next register form a register pair for
20493 which LDRD can be generated. PC is always the last register popped, and
20494 we handle it separately. */
20495 if (offset > 0)
20496 mem = gen_frame_mem (DImode,
20497 plus_constant (Pmode,
20498 stack_pointer_rtx,
20499 offset));
20500 else
20501 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20502
20503 tmp = gen_rtx_SET (DImode, gen_rtx_REG (DImode, j), mem);
20504 tmp = emit_insn (tmp);
20505 RTX_FRAME_RELATED_P (tmp) = 1;
20506
20507 /* Generate dwarf info. */
20508
20509 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20510 gen_rtx_REG (SImode, j),
20511 NULL_RTX);
20512 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20513 gen_rtx_REG (SImode, j + 1),
20514 dwarf);
20515
20516 REG_NOTES (tmp) = dwarf;
20517
20518 offset += 8;
20519 j += 2;
20520 }
20521 else if (j != PC_REGNUM)
20522 {
20523 /* Emit a single word load. */
20524 if (offset > 0)
20525 mem = gen_frame_mem (SImode,
20526 plus_constant (Pmode,
20527 stack_pointer_rtx,
20528 offset));
20529 else
20530 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20531
20532 tmp = gen_rtx_SET (SImode, gen_rtx_REG (SImode, j), mem);
20533 tmp = emit_insn (tmp);
20534 RTX_FRAME_RELATED_P (tmp) = 1;
20535
20536 /* Generate dwarf info. */
20537 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20538 gen_rtx_REG (SImode, j),
20539 NULL_RTX);
20540
20541 offset += 4;
20542 j += 1;
20543 }
20544 else /* j == PC_REGNUM */
20545 j++;
20546 }
20547 else
20548 j++;
20549
20550 /* Update the stack. */
20551 if (offset > 0)
20552 {
20553 tmp = gen_rtx_SET (Pmode,
20554 stack_pointer_rtx,
20555 plus_constant (Pmode,
20556 stack_pointer_rtx,
20557 offset));
20558 tmp = emit_insn (tmp);
20559 arm_add_cfa_adjust_cfa_note (tmp, offset,
20560 stack_pointer_rtx, stack_pointer_rtx);
20561 offset = 0;
20562 }
20563
20564 if (saved_regs_mask & (1 << PC_REGNUM))
20565 {
20566 /* Only PC is to be popped. */
20567 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20568 XVECEXP (par, 0, 0) = ret_rtx;
20569 tmp = gen_rtx_SET (SImode,
20570 gen_rtx_REG (SImode, PC_REGNUM),
20571 gen_frame_mem (SImode,
20572 gen_rtx_POST_INC (SImode,
20573 stack_pointer_rtx)));
20574 RTX_FRAME_RELATED_P (tmp) = 1;
20575 XVECEXP (par, 0, 1) = tmp;
20576 par = emit_jump_insn (par);
20577
20578 /* Generate dwarf info. */
20579 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20580 gen_rtx_REG (SImode, PC_REGNUM),
20581 NULL_RTX);
20582 REG_NOTES (par) = dwarf;
20583 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20584 stack_pointer_rtx, stack_pointer_rtx);
20585 }
20586 }
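
/* Illustrative sketch, not part of the compiler: a standalone model (under
   "#if 0") of the pairing walk above.  An even-numbered register whose
   successor is also in the mask (and is not PC) becomes half of an LDRD;
   anything else falls back to a single LDR, and one SP adjustment at the end
   covers the accumulated offset.  The register mask and the printed assembly
   are simplified, hypothetical examples.  */
#if 0
#include <stdio.h>

int
main (void)
{
  /* Hypothetical mask: restore {r4, r5, r6, r8}; 15 stands for PC.  */
  unsigned long mask = (1UL << 4) | (1UL << 5) | (1UL << 6) | (1UL << 8);
  int offset = 0;
  int j = 0;

  while (j <= 14)  /* PC (r15) is handled separately by the real code.  */
    {
      if (!(mask & (1UL << j)))
        {
          j++;
          continue;
        }
      if ((j % 2) == 0 && (mask & (1UL << (j + 1))) && (j + 1) != 15)
        {
          printf ("ldrd r%d, r%d, [sp, #%d]\n", j, j + 1, offset);
          offset += 8;
          j += 2;
        }
      else
        {
          printf ("ldr r%d, [sp, #%d]\n", j, offset);
          offset += 4;
          j++;
        }
    }

  /* One stack update covers everything that was loaded.  */
  printf ("add sp, sp, #%d\n", offset);
  return 0;
}
#endif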
20587
20588 /* Calculate the size of the return value that is passed in registers. */
20589 static unsigned
20590 arm_size_return_regs (void)
20591 {
20592 machine_mode mode;
20593
20594 if (crtl->return_rtx != 0)
20595 mode = GET_MODE (crtl->return_rtx);
20596 else
20597 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20598
20599 return GET_MODE_SIZE (mode);
20600 }
20601
20602 /* Return true if the current function needs to save/restore LR. */
20603 static bool
20604 thumb_force_lr_save (void)
20605 {
20606 return !cfun->machine->lr_save_eliminated
20607 && (!leaf_function_p ()
20608 || thumb_far_jump_used_p ()
20609 || df_regs_ever_live_p (LR_REGNUM));
20610 }
20611
20612 /* We do not know whether r3 will be available, because
20613 there is an indirect tail call happening in this
20614 particular case. */
20615 static bool
20616 is_indirect_tailcall_p (rtx call)
20617 {
20618 rtx pat = PATTERN (call);
20619
20620 /* Indirect tail call. */
20621 pat = XVECEXP (pat, 0, 0);
20622 if (GET_CODE (pat) == SET)
20623 pat = SET_SRC (pat);
20624
20625 pat = XEXP (XEXP (pat, 0), 0);
20626 return REG_P (pat);
20627 }
20628
20629 /* Return true if r3 is used by any of the tail call insns in the
20630 current function. */
20631 static bool
20632 any_sibcall_could_use_r3 (void)
20633 {
20634 edge_iterator ei;
20635 edge e;
20636
20637 if (!crtl->tail_call_emit)
20638 return false;
20639 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20640 if (e->flags & EDGE_SIBCALL)
20641 {
20642 rtx call = BB_END (e->src);
20643 if (!CALL_P (call))
20644 call = prev_nonnote_nondebug_insn (call);
20645 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20646 if (find_regno_fusage (call, USE, 3)
20647 || is_indirect_tailcall_p (call))
20648 return true;
20649 }
20650 return false;
20651 }
20652
20653
20654 /* Compute the distance from register FROM to register TO.
20655 These can be the arg pointer (26), the soft frame pointer (25),
20656 the stack pointer (13) or the hard frame pointer (11).
20657 In thumb mode r7 is used as the soft frame pointer, if needed.
20658 Typical stack layout looks like this:
20659
20660     old stack pointer -> |    |
20661                           ----
20662                          |    | \
20663                          |    |   saved arguments for
20664                          |    |   vararg functions
20665                          |    | /
20666                            --
20667 hard FP & arg pointer -> |    | \
20668                          |    |   stack
20669                          |    |   frame
20670                          |    | /
20671                            --
20672                          |    | \
20673                          |    |   call saved
20674                          |    |   registers
20675    soft frame pointer -> |    | /
20676                            --
20677                          |    | \
20678                          |    |   local
20679                          |    |   variables
20680   locals base pointer -> |    | /
20681                            --
20682                          |    | \
20683                          |    |   outgoing
20684                          |    |   arguments
20685 current stack pointer -> |    | /
20686                            --
20687
20688 For a given function some or all of these stack components
20689 may not be needed, giving rise to the possibility of
20690 eliminating some of the registers.
20691
20692 The values returned by this function must reflect the behavior
20693 of arm_expand_prologue() and arm_compute_save_reg_mask().
20694
20695 The sign of the number returned reflects the direction of stack
20696 growth, so the values are positive for all eliminations except
20697 from the soft frame pointer to the hard frame pointer.
20698
20699 SFP may point just inside the local variables block to ensure correct
20700 alignment. */
20701
20702
20703 /* Calculate stack offsets. These are used to calculate register elimination
20704 offsets and in prologue/epilogue code. Also calculates which registers
20705 should be saved. */
20706
20707 static arm_stack_offsets *
20708 arm_get_frame_offsets (void)
20709 {
20710 struct arm_stack_offsets *offsets;
20711 unsigned long func_type;
20712 int leaf;
20713 int saved;
20714 int core_saved;
20715 HOST_WIDE_INT frame_size;
20716 int i;
20717
20718 offsets = &cfun->machine->stack_offsets;
20719
20720 /* We need to know if we are a leaf function. Unfortunately, it
20721 is possible to be called after start_sequence has been called,
20722 which causes get_insns to return the insns for the sequence,
20723 not the function, which will cause leaf_function_p to return
20724 the incorrect result.
20725
20726 Fortunately, we only need to know about leaf functions once reload has
20727 completed, and the frame size cannot be changed after that time, so we
20728 can safely use the cached value. */
20729
20730 if (reload_completed)
20731 return offsets;
20732
20733 /* Initially this is the size of the local variables. It will be translated
20734 into an offset once we have determined the size of preceding data. */
20735 frame_size = ROUND_UP_WORD (get_frame_size ());
20736
20737 leaf = leaf_function_p ();
20738
20739 /* Space for variadic functions. */
20740 offsets->saved_args = crtl->args.pretend_args_size;
20741
20742 /* In Thumb mode this is incorrect, but never used. */
20743 offsets->frame
20744 = (offsets->saved_args
20745 + arm_compute_static_chain_stack_bytes ()
20746 + (frame_pointer_needed ? 4 : 0));
20747
20748 if (TARGET_32BIT)
20749 {
20750 unsigned int regno;
20751
20752 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20753 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20754 saved = core_saved;
20755
20756 /* We know that SP will be doubleword aligned on entry, and we must
20757 preserve that condition at any subroutine call. We also require the
20758 soft frame pointer to be doubleword aligned. */
20759
20760 if (TARGET_REALLY_IWMMXT)
20761 {
20762 /* Check for the call-saved iWMMXt registers. */
20763 for (regno = FIRST_IWMMXT_REGNUM;
20764 regno <= LAST_IWMMXT_REGNUM;
20765 regno++)
20766 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20767 saved += 8;
20768 }
20769
20770 func_type = arm_current_func_type ();
20771 /* Space for saved VFP registers. */
20772 if (! IS_VOLATILE (func_type)
20773 && TARGET_HARD_FLOAT && TARGET_VFP)
20774 saved += arm_get_vfp_saved_size ();
20775 }
20776 else /* TARGET_THUMB1 */
20777 {
20778 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20779 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20780 saved = core_saved;
20781 if (TARGET_BACKTRACE)
20782 saved += 16;
20783 }
20784
20785 /* Saved registers include the stack frame. */
20786 offsets->saved_regs
20787 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20788 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20789
20790 /* A leaf function does not need any stack alignment if it has nothing
20791 on the stack. */
20792 if (leaf && frame_size == 0
20793 /* However if it calls alloca(), we have a dynamically allocated
20794 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20795 && ! cfun->calls_alloca)
20796 {
20797 offsets->outgoing_args = offsets->soft_frame;
20798 offsets->locals_base = offsets->soft_frame;
20799 return offsets;
20800 }
20801
20802 /* Ensure SFP has the correct alignment. */
20803 if (ARM_DOUBLEWORD_ALIGN
20804 && (offsets->soft_frame & 7))
20805 {
20806 offsets->soft_frame += 4;
20807 /* Try to align stack by pushing an extra reg. Don't bother doing this
20808 when there is a stack frame as the alignment will be rolled into
20809 the normal stack adjustment. */
20810 if (frame_size + crtl->outgoing_args_size == 0)
20811 {
20812 int reg = -1;
20813
20814 /* Register r3 is caller-saved. Normally it does not need to be
20815 saved on entry by the prologue. However if we choose to save
20816 it for padding then we may confuse the compiler into thinking
20817 a prologue sequence is required when in fact it is not. This
20818 will occur when shrink-wrapping if r3 is used as a scratch
20819 register and there are no other callee-saved writes.
20820
20821 This situation can be avoided when other callee-saved registers
20822 are available and r3 is not mandatory if we choose a callee-saved
20823 register for padding. */
20824 bool prefer_callee_reg_p = false;
20825
20826 /* If it is safe to use r3, then do so. This sometimes
20827 generates better code on Thumb-2 by avoiding the need to
20828 use 32-bit push/pop instructions. */
20829 if (! any_sibcall_could_use_r3 ()
20830 && arm_size_return_regs () <= 12
20831 && (offsets->saved_regs_mask & (1 << 3)) == 0
20832 && (TARGET_THUMB2
20833 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20834 {
20835 reg = 3;
20836 if (!TARGET_THUMB2)
20837 prefer_callee_reg_p = true;
20838 }
20839 if (reg == -1
20840 || prefer_callee_reg_p)
20841 {
20842 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20843 {
20844 /* Avoid fixed registers; they may be changed at
20845 arbitrary times so it's unsafe to restore them
20846 during the epilogue. */
20847 if (!fixed_regs[i]
20848 && (offsets->saved_regs_mask & (1 << i)) == 0)
20849 {
20850 reg = i;
20851 break;
20852 }
20853 }
20854 }
20855
20856 if (reg != -1)
20857 {
20858 offsets->saved_regs += 4;
20859 offsets->saved_regs_mask |= (1 << reg);
20860 }
20861 }
20862 }
20863
20864 offsets->locals_base = offsets->soft_frame + frame_size;
20865 offsets->outgoing_args = (offsets->locals_base
20866 + crtl->outgoing_args_size);
20867
20868 if (ARM_DOUBLEWORD_ALIGN)
20869 {
20870 /* Ensure SP remains doubleword aligned. */
20871 if (offsets->outgoing_args & 7)
20872 offsets->outgoing_args += 4;
20873 gcc_assert (!(offsets->outgoing_args & 7));
20874 }
20875
20876 return offsets;
20877 }
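
/* Illustrative sketch, not part of the compiler: the doubleword-alignment
   adjustment applied above to offsets->soft_frame and offsets->outgoing_args,
   modelled on its own (under "#if 0").  Both offsets are always word aligned,
   so rounding up to 8 costs at most one extra word.  The helper name and the
   values are hypothetical.  */
#if 0
#include <stdio.h>

/* Round a word-aligned byte offset up to the next 8-byte boundary.  */
static int
example_align_to_doubleword (int offset)
{
  return (offset & 7) ? offset + 4 : offset;
}

int
main (void)
{
  printf ("%d -> %d\n", 20, example_align_to_doubleword (20)); /* 20 -> 24 */
  printf ("%d -> %d\n", 24, example_align_to_doubleword (24)); /* unchanged */
  return 0;
}
#endif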
20878
20879
20880 /* Calculate the relative offsets for the different stack pointers. Positive
20881 offsets are in the direction of stack growth. */
20882
20883 HOST_WIDE_INT
20884 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20885 {
20886 arm_stack_offsets *offsets;
20887
20888 offsets = arm_get_frame_offsets ();
20889
20890 /* OK, now we have enough information to compute the distances.
20891 There must be an entry in these switch tables for each pair
20892 of registers in ELIMINABLE_REGS, even if some of the entries
20893 seem to be redundant or useless. */
20894 switch (from)
20895 {
20896 case ARG_POINTER_REGNUM:
20897 switch (to)
20898 {
20899 case THUMB_HARD_FRAME_POINTER_REGNUM:
20900 return 0;
20901
20902 case FRAME_POINTER_REGNUM:
20903 /* This is the reverse of the soft frame pointer
20904 to hard frame pointer elimination below. */
20905 return offsets->soft_frame - offsets->saved_args;
20906
20907 case ARM_HARD_FRAME_POINTER_REGNUM:
20908 /* This is only non-zero in the case where the static chain register
20909 is stored above the frame. */
20910 return offsets->frame - offsets->saved_args - 4;
20911
20912 case STACK_POINTER_REGNUM:
20913 /* If nothing has been pushed on the stack at all
20914 then this will return -4. This *is* correct! */
20915 return offsets->outgoing_args - (offsets->saved_args + 4);
20916
20917 default:
20918 gcc_unreachable ();
20919 }
20920 gcc_unreachable ();
20921
20922 case FRAME_POINTER_REGNUM:
20923 switch (to)
20924 {
20925 case THUMB_HARD_FRAME_POINTER_REGNUM:
20926 return 0;
20927
20928 case ARM_HARD_FRAME_POINTER_REGNUM:
20929 /* The hard frame pointer points to the top entry in the
20930 stack frame. The soft frame pointer points to the bottom
20931 entry in the stack frame. If there is no stack frame at
20932 all, then they are identical. */
20933
20934 return offsets->frame - offsets->soft_frame;
20935
20936 case STACK_POINTER_REGNUM:
20937 return offsets->outgoing_args - offsets->soft_frame;
20938
20939 default:
20940 gcc_unreachable ();
20941 }
20942 gcc_unreachable ();
20943
20944 default:
20945 /* You cannot eliminate from the stack pointer.
20946 In theory you could eliminate from the hard frame
20947 pointer to the stack pointer, but this will never
20948 happen, since if a stack frame is not needed the
20949 hard frame pointer will never be used. */
20950 gcc_unreachable ();
20951 }
20952 }
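
/* Illustrative sketch, not part of the compiler: a worked example (under
   "#if 0") of the distances returned above for one hypothetical frame,
   assuming no static chain slot and a zero caller-interworking slot.  The
   struct and its values are invented for illustration; only the formulas are
   taken from the switch above.  */
#if 0
#include <stdio.h>

struct example_offsets
{
  int saved_args, frame, saved_regs, soft_frame, locals_base, outgoing_args;
};

int
main (void)
{
  /* Hypothetical but internally consistent frame: no pretend args, a frame
     pointer (frame = 4), 24 bytes of saved core registers, 16 bytes of
     locals and 8 bytes of outgoing arguments.  */
  struct example_offsets o = { 0, 4, 24, 24, 40, 48 };

  printf ("arg pointer -> soft frame pointer: %d\n",
          o.soft_frame - o.saved_args);                   /* 24  */
  printf ("arg pointer -> hard frame pointer: %d\n",
          o.frame - o.saved_args - 4);                    /* 0   */
  printf ("arg pointer -> stack pointer:      %d\n",
          o.outgoing_args - (o.saved_args + 4));          /* 44  */
  printf ("soft frame  -> hard frame pointer: %d\n",
          o.frame - o.soft_frame);                        /* -20 */
  printf ("soft frame  -> stack pointer:      %d\n",
          o.outgoing_args - o.soft_frame);                /* 24  */
  return 0;
}
#endif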
20953
20954 /* Given FROM and TO register numbers, say whether this elimination is
20955 allowed. Frame pointer elimination is automatically handled.
20956
20957 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20958 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20959 pointer, we must eliminate FRAME_POINTER_REGNUM into
20960 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20961 ARG_POINTER_REGNUM. */
20962
20963 bool
20964 arm_can_eliminate (const int from, const int to)
20965 {
20966 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20967 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20968 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20969 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20970 true);
20971 }
20972
20973 /* Emit RTL to save coprocessor registers on function entry. Returns the
20974 number of bytes pushed. */
20975
20976 static int
20977 arm_save_coproc_regs(void)
20978 {
20979 int saved_size = 0;
20980 unsigned reg;
20981 unsigned start_reg;
20982 rtx insn;
20983
20984 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20985 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20986 {
20987 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20988 insn = gen_rtx_MEM (V2SImode, insn);
20989 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20990 RTX_FRAME_RELATED_P (insn) = 1;
20991 saved_size += 8;
20992 }
20993
20994 if (TARGET_HARD_FLOAT && TARGET_VFP)
20995 {
20996 start_reg = FIRST_VFP_REGNUM;
20997
20998 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20999 {
21000 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21001 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21002 {
21003 if (start_reg != reg)
21004 saved_size += vfp_emit_fstmd (start_reg,
21005 (reg - start_reg) / 2);
21006 start_reg = reg + 2;
21007 }
21008 }
21009 if (start_reg != reg)
21010 saved_size += vfp_emit_fstmd (start_reg,
21011 (reg - start_reg) / 2);
21012 }
21013 return saved_size;
21014 }
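
/* Illustrative sketch, not part of the compiler: the run detection used
   above to group consecutive live VFP registers into as few store-multiple
   blocks as possible, modelled (under "#if 0") with plain D-register indices
   instead of GCC's VFP register numbers.  The liveness pattern is a
   hypothetical example.  */
#if 0
#include <stdbool.h>
#include <stdio.h>

int
main (void)
{
  /* Hypothetical liveness: d8, d9 and d11 must be saved, d10 must not.  */
  bool live[16] = { false };
  int start = 0, d;

  live[8] = live[9] = live[11] = true;

  for (d = 0; d < 16; d++)
    if (!live[d])
      {
        /* A dead register ends the current run, if any.  */
        if (start != d)
          printf ("fstmd covering d%d-d%d\n", start, d - 1);
        start = d + 1;
      }
  if (start != 16)
    printf ("fstmd covering d%d-d%d\n", start, 15);
  return 0;
}
#endif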
21015
21016
21017 /* Set the Thumb frame pointer from the stack pointer. */
21018
21019 static void
21020 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21021 {
21022 HOST_WIDE_INT amount;
21023 rtx insn, dwarf;
21024
21025 amount = offsets->outgoing_args - offsets->locals_base;
21026 if (amount < 1024)
21027 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21028 stack_pointer_rtx, GEN_INT (amount)));
21029 else
21030 {
21031 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21032 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21033 expects the first two operands to be the same. */
21034 if (TARGET_THUMB2)
21035 {
21036 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21037 stack_pointer_rtx,
21038 hard_frame_pointer_rtx));
21039 }
21040 else
21041 {
21042 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21043 hard_frame_pointer_rtx,
21044 stack_pointer_rtx));
21045 }
21046 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
21047 plus_constant (Pmode, stack_pointer_rtx, amount));
21048 RTX_FRAME_RELATED_P (dwarf) = 1;
21049 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21050 }
21051
21052 RTX_FRAME_RELATED_P (insn) = 1;
21053 }
21054
21055 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21056 function. */
21057 void
21058 arm_expand_prologue (void)
21059 {
21060 rtx amount;
21061 rtx insn;
21062 rtx ip_rtx;
21063 unsigned long live_regs_mask;
21064 unsigned long func_type;
21065 int fp_offset = 0;
21066 int saved_pretend_args = 0;
21067 int saved_regs = 0;
21068 unsigned HOST_WIDE_INT args_to_push;
21069 arm_stack_offsets *offsets;
21070
21071 func_type = arm_current_func_type ();
21072
21073 /* Naked functions don't have prologues. */
21074 if (IS_NAKED (func_type))
21075 return;
21076
21077 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21078 args_to_push = crtl->args.pretend_args_size;
21079
21080 /* Compute which registers we will have to save onto the stack. */
21081 offsets = arm_get_frame_offsets ();
21082 live_regs_mask = offsets->saved_regs_mask;
21083
21084 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21085
21086 if (IS_STACKALIGN (func_type))
21087 {
21088 rtx r0, r1;
21089
21090 /* Handle a word-aligned stack pointer. We generate the following:
21091
21092 mov r0, sp
21093 bic r1, r0, #7
21094 mov sp, r1
21095 <save and restore r0 in normal prologue/epilogue>
21096 mov sp, r0
21097 bx lr
21098
21099 The unwinder doesn't need to know about the stack realignment.
21100 Just tell it we saved SP in r0. */
21101 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21102
21103 r0 = gen_rtx_REG (SImode, 0);
21104 r1 = gen_rtx_REG (SImode, 1);
21105
21106 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21107 RTX_FRAME_RELATED_P (insn) = 1;
21108 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21109
21110 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21111
21112 /* ??? The CFA changes here, which may cause GDB to conclude that it
21113 has entered a different function. That said, the unwind info is
21114 correct, individually, before and after this instruction because
21115 we've described the save of SP, which will override the default
21116 handling of SP as restoring from the CFA. */
21117 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21118 }
21119
21120 /* For APCS frames, if the IP register is clobbered
21121 when creating the frame, save that register in a
21122 special way. */
21123 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21124 {
21125 if (IS_INTERRUPT (func_type))
21126 {
21127 /* Interrupt functions must not corrupt any registers.
21128 Creating a frame pointer, however, corrupts the IP
21129 register, so we must push it first. */
21130 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21131
21132 /* Do not set RTX_FRAME_RELATED_P on this insn.
21133 The dwarf stack unwinding code only wants to see one
21134 stack decrement per function, and this is not it. If
21135 this instruction is labeled as being part of the frame
21136 creation sequence then dwarf2out_frame_debug_expr will
21137 die when it encounters the assignment of IP to FP
21138 later on, since the use of SP here establishes SP as
21139 the CFA register and not IP.
21140
21141 Anyway this instruction is not really part of the stack
21142 frame creation although it is part of the prologue. */
21143 }
21144 else if (IS_NESTED (func_type))
21145 {
21146 /* The static chain register is the same as the IP register
21147 used as a scratch register during stack frame creation.
21148 To get around this, we need to find somewhere to store IP
21149 whilst the frame is being created. We try the following
21150 places in order:
21151
21152 1. The last argument register r3 if it is available.
21153 2. A slot on the stack above the frame if there are no
21154 arguments to push onto the stack.
21155 3. Register r3 again, after pushing the argument registers
21156 onto the stack, if this is a varargs function.
21157 4. The last slot on the stack created for the arguments to
21158 push, if this isn't a varargs function.
21159
21160 Note - we only need to tell the dwarf2 backend about the SP
21161 adjustment in the second variant; the static chain register
21162 doesn't need to be unwound, as it doesn't contain a value
21163 inherited from the caller. */
21164
21165 if (!arm_r3_live_at_start_p ())
21166 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21167 else if (args_to_push == 0)
21168 {
21169 rtx addr, dwarf;
21170
21171 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21172 saved_regs += 4;
21173
21174 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21175 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21176 fp_offset = 4;
21177
21178 /* Just tell the dwarf backend that we adjusted SP. */
21179 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21180 plus_constant (Pmode, stack_pointer_rtx,
21181 -fp_offset));
21182 RTX_FRAME_RELATED_P (insn) = 1;
21183 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21184 }
21185 else
21186 {
21187 /* Store the args on the stack. */
21188 if (cfun->machine->uses_anonymous_args)
21189 {
21190 insn
21191 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21192 (0xf0 >> (args_to_push / 4)) & 0xf);
21193 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21194 saved_pretend_args = 1;
21195 }
21196 else
21197 {
21198 rtx addr, dwarf;
21199
21200 if (args_to_push == 4)
21201 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21202 else
21203 addr
21204 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21205 plus_constant (Pmode,
21206 stack_pointer_rtx,
21207 -args_to_push));
21208
21209 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21210
21211 /* Just tell the dwarf backend that we adjusted SP. */
21212 dwarf
21213 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21214 plus_constant (Pmode, stack_pointer_rtx,
21215 -args_to_push));
21216 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21217 }
21218
21219 RTX_FRAME_RELATED_P (insn) = 1;
21220 fp_offset = args_to_push;
21221 args_to_push = 0;
21222 }
21223 }
21224
21225 insn = emit_set_insn (ip_rtx,
21226 plus_constant (Pmode, stack_pointer_rtx,
21227 fp_offset));
21228 RTX_FRAME_RELATED_P (insn) = 1;
21229 }
21230
21231 if (args_to_push)
21232 {
21233 /* Push the argument registers, or reserve space for them. */
21234 if (cfun->machine->uses_anonymous_args)
21235 insn = emit_multi_reg_push
21236 ((0xf0 >> (args_to_push / 4)) & 0xf,
21237 (0xf0 >> (args_to_push / 4)) & 0xf);
21238 else
21239 insn = emit_insn
21240 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21241 GEN_INT (- args_to_push)));
21242 RTX_FRAME_RELATED_P (insn) = 1;
21243 }
21244
21245 /* If this is an interrupt service routine, and the link register
21246 is going to be pushed, and we're not generating an extra
21247 push of IP (needed when a frame is needed and the frame layout is APCS),
21248 subtracting four from LR now will mean that the function return
21249 can be done with a single instruction. */
21250 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21251 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21252 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21253 && TARGET_ARM)
21254 {
21255 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21256
21257 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21258 }
21259
21260 if (live_regs_mask)
21261 {
21262 unsigned long dwarf_regs_mask = live_regs_mask;
21263
21264 saved_regs += bit_count (live_regs_mask) * 4;
21265 if (optimize_size && !frame_pointer_needed
21266 && saved_regs == offsets->saved_regs - offsets->saved_args)
21267 {
21268 /* If no coprocessor registers are being pushed and we don't have
21269 to worry about a frame pointer then push extra registers to
21270 create the stack frame. This is done in a way that does not
21271 alter the frame layout, so is independent of the epilogue. */
21272 int n;
21273 int frame;
21274 n = 0;
21275 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21276 n++;
21277 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21278 if (frame && n * 4 >= frame)
21279 {
21280 n = frame / 4;
21281 live_regs_mask |= (1 << n) - 1;
21282 saved_regs += frame;
21283 }
21284 }
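/* For example (illustrative): if r0-r2 are not live, the lowest live
   register is r3 and the remaining frame is 8 bytes, then r0 and r1 are
   simply added to the push and the separate stack-pointer subtraction
   disappears. */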
21285
21286 if (TARGET_LDRD
21287 && current_tune->prefer_ldrd_strd
21288 && !optimize_function_for_size_p (cfun))
21289 {
21290 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21291 if (TARGET_THUMB2)
21292 thumb2_emit_strd_push (live_regs_mask);
21293 else if (TARGET_ARM
21294 && !TARGET_APCS_FRAME
21295 && !IS_INTERRUPT (func_type))
21296 arm_emit_strd_push (live_regs_mask);
21297 else
21298 {
21299 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21300 RTX_FRAME_RELATED_P (insn) = 1;
21301 }
21302 }
21303 else
21304 {
21305 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21306 RTX_FRAME_RELATED_P (insn) = 1;
21307 }
21308 }
21309
21310 if (! IS_VOLATILE (func_type))
21311 saved_regs += arm_save_coproc_regs ();
21312
21313 if (frame_pointer_needed && TARGET_ARM)
21314 {
21315 /* Create the new frame pointer. */
21316 if (TARGET_APCS_FRAME)
21317 {
21318 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21319 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21320 RTX_FRAME_RELATED_P (insn) = 1;
21321
21322 if (IS_NESTED (func_type))
21323 {
21324 /* Recover the static chain register. */
21325 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21326 insn = gen_rtx_REG (SImode, 3);
21327 else
21328 {
21329 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21330 insn = gen_frame_mem (SImode, insn);
21331 }
21332 emit_set_insn (ip_rtx, insn);
21333 /* Add a USE to stop propagate_one_insn() from barfing. */
21334 emit_insn (gen_force_register_use (ip_rtx));
21335 }
21336 }
21337 else
21338 {
21339 insn = GEN_INT (saved_regs - 4);
21340 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21341 stack_pointer_rtx, insn));
21342 RTX_FRAME_RELATED_P (insn) = 1;
21343 }
21344 }
21345
21346 if (flag_stack_usage_info)
21347 current_function_static_stack_size
21348 = offsets->outgoing_args - offsets->saved_args;
21349
21350 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21351 {
21352 /* This add can produce multiple insns for a large constant, so we
21353 need to get tricky. */
21354 rtx_insn *last = get_last_insn ();
21355
21356 amount = GEN_INT (offsets->saved_args + saved_regs
21357 - offsets->outgoing_args);
21358
21359 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21360 amount));
21361 do
21362 {
21363 last = last ? NEXT_INSN (last) : get_insns ();
21364 RTX_FRAME_RELATED_P (last) = 1;
21365 }
21366 while (last != insn);
21367
21368 /* If the frame pointer is needed, emit a special barrier that
21369 will prevent the scheduler from moving stores to the frame
21370 before the stack adjustment. */
21371 if (frame_pointer_needed)
21372 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21373 hard_frame_pointer_rtx));
21374 }
21375
21376
21377 if (frame_pointer_needed && TARGET_THUMB2)
21378 thumb_set_frame_pointer (offsets);
21379
21380 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21381 {
21382 unsigned long mask;
21383
21384 mask = live_regs_mask;
21385 mask &= THUMB2_WORK_REGS;
21386 if (!IS_NESTED (func_type))
21387 mask |= (1 << IP_REGNUM);
21388 arm_load_pic_register (mask);
21389 }
21390
21391 /* If we are profiling, make sure no instructions are scheduled before
21392 the call to mcount. Similarly if the user has requested no
21393 scheduling in the prolog. Similarly if we want non-call exceptions
21394 using the EABI unwinder, to prevent faulting instructions from being
21395 swapped with a stack adjustment. */
21396 if (crtl->profile || !TARGET_SCHED_PROLOG
21397 || (arm_except_unwind_info (&global_options) == UI_TARGET
21398 && cfun->can_throw_non_call_exceptions))
21399 emit_insn (gen_blockage ());
21400
21401 /* If the link register is being kept alive, with the return address in it,
21402 then make sure that it does not get reused by the ce2 pass. */
21403 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21404 cfun->machine->lr_save_eliminated = 1;
21405 }
21406 \f
21407 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21408 static void
21409 arm_print_condition (FILE *stream)
21410 {
21411 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21412 {
21413 /* Branch conversion is not implemented for Thumb-2. */
21414 if (TARGET_THUMB)
21415 {
21416 output_operand_lossage ("predicated Thumb instruction");
21417 return;
21418 }
21419 if (current_insn_predicate != NULL)
21420 {
21421 output_operand_lossage
21422 ("predicated instruction in conditional sequence");
21423 return;
21424 }
21425
21426 fputs (arm_condition_codes[arm_current_cc], stream);
21427 }
21428 else if (current_insn_predicate)
21429 {
21430 enum arm_cond_code code;
21431
21432 if (TARGET_THUMB1)
21433 {
21434 output_operand_lossage ("predicated Thumb instruction");
21435 return;
21436 }
21437
21438 code = get_arm_condition_code (current_insn_predicate);
21439 fputs (arm_condition_codes[code], stream);
21440 }
21441 }
21442
21443
21444 /* Globally reserved letters: acln
21445 Punctuation letters currently used: @_|?().!#
21446 Lower case letters currently used: bcdefhimpqtvwxyz
21447 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21448 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21449
21450 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21451
21452 If CODE is 'd', then the X is a condition operand and the instruction
21453 should only be executed if the condition is true.
21454 If CODE is 'D', then the X is a condition operand and the instruction
21455 should only be executed if the condition is false: however, if the mode
21456 of the comparison is CCFPEmode, then always execute the instruction -- we
21457 do this because in these circumstances !GE does not necessarily imply LT;
21458 in these cases the instruction pattern will take care to make sure that
21459 an instruction containing %d will follow, thereby undoing the effects of
21460 doing this instruction unconditionally.
21461 If CODE is 'N' then X is a floating point operand that must be negated
21462 before output.
21463 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21464 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21465 static void
21466 arm_print_operand (FILE *stream, rtx x, int code)
21467 {
21468 switch (code)
21469 {
21470 case '@':
21471 fputs (ASM_COMMENT_START, stream);
21472 return;
21473
21474 case '_':
21475 fputs (user_label_prefix, stream);
21476 return;
21477
21478 case '|':
21479 fputs (REGISTER_PREFIX, stream);
21480 return;
21481
21482 case '?':
21483 arm_print_condition (stream);
21484 return;
21485
21486 case '(':
21487 /* Nothing in unified syntax, otherwise the current condition code. */
21488 if (!TARGET_UNIFIED_ASM)
21489 arm_print_condition (stream);
21490 break;
21491
21492 case ')':
21493 /* The current condition code in unified syntax, otherwise nothing. */
21494 if (TARGET_UNIFIED_ASM)
21495 arm_print_condition (stream);
21496 break;
21497
21498 case '.':
21499 /* The current condition code for a condition code setting instruction.
21500 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21501 if (TARGET_UNIFIED_ASM)
21502 {
21503 fputc('s', stream);
21504 arm_print_condition (stream);
21505 }
21506 else
21507 {
21508 arm_print_condition (stream);
21509 fputc('s', stream);
21510 }
21511 return;
21512
21513 case '!':
21514 /* If the instruction is conditionally executed then print
21515 the current condition code, otherwise print 's'. */
21516 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21517 if (current_insn_predicate)
21518 arm_print_condition (stream);
21519 else
21520 fputc('s', stream);
21521 break;
21522
21523 /* %# is a "break" sequence. It doesn't output anything, but is used to
21524 separate e.g. operand numbers from following text, if that text consists
21525 of further digits which we don't want to be part of the operand
21526 number. */
21527 case '#':
21528 return;
21529
21530 case 'N':
21531 {
21532 REAL_VALUE_TYPE r;
21533 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21534 r = real_value_negate (&r);
21535 fprintf (stream, "%s", fp_const_from_val (&r));
21536 }
21537 return;
21538
21539 /* An integer or symbol address without a preceding # sign. */
21540 case 'c':
21541 switch (GET_CODE (x))
21542 {
21543 case CONST_INT:
21544 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21545 break;
21546
21547 case SYMBOL_REF:
21548 output_addr_const (stream, x);
21549 break;
21550
21551 case CONST:
21552 if (GET_CODE (XEXP (x, 0)) == PLUS
21553 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21554 {
21555 output_addr_const (stream, x);
21556 break;
21557 }
21558 /* Fall through. */
21559
21560 default:
21561 output_operand_lossage ("Unsupported operand for code '%c'", code);
21562 }
21563 return;
21564
21565 /* An integer that we want to print in HEX. */
21566 case 'x':
21567 switch (GET_CODE (x))
21568 {
21569 case CONST_INT:
21570 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21571 break;
21572
21573 default:
21574 output_operand_lossage ("Unsupported operand for code '%c'", code);
21575 }
21576 return;
21577
21578 case 'B':
21579 if (CONST_INT_P (x))
21580 {
21581 HOST_WIDE_INT val;
21582 val = ARM_SIGN_EXTEND (~INTVAL (x));
21583 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21584 }
21585 else
21586 {
21587 putc ('~', stream);
21588 output_addr_const (stream, x);
21589 }
21590 return;
21591
21592 case 'b':
21593 /* Print the log2 of a CONST_INT. */
21594 {
21595 HOST_WIDE_INT val;
21596
21597 if (!CONST_INT_P (x)
21598 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21599 output_operand_lossage ("Unsupported operand for code '%c'", code);
21600 else
21601 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21602 }
21603 return;
21604
21605 case 'L':
21606 /* The low 16 bits of an immediate constant. */
21607 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21608 return;
21609
21610 case 'i':
21611 fprintf (stream, "%s", arithmetic_instr (x, 1));
21612 return;
21613
21614 case 'I':
21615 fprintf (stream, "%s", arithmetic_instr (x, 0));
21616 return;
21617
21618 case 'S':
21619 {
21620 HOST_WIDE_INT val;
21621 const char *shift;
21622
21623 shift = shift_op (x, &val);
21624
21625 if (shift)
21626 {
21627 fprintf (stream, ", %s ", shift);
21628 if (val == -1)
21629 arm_print_operand (stream, XEXP (x, 1), 0);
21630 else
21631 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21632 }
21633 }
21634 return;
21635
21636 /* An explanation of the 'Q', 'R' and 'H' register operands:
21637
21638 In a pair of registers containing a DI or DF value the 'Q'
21639 operand returns the register number of the register containing
21640 the least significant part of the value. The 'R' operand returns
21641 the register number of the register containing the most
21642 significant part of the value.
21643
21644 The 'H' operand returns the higher of the two register numbers.
21645 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21646 same as the 'Q' operand, since the most significant part of the
21647 value is held in the lower number register. The reverse is true
21648 on systems where WORDS_BIG_ENDIAN is false.
21649
21650 The purpose of these operands is to distinguish between cases
21651 where the endian-ness of the values is important (for example
21652 when they are added together), and cases where the endian-ness
21653 is irrelevant, but the order of register operations is important.
21654 For example when loading a value from memory into a register
21655 pair, the endian-ness does not matter. Provided that the value
21656 from the lower memory address is put into the lower numbered
21657 register, and the value from the higher address is put into the
21658 higher numbered register, the load will work regardless of whether
21659 the value being loaded is big-wordian or little-wordian. The
21660 order of the two register loads can matter however, if the address
21661 of the memory location is actually held in one of the registers
21662 being overwritten by the load.
21663
21664 The 'Q' and 'R' constraints are also available for 64-bit
21665 constants. */
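/* For example (illustrative, register numbers hypothetical): for a DImode
   value held in the pair {r0, r1} with little-endian word order, '%Q'
   prints r0 and '%R' prints r1; with WORDS_BIG_ENDIAN set the two swap,
   while '%H' prints r1 in either case. */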
21666 case 'Q':
21667 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21668 {
21669 rtx part = gen_lowpart (SImode, x);
21670 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21671 return;
21672 }
21673
21674 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21675 {
21676 output_operand_lossage ("invalid operand for code '%c'", code);
21677 return;
21678 }
21679
21680 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21681 return;
21682
21683 case 'R':
21684 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21685 {
21686 machine_mode mode = GET_MODE (x);
21687 rtx part;
21688
21689 if (mode == VOIDmode)
21690 mode = DImode;
21691 part = gen_highpart_mode (SImode, mode, x);
21692 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21693 return;
21694 }
21695
21696 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21697 {
21698 output_operand_lossage ("invalid operand for code '%c'", code);
21699 return;
21700 }
21701
21702 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21703 return;
21704
21705 case 'H':
21706 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21707 {
21708 output_operand_lossage ("invalid operand for code '%c'", code);
21709 return;
21710 }
21711
21712 asm_fprintf (stream, "%r", REGNO (x) + 1);
21713 return;
21714
21715 case 'J':
21716 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21717 {
21718 output_operand_lossage ("invalid operand for code '%c'", code);
21719 return;
21720 }
21721
21722 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21723 return;
21724
21725 case 'K':
21726 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21727 {
21728 output_operand_lossage ("invalid operand for code '%c'", code);
21729 return;
21730 }
21731
21732 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21733 return;
21734
21735 case 'm':
21736 asm_fprintf (stream, "%r",
21737 REG_P (XEXP (x, 0))
21738 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21739 return;
21740
21741 case 'M':
21742 asm_fprintf (stream, "{%r-%r}",
21743 REGNO (x),
21744 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21745 return;
21746
21747 /* Like 'M', but writing doubleword vector registers, for use by Neon
21748 insns. */
21749 case 'h':
21750 {
21751 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21752 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21753 if (numregs == 1)
21754 asm_fprintf (stream, "{d%d}", regno);
21755 else
21756 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21757 }
21758 return;
21759
21760 case 'd':
21761 /* CONST_TRUE_RTX means always -- that's the default. */
21762 if (x == const_true_rtx)
21763 return;
21764
21765 if (!COMPARISON_P (x))
21766 {
21767 output_operand_lossage ("invalid operand for code '%c'", code);
21768 return;
21769 }
21770
21771 fputs (arm_condition_codes[get_arm_condition_code (x)],
21772 stream);
21773 return;
21774
21775 case 'D':
21776 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21777 want to do that. */
21778 if (x == const_true_rtx)
21779 {
21780 output_operand_lossage ("instruction never executed");
21781 return;
21782 }
21783 if (!COMPARISON_P (x))
21784 {
21785 output_operand_lossage ("invalid operand for code '%c'", code);
21786 return;
21787 }
21788
21789 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21790 (get_arm_condition_code (x))],
21791 stream);
21792 return;
21793
21794 case 's':
21795 case 'V':
21796 case 'W':
21797 case 'X':
21798 case 'Y':
21799 case 'Z':
21800 /* Former Maverick support, removed after GCC-4.7. */
21801 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21802 return;
21803
21804 case 'U':
21805 if (!REG_P (x)
21806 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21807 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21808 /* Bad value for wCG register number. */
21809 {
21810 output_operand_lossage ("invalid operand for code '%c'", code);
21811 return;
21812 }
21813
21814 else
21815 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21816 return;
21817
21818 /* Print an iWMMXt control register name. */
21819 case 'w':
21820 if (!CONST_INT_P (x)
21821 || INTVAL (x) < 0
21822 || INTVAL (x) >= 16)
21823 /* Bad value for wC register number. */
21824 {
21825 output_operand_lossage ("invalid operand for code '%c'", code);
21826 return;
21827 }
21828
21829 else
21830 {
21831 static const char * wc_reg_names [16] =
21832 {
21833 "wCID", "wCon", "wCSSF", "wCASF",
21834 "wC4", "wC5", "wC6", "wC7",
21835 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21836 "wC12", "wC13", "wC14", "wC15"
21837 };
21838
21839 fputs (wc_reg_names [INTVAL (x)], stream);
21840 }
21841 return;
21842
21843 /* Print the high single-precision register of a VFP double-precision
21844 register. */
21845 case 'p':
21846 {
21847 machine_mode mode = GET_MODE (x);
21848 int regno;
21849
21850 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21851 {
21852 output_operand_lossage ("invalid operand for code '%c'", code);
21853 return;
21854 }
21855
21856 regno = REGNO (x);
21857 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21858 {
21859 output_operand_lossage ("invalid operand for code '%c'", code);
21860 return;
21861 }
21862
21863 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21864 }
21865 return;
21866
21867 /* Print a VFP/Neon double precision or quad precision register name. */
21868 case 'P':
21869 case 'q':
21870 {
21871 machine_mode mode = GET_MODE (x);
21872 int is_quad = (code == 'q');
21873 int regno;
21874
21875 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21876 {
21877 output_operand_lossage ("invalid operand for code '%c'", code);
21878 return;
21879 }
21880
21881 if (!REG_P (x)
21882 || !IS_VFP_REGNUM (REGNO (x)))
21883 {
21884 output_operand_lossage ("invalid operand for code '%c'", code);
21885 return;
21886 }
21887
21888 regno = REGNO (x);
21889 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21890 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21891 {
21892 output_operand_lossage ("invalid operand for code '%c'", code);
21893 return;
21894 }
21895
21896 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21897 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21898 }
21899 return;
21900
21901 /* These two codes print the low/high doubleword register of a Neon quad
21902 register, respectively. For pair-structure types, can also print
21903 low/high quadword registers. */
21904 case 'e':
21905 case 'f':
21906 {
21907 machine_mode mode = GET_MODE (x);
21908 int regno;
21909
21910 if ((GET_MODE_SIZE (mode) != 16
21911 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21912 {
21913 output_operand_lossage ("invalid operand for code '%c'", code);
21914 return;
21915 }
21916
21917 regno = REGNO (x);
21918 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21919 {
21920 output_operand_lossage ("invalid operand for code '%c'", code);
21921 return;
21922 }
21923
21924 if (GET_MODE_SIZE (mode) == 16)
21925 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21926 + (code == 'f' ? 1 : 0));
21927 else
21928 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21929 + (code == 'f' ? 1 : 0));
21930 }
21931 return;
21932
21933 /* Print a VFPv3 floating-point constant, represented as an integer
21934 index. */
21935 case 'G':
21936 {
21937 int index = vfp3_const_double_index (x);
21938 gcc_assert (index != -1);
21939 fprintf (stream, "%d", index);
21940 }
21941 return;
21942
21943 /* Print bits representing opcode features for Neon.
21944
21945 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21946 and polynomials as unsigned.
21947
21948 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21949
21950 Bit 2 is 1 for rounding functions, 0 otherwise. */
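/* For example (illustrative): (bits & 3) == 0 makes 'T' print 'u'
   (unsigned integer), 1 prints 's' (signed integer), 2 prints 'p'
   (polynomial) and 3 prints 'f' (float); setting bit 2 makes the 'O'
   code print the "r" rounding marker. */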
21951
21952 /* Identify the type as 's', 'u', 'p' or 'f'. */
21953 case 'T':
21954 {
21955 HOST_WIDE_INT bits = INTVAL (x);
21956 fputc ("uspf"[bits & 3], stream);
21957 }
21958 return;
21959
21960 /* Likewise, but signed and unsigned integers are both 'i'. */
21961 case 'F':
21962 {
21963 HOST_WIDE_INT bits = INTVAL (x);
21964 fputc ("iipf"[bits & 3], stream);
21965 }
21966 return;
21967
21968 /* As for 'T', but emit 'u' instead of 'p'. */
21969 case 't':
21970 {
21971 HOST_WIDE_INT bits = INTVAL (x);
21972 fputc ("usuf"[bits & 3], stream);
21973 }
21974 return;
21975
21976 /* Bit 2: rounding (vs none). */
21977 case 'O':
21978 {
21979 HOST_WIDE_INT bits = INTVAL (x);
21980 fputs ((bits & 4) != 0 ? "r" : "", stream);
21981 }
21982 return;
21983
21984 /* Memory operand for vld1/vst1 instruction. */
21985 case 'A':
21986 {
21987 rtx addr;
21988 bool postinc = FALSE;
21989 rtx postinc_reg = NULL;
21990 unsigned align, memsize, align_bits;
21991
21992 gcc_assert (MEM_P (x));
21993 addr = XEXP (x, 0);
21994 if (GET_CODE (addr) == POST_INC)
21995 {
21996 postinc = 1;
21997 addr = XEXP (addr, 0);
21998 }
21999 if (GET_CODE (addr) == POST_MODIFY)
22000 {
22001 postinc_reg = XEXP( XEXP (addr, 1), 1);
22002 addr = XEXP (addr, 0);
22003 }
22004 asm_fprintf (stream, "[%r", REGNO (addr));
22005
22006 /* We know the alignment of this access, so we can emit a hint in the
22007 instruction (for some alignments) as an aid to the memory subsystem
22008 of the target. */
22009 align = MEM_ALIGN (x) >> 3;
22010 memsize = MEM_SIZE (x);
22011
22012 /* Only certain alignment specifiers are supported by the hardware. */
22013 if (memsize == 32 && (align % 32) == 0)
22014 align_bits = 256;
22015 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22016 align_bits = 128;
22017 else if (memsize >= 8 && (align % 8) == 0)
22018 align_bits = 64;
22019 else
22020 align_bits = 0;
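/* For example (illustrative): a 16-byte access known to be 16-byte aligned
   gets the ":128" hint, a 32-byte access with 32-byte alignment gets ":256",
   and anything with less than 8-byte alignment gets no hint at all. */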
22021
22022 if (align_bits != 0)
22023 asm_fprintf (stream, ":%d", align_bits);
22024
22025 asm_fprintf (stream, "]");
22026
22027 if (postinc)
22028 fputs("!", stream);
22029 if (postinc_reg)
22030 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22031 }
22032 return;
22033
22034 case 'C':
22035 {
22036 rtx addr;
22037
22038 gcc_assert (MEM_P (x));
22039 addr = XEXP (x, 0);
22040 gcc_assert (REG_P (addr));
22041 asm_fprintf (stream, "[%r]", REGNO (addr));
22042 }
22043 return;
22044
22045 /* Translate an S register number into a D register number and element index. */
22046 case 'y':
22047 {
22048 machine_mode mode = GET_MODE (x);
22049 int regno;
22050
22051 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22052 {
22053 output_operand_lossage ("invalid operand for code '%c'", code);
22054 return;
22055 }
22056
22057 regno = REGNO (x);
22058 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22059 {
22060 output_operand_lossage ("invalid operand for code '%c'", code);
22061 return;
22062 }
22063
22064 regno = regno - FIRST_VFP_REGNUM;
22065 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22066 }
22067 return;
22068
22069 case 'v':
22070 gcc_assert (CONST_DOUBLE_P (x));
22071 int result;
22072 result = vfp3_const_double_for_fract_bits (x);
22073 if (result == 0)
22074 result = vfp3_const_double_for_bits (x);
22075 fprintf (stream, "#%d", result);
22076 return;
22077
22078 /* Register specifier for vld1.16/vst1.16. Translate the S register
22079 number into a D register number and element index. */
22080 case 'z':
22081 {
22082 machine_mode mode = GET_MODE (x);
22083 int regno;
22084
22085 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22086 {
22087 output_operand_lossage ("invalid operand for code '%c'", code);
22088 return;
22089 }
22090
22091 regno = REGNO (x);
22092 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22093 {
22094 output_operand_lossage ("invalid operand for code '%c'", code);
22095 return;
22096 }
22097
22098 regno = regno - FIRST_VFP_REGNUM;
22099 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22100 }
22101 return;
22102
22103 default:
22104 if (x == 0)
22105 {
22106 output_operand_lossage ("missing operand");
22107 return;
22108 }
22109
22110 switch (GET_CODE (x))
22111 {
22112 case REG:
22113 asm_fprintf (stream, "%r", REGNO (x));
22114 break;
22115
22116 case MEM:
22117 output_memory_reference_mode = GET_MODE (x);
22118 output_address (XEXP (x, 0));
22119 break;
22120
22121 case CONST_DOUBLE:
22122 {
22123 char fpstr[20];
22124 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22125 sizeof (fpstr), 0, 1);
22126 fprintf (stream, "#%s", fpstr);
22127 }
22128 break;
22129
22130 default:
22131 gcc_assert (GET_CODE (x) != NEG);
22132 fputc ('#', stream);
22133 if (GET_CODE (x) == HIGH)
22134 {
22135 fputs (":lower16:", stream);
22136 x = XEXP (x, 0);
22137 }
22138
22139 output_addr_const (stream, x);
22140 break;
22141 }
22142 }
22143 }
22144 \f
22145 /* Target hook for printing a memory address. */
22146 static void
22147 arm_print_operand_address (FILE *stream, rtx x)
22148 {
22149 if (TARGET_32BIT)
22150 {
22151 int is_minus = GET_CODE (x) == MINUS;
22152
22153 if (REG_P (x))
22154 asm_fprintf (stream, "[%r]", REGNO (x));
22155 else if (GET_CODE (x) == PLUS || is_minus)
22156 {
22157 rtx base = XEXP (x, 0);
22158 rtx index = XEXP (x, 1);
22159 HOST_WIDE_INT offset = 0;
22160 if (!REG_P (base)
22161 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22162 {
22163 /* Ensure that BASE is a register;
22164 one of them must be.
22165 Also ensure the SP is not used as an index register. */
22166 std::swap (base, index);
22167 }
22168 switch (GET_CODE (index))
22169 {
22170 case CONST_INT:
22171 offset = INTVAL (index);
22172 if (is_minus)
22173 offset = -offset;
22174 asm_fprintf (stream, "[%r, #%wd]",
22175 REGNO (base), offset);
22176 break;
22177
22178 case REG:
22179 asm_fprintf (stream, "[%r, %s%r]",
22180 REGNO (base), is_minus ? "-" : "",
22181 REGNO (index));
22182 break;
22183
22184 case MULT:
22185 case ASHIFTRT:
22186 case LSHIFTRT:
22187 case ASHIFT:
22188 case ROTATERT:
22189 {
22190 asm_fprintf (stream, "[%r, %s%r",
22191 REGNO (base), is_minus ? "-" : "",
22192 REGNO (XEXP (index, 0)));
22193 arm_print_operand (stream, index, 'S');
22194 fputs ("]", stream);
22195 break;
22196 }
22197
22198 default:
22199 gcc_unreachable ();
22200 }
22201 }
22202 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22203 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22204 {
22205 extern machine_mode output_memory_reference_mode;
22206
22207 gcc_assert (REG_P (XEXP (x, 0)));
22208
22209 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22210 asm_fprintf (stream, "[%r, #%s%d]!",
22211 REGNO (XEXP (x, 0)),
22212 GET_CODE (x) == PRE_DEC ? "-" : "",
22213 GET_MODE_SIZE (output_memory_reference_mode));
22214 else
22215 asm_fprintf (stream, "[%r], #%s%d",
22216 REGNO (XEXP (x, 0)),
22217 GET_CODE (x) == POST_DEC ? "-" : "",
22218 GET_MODE_SIZE (output_memory_reference_mode));
22219 }
22220 else if (GET_CODE (x) == PRE_MODIFY)
22221 {
22222 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22223 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22224 asm_fprintf (stream, "#%wd]!",
22225 INTVAL (XEXP (XEXP (x, 1), 1)));
22226 else
22227 asm_fprintf (stream, "%r]!",
22228 REGNO (XEXP (XEXP (x, 1), 1)));
22229 }
22230 else if (GET_CODE (x) == POST_MODIFY)
22231 {
22232 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22233 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22234 asm_fprintf (stream, "#%wd",
22235 INTVAL (XEXP (XEXP (x, 1), 1)));
22236 else
22237 asm_fprintf (stream, "%r",
22238 REGNO (XEXP (XEXP (x, 1), 1)));
22239 }
22240 else output_addr_const (stream, x);
22241 }
22242 else
22243 {
22244 if (REG_P (x))
22245 asm_fprintf (stream, "[%r]", REGNO (x));
22246 else if (GET_CODE (x) == POST_INC)
22247 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22248 else if (GET_CODE (x) == PLUS)
22249 {
22250 gcc_assert (REG_P (XEXP (x, 0)));
22251 if (CONST_INT_P (XEXP (x, 1)))
22252 asm_fprintf (stream, "[%r, #%wd]",
22253 REGNO (XEXP (x, 0)),
22254 INTVAL (XEXP (x, 1)));
22255 else
22256 asm_fprintf (stream, "[%r, %r]",
22257 REGNO (XEXP (x, 0)),
22258 REGNO (XEXP (x, 1)));
22259 }
22260 else
22261 output_addr_const (stream, x);
22262 }
22263 }
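
/* For illustration, the 32-bit cases above print addresses roughly as
   follows (register numbers assumed): a plain register as "[r0]", a
   register plus constant as "[r0, #4]", a register minus register as
   "[r0, -r1]", a pre-decrement as "[r0, #-4]!" and a post-increment as
   "[r0], #4".  Shifted-index forms additionally go through
   arm_print_operand with the 'S' code for the shift part.  */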
22264 \f
22265 /* Target hook for indicating whether a punctuation character for
22266 TARGET_PRINT_OPERAND is valid. */
22267 static bool
22268 arm_print_operand_punct_valid_p (unsigned char code)
22269 {
22270 return (code == '@' || code == '|' || code == '.'
22271 || code == '(' || code == ')' || code == '#'
22272 || (TARGET_32BIT && (code == '?'))
22273 || (TARGET_THUMB2 && (code == '!'))
22274 || (TARGET_THUMB && (code == '_')));
22275 }
22276 \f
22277 /* Target hook for assembling integer objects. The ARM version needs to
22278 handle word-sized values specially. */
22279 static bool
22280 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22281 {
22282 machine_mode mode;
22283
22284 if (size == UNITS_PER_WORD && aligned_p)
22285 {
22286 fputs ("\t.word\t", asm_out_file);
22287 output_addr_const (asm_out_file, x);
22288
22289 /* Mark symbols as position independent. We only do this in the
22290 .text segment, not in the .data segment. */
22291 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22292 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22293 {
22294 /* See legitimize_pic_address for an explanation of the
22295 TARGET_VXWORKS_RTP check. */
22296 if (!arm_pic_data_is_text_relative
22297 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22298 fputs ("(GOT)", asm_out_file);
22299 else
22300 fputs ("(GOTOFF)", asm_out_file);
22301 }
22302 fputc ('\n', asm_out_file);
22303 return true;
22304 }
22305
22306 mode = GET_MODE (x);
22307
22308 if (arm_vector_mode_supported_p (mode))
22309 {
22310 int i, units;
22311
22312 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22313
22314 units = CONST_VECTOR_NUNITS (x);
22315 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22316
22317 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22318 for (i = 0; i < units; i++)
22319 {
22320 rtx elt = CONST_VECTOR_ELT (x, i);
22321 assemble_integer
22322 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22323 }
22324 else
22325 for (i = 0; i < units; i++)
22326 {
22327 rtx elt = CONST_VECTOR_ELT (x, i);
22328 REAL_VALUE_TYPE rval;
22329
22330 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22331
22332 assemble_real
22333 (rval, GET_MODE_INNER (mode),
22334 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22335 }
22336
22337 return true;
22338 }
22339
22340 return default_assemble_integer (x, size, aligned_p);
22341 }
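
/* For illustration, on a target that needs GOT relocations, a
   word-sized SYMBOL_REF emitted into the constant table of a -fpic
   function comes out roughly as
       .word   sym(GOTOFF)
   when PIC data is text-relative and the symbol is local, and as
       .word   sym(GOT)
   otherwise ("sym" is a placeholder name).  */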
22342
22343 static void
22344 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22345 {
22346 section *s;
22347
22348 if (!TARGET_AAPCS_BASED)
22349 {
22350 (is_ctor ?
22351 default_named_section_asm_out_constructor
22352 : default_named_section_asm_out_destructor) (symbol, priority);
22353 return;
22354 }
22355
22356 /* Put these in the .init_array section, using a special relocation. */
22357 if (priority != DEFAULT_INIT_PRIORITY)
22358 {
22359 char buf[18];
22360 sprintf (buf, "%s.%.5u",
22361 is_ctor ? ".init_array" : ".fini_array",
22362 priority);
22363 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22364 }
22365 else if (is_ctor)
22366 s = ctors_section;
22367 else
22368 s = dtors_section;
22369
22370 switch_to_section (s);
22371 assemble_align (POINTER_SIZE);
22372 fputs ("\t.word\t", asm_out_file);
22373 output_addr_const (asm_out_file, symbol);
22374 fputs ("(target1)\n", asm_out_file);
22375 }
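
/* For example, on an AAPCS-based target a constructor registered with
   priority 101 is placed in a section named ".init_array.00101" and
   emitted roughly as
       .word   ctor_fn(target1)
   where "ctor_fn" is a placeholder symbol name.  */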
22376
22377 /* Add a function to the list of static constructors. */
22378
22379 static void
22380 arm_elf_asm_constructor (rtx symbol, int priority)
22381 {
22382 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22383 }
22384
22385 /* Add a function to the list of static destructors. */
22386
22387 static void
22388 arm_elf_asm_destructor (rtx symbol, int priority)
22389 {
22390 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22391 }
22392 \f
22393 /* A finite state machine takes care of noticing whether or not instructions
22394 can be conditionally executed, and thus decrease execution time and code
22395 size by deleting branch instructions. The fsm is controlled by
22396 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22397
22398 /* The states of the fsm controlling condition codes are:
22399 0: normal, do nothing special
22400 1: make ASM_OUTPUT_OPCODE not output this instruction
22401 2: make ASM_OUTPUT_OPCODE not output this instruction
22402 3: make instructions conditional
22403 4: make instructions conditional
22404
22405 State transitions (state->state by whom under condition):
22406 0 -> 1 final_prescan_insn if the `target' is a label
22407 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22408 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22409 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22410 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22411 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22412 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22413 (the target insn is arm_target_insn).
22414
22415 If the jump clobbers the conditions then we use states 2 and 4.
22416
22417 A similar thing can be done with conditional return insns.
22418
22419 XXX In case the `target' is an unconditional branch, this conditionalising
22420 of the instructions always reduces code size, but not always execution
22421 time. But then, I want to reduce the code size to somewhere near what
22422 /bin/cc produces. */
22423
22424 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22425 instructions. When a COND_EXEC instruction is seen the subsequent
22426 instructions are scanned so that multiple conditional instructions can be
22427 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22428 specify the length and true/false mask for the IT block. These will be
22429 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
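
/* As an illustration of the ARM-state transformation described above
   (registers and label names assumed), a sequence such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   can be rewritten by the ccfsm machinery as

	cmp	r0, #0
	addne	r1, r1, #1

   i.e. the branch is deleted and the skipped insn is made conditional
   on the inverse condition.  */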
22430
22431 /* Returns the index of the ARM condition code string in
22432 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22433 COMPARISON should be an rtx like `(eq (...) (...))'. */
22434
22435 enum arm_cond_code
22436 maybe_get_arm_condition_code (rtx comparison)
22437 {
22438 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22439 enum arm_cond_code code;
22440 enum rtx_code comp_code = GET_CODE (comparison);
22441
22442 if (GET_MODE_CLASS (mode) != MODE_CC)
22443 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22444 XEXP (comparison, 1));
22445
22446 switch (mode)
22447 {
22448 case CC_DNEmode: code = ARM_NE; goto dominance;
22449 case CC_DEQmode: code = ARM_EQ; goto dominance;
22450 case CC_DGEmode: code = ARM_GE; goto dominance;
22451 case CC_DGTmode: code = ARM_GT; goto dominance;
22452 case CC_DLEmode: code = ARM_LE; goto dominance;
22453 case CC_DLTmode: code = ARM_LT; goto dominance;
22454 case CC_DGEUmode: code = ARM_CS; goto dominance;
22455 case CC_DGTUmode: code = ARM_HI; goto dominance;
22456 case CC_DLEUmode: code = ARM_LS; goto dominance;
22457 case CC_DLTUmode: code = ARM_CC;
22458
22459 dominance:
22460 if (comp_code == EQ)
22461 return ARM_INVERSE_CONDITION_CODE (code);
22462 if (comp_code == NE)
22463 return code;
22464 return ARM_NV;
22465
22466 case CC_NOOVmode:
22467 switch (comp_code)
22468 {
22469 case NE: return ARM_NE;
22470 case EQ: return ARM_EQ;
22471 case GE: return ARM_PL;
22472 case LT: return ARM_MI;
22473 default: return ARM_NV;
22474 }
22475
22476 case CC_Zmode:
22477 switch (comp_code)
22478 {
22479 case NE: return ARM_NE;
22480 case EQ: return ARM_EQ;
22481 default: return ARM_NV;
22482 }
22483
22484 case CC_Nmode:
22485 switch (comp_code)
22486 {
22487 case NE: return ARM_MI;
22488 case EQ: return ARM_PL;
22489 default: return ARM_NV;
22490 }
22491
22492 case CCFPEmode:
22493 case CCFPmode:
22494 /* We can handle all cases except UNEQ and LTGT. */
22495 switch (comp_code)
22496 {
22497 case GE: return ARM_GE;
22498 case GT: return ARM_GT;
22499 case LE: return ARM_LS;
22500 case LT: return ARM_MI;
22501 case NE: return ARM_NE;
22502 case EQ: return ARM_EQ;
22503 case ORDERED: return ARM_VC;
22504 case UNORDERED: return ARM_VS;
22505 case UNLT: return ARM_LT;
22506 case UNLE: return ARM_LE;
22507 case UNGT: return ARM_HI;
22508 case UNGE: return ARM_PL;
22509 /* UNEQ and LTGT do not have a representation. */
22510 case UNEQ: /* Fall through. */
22511 case LTGT: /* Fall through. */
22512 default: return ARM_NV;
22513 }
22514
22515 case CC_SWPmode:
22516 switch (comp_code)
22517 {
22518 case NE: return ARM_NE;
22519 case EQ: return ARM_EQ;
22520 case GE: return ARM_LE;
22521 case GT: return ARM_LT;
22522 case LE: return ARM_GE;
22523 case LT: return ARM_GT;
22524 case GEU: return ARM_LS;
22525 case GTU: return ARM_CC;
22526 case LEU: return ARM_CS;
22527 case LTU: return ARM_HI;
22528 default: return ARM_NV;
22529 }
22530
22531 case CC_Cmode:
22532 switch (comp_code)
22533 {
22534 case LTU: return ARM_CS;
22535 case GEU: return ARM_CC;
22536 default: return ARM_NV;
22537 }
22538
22539 case CC_CZmode:
22540 switch (comp_code)
22541 {
22542 case NE: return ARM_NE;
22543 case EQ: return ARM_EQ;
22544 case GEU: return ARM_CS;
22545 case GTU: return ARM_HI;
22546 case LEU: return ARM_LS;
22547 case LTU: return ARM_CC;
22548 default: return ARM_NV;
22549 }
22550
22551 case CC_NCVmode:
22552 switch (comp_code)
22553 {
22554 case GE: return ARM_GE;
22555 case LT: return ARM_LT;
22556 case GEU: return ARM_CS;
22557 case LTU: return ARM_CC;
22558 default: return ARM_NV;
22559 }
22560
22561 case CCmode:
22562 switch (comp_code)
22563 {
22564 case NE: return ARM_NE;
22565 case EQ: return ARM_EQ;
22566 case GE: return ARM_GE;
22567 case GT: return ARM_GT;
22568 case LE: return ARM_LE;
22569 case LT: return ARM_LT;
22570 case GEU: return ARM_CS;
22571 case GTU: return ARM_HI;
22572 case LEU: return ARM_LS;
22573 case LTU: return ARM_CC;
22574 default: return ARM_NV;
22575 }
22576
22577 default: gcc_unreachable ();
22578 }
22579 }
22580
22581 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22582 static enum arm_cond_code
22583 get_arm_condition_code (rtx comparison)
22584 {
22585 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22586 gcc_assert (code != ARM_NV);
22587 return code;
22588 }
22589
22590 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22591 instructions. */
22592 void
22593 thumb2_final_prescan_insn (rtx_insn *insn)
22594 {
22595 rtx_insn *first_insn = insn;
22596 rtx body = PATTERN (insn);
22597 rtx predicate;
22598 enum arm_cond_code code;
22599 int n;
22600 int mask;
22601 int max;
22602
22603 /* max_insns_skipped in the tune was already taken into account in the
22604 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22605 just emit the IT blocks as large as we can. It does not make sense to split
22606 the IT blocks. */
22607 max = MAX_INSN_PER_IT_BLOCK;
22608
22609 /* Remove the previous insn from the count of insns to be output. */
22610 if (arm_condexec_count)
22611 arm_condexec_count--;
22612
22613 /* Nothing to do if we are already inside a conditional block. */
22614 if (arm_condexec_count)
22615 return;
22616
22617 if (GET_CODE (body) != COND_EXEC)
22618 return;
22619
22620 /* Conditional jumps are implemented directly. */
22621 if (JUMP_P (insn))
22622 return;
22623
22624 predicate = COND_EXEC_TEST (body);
22625 arm_current_cc = get_arm_condition_code (predicate);
22626
22627 n = get_attr_ce_count (insn);
22628 arm_condexec_count = 1;
22629 arm_condexec_mask = (1 << n) - 1;
22630 arm_condexec_masklen = n;
22631 /* See if subsequent instructions can be combined into the same block. */
22632 for (;;)
22633 {
22634 insn = next_nonnote_insn (insn);
22635
22636 /* Jumping into the middle of an IT block is illegal, so a label or
22637 barrier terminates the block. */
22638 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22639 break;
22640
22641 body = PATTERN (insn);
22642 /* USE and CLOBBER aren't really insns, so just skip them. */
22643 if (GET_CODE (body) == USE
22644 || GET_CODE (body) == CLOBBER)
22645 continue;
22646
22647 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22648 if (GET_CODE (body) != COND_EXEC)
22649 break;
22650 /* Maximum number of conditionally executed instructions in a block. */
22651 n = get_attr_ce_count (insn);
22652 if (arm_condexec_masklen + n > max)
22653 break;
22654
22655 predicate = COND_EXEC_TEST (body);
22656 code = get_arm_condition_code (predicate);
22657 mask = (1 << n) - 1;
22658 if (arm_current_cc == code)
22659 arm_condexec_mask |= (mask << arm_condexec_masklen);
22660 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22661 break;
22662
22663 arm_condexec_count++;
22664 arm_condexec_masklen += n;
22665
22666 /* A jump must be the last instruction in a conditional block. */
22667 if (JUMP_P (insn))
22668 break;
22669 }
22670 /* Restore recog_data (getting the attributes of other insns can
22671 destroy this array, but final.c assumes that it remains intact
22672 across this call). */
22673 extract_constrain_insn_cached (first_insn);
22674 }
22675
22676 void
22677 arm_final_prescan_insn (rtx_insn *insn)
22678 {
22679 /* BODY will hold the body of INSN. */
22680 rtx body = PATTERN (insn);
22681
22682 /* This will be 1 if trying to repeat the trick, and things need to be
22683 reversed if it appears to fail. */
22684 int reverse = 0;
22685
22686 /* If we start with a return insn, we only succeed if we find another one. */
22687 int seeking_return = 0;
22688 enum rtx_code return_code = UNKNOWN;
22689
22690 /* START_INSN will hold the insn from where we start looking. This is the
22691 first insn after the following code_label if REVERSE is true. */
22692 rtx_insn *start_insn = insn;
22693
22694 /* If in state 4, check if the target branch is reached, in order to
22695 change back to state 0. */
22696 if (arm_ccfsm_state == 4)
22697 {
22698 if (insn == arm_target_insn)
22699 {
22700 arm_target_insn = NULL;
22701 arm_ccfsm_state = 0;
22702 }
22703 return;
22704 }
22705
22706 /* If in state 3, it is possible to repeat the trick, if this insn is an
22707 unconditional branch to a label, and immediately following this branch
22708 is the previous target label which is only used once, and the label this
22709 branch jumps to is not too far off. */
22710 if (arm_ccfsm_state == 3)
22711 {
22712 if (simplejump_p (insn))
22713 {
22714 start_insn = next_nonnote_insn (start_insn);
22715 if (BARRIER_P (start_insn))
22716 {
22717 /* XXX Isn't this always a barrier? */
22718 start_insn = next_nonnote_insn (start_insn);
22719 }
22720 if (LABEL_P (start_insn)
22721 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22722 && LABEL_NUSES (start_insn) == 1)
22723 reverse = TRUE;
22724 else
22725 return;
22726 }
22727 else if (ANY_RETURN_P (body))
22728 {
22729 start_insn = next_nonnote_insn (start_insn);
22730 if (BARRIER_P (start_insn))
22731 start_insn = next_nonnote_insn (start_insn);
22732 if (LABEL_P (start_insn)
22733 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22734 && LABEL_NUSES (start_insn) == 1)
22735 {
22736 reverse = TRUE;
22737 seeking_return = 1;
22738 return_code = GET_CODE (body);
22739 }
22740 else
22741 return;
22742 }
22743 else
22744 return;
22745 }
22746
22747 gcc_assert (!arm_ccfsm_state || reverse);
22748 if (!JUMP_P (insn))
22749 return;
22750
22751 /* This jump might be paralleled with a clobber of the condition codes;
22752 the jump should always come first. */
22753 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22754 body = XVECEXP (body, 0, 0);
22755
22756 if (reverse
22757 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22758 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22759 {
22760 int insns_skipped;
22761 int fail = FALSE, succeed = FALSE;
22762 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22763 int then_not_else = TRUE;
22764 rtx_insn *this_insn = start_insn;
22765 rtx label = 0;
22766
22767 /* Register the insn jumped to. */
22768 if (reverse)
22769 {
22770 if (!seeking_return)
22771 label = XEXP (SET_SRC (body), 0);
22772 }
22773 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22774 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22775 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22776 {
22777 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22778 then_not_else = FALSE;
22779 }
22780 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22781 {
22782 seeking_return = 1;
22783 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22784 }
22785 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22786 {
22787 seeking_return = 1;
22788 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22789 then_not_else = FALSE;
22790 }
22791 else
22792 gcc_unreachable ();
22793
22794 /* See how many insns this branch skips, and what kind of insns. If all
22795 insns are okay, and the label or unconditional branch to the same
22796 label is not too far away, succeed. */
22797 for (insns_skipped = 0;
22798 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22799 {
22800 rtx scanbody;
22801
22802 this_insn = next_nonnote_insn (this_insn);
22803 if (!this_insn)
22804 break;
22805
22806 switch (GET_CODE (this_insn))
22807 {
22808 case CODE_LABEL:
22809 /* Succeed if it is the target label, otherwise fail since
22810 control falls in from somewhere else. */
22811 if (this_insn == label)
22812 {
22813 arm_ccfsm_state = 1;
22814 succeed = TRUE;
22815 }
22816 else
22817 fail = TRUE;
22818 break;
22819
22820 case BARRIER:
22821 /* Succeed if the following insn is the target label.
22822 Otherwise fail.
22823 If return insns are used then the last insn in a function
22824 will be a barrier. */
22825 this_insn = next_nonnote_insn (this_insn);
22826 if (this_insn && this_insn == label)
22827 {
22828 arm_ccfsm_state = 1;
22829 succeed = TRUE;
22830 }
22831 else
22832 fail = TRUE;
22833 break;
22834
22835 case CALL_INSN:
22836 /* The AAPCS says that conditional calls should not be
22837 used since they make interworking inefficient (the
22838 linker can't transform BL<cond> into BLX). That's
22839 only a problem if the machine has BLX. */
22840 if (arm_arch5)
22841 {
22842 fail = TRUE;
22843 break;
22844 }
22845
22846 /* Succeed if the following insn is the target label, or
22847 if the following two insns are a barrier and the
22848 target label. */
22849 this_insn = next_nonnote_insn (this_insn);
22850 if (this_insn && BARRIER_P (this_insn))
22851 this_insn = next_nonnote_insn (this_insn);
22852
22853 if (this_insn && this_insn == label
22854 && insns_skipped < max_insns_skipped)
22855 {
22856 arm_ccfsm_state = 1;
22857 succeed = TRUE;
22858 }
22859 else
22860 fail = TRUE;
22861 break;
22862
22863 case JUMP_INSN:
22864 /* If this is an unconditional branch to the same label, succeed.
22865 If it is to another label, do nothing. If it is conditional,
22866 fail. */
22867 /* XXX Probably, the tests for SET and the PC are
22868 unnecessary. */
22869
22870 scanbody = PATTERN (this_insn);
22871 if (GET_CODE (scanbody) == SET
22872 && GET_CODE (SET_DEST (scanbody)) == PC)
22873 {
22874 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22875 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22876 {
22877 arm_ccfsm_state = 2;
22878 succeed = TRUE;
22879 }
22880 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22881 fail = TRUE;
22882 }
22883 /* Fail if a conditional return is undesirable (e.g. on a
22884 StrongARM), but still allow this if optimizing for size. */
22885 else if (GET_CODE (scanbody) == return_code
22886 && !use_return_insn (TRUE, NULL)
22887 && !optimize_size)
22888 fail = TRUE;
22889 else if (GET_CODE (scanbody) == return_code)
22890 {
22891 arm_ccfsm_state = 2;
22892 succeed = TRUE;
22893 }
22894 else if (GET_CODE (scanbody) == PARALLEL)
22895 {
22896 switch (get_attr_conds (this_insn))
22897 {
22898 case CONDS_NOCOND:
22899 break;
22900 default:
22901 fail = TRUE;
22902 break;
22903 }
22904 }
22905 else
22906 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22907
22908 break;
22909
22910 case INSN:
22911 /* Instructions using or affecting the condition codes make it
22912 fail. */
22913 scanbody = PATTERN (this_insn);
22914 if (!(GET_CODE (scanbody) == SET
22915 || GET_CODE (scanbody) == PARALLEL)
22916 || get_attr_conds (this_insn) != CONDS_NOCOND)
22917 fail = TRUE;
22918 break;
22919
22920 default:
22921 break;
22922 }
22923 }
22924 if (succeed)
22925 {
22926 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22927 arm_target_label = CODE_LABEL_NUMBER (label);
22928 else
22929 {
22930 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22931
22932 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22933 {
22934 this_insn = next_nonnote_insn (this_insn);
22935 gcc_assert (!this_insn
22936 || (!BARRIER_P (this_insn)
22937 && !LABEL_P (this_insn)));
22938 }
22939 if (!this_insn)
22940 {
22941 /* Oh, dear! We ran off the end; give up. */
22942 extract_constrain_insn_cached (insn);
22943 arm_ccfsm_state = 0;
22944 arm_target_insn = NULL;
22945 return;
22946 }
22947 arm_target_insn = this_insn;
22948 }
22949
22950 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22951 what it was. */
22952 if (!reverse)
22953 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22954
22955 if (reverse || then_not_else)
22956 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22957 }
22958
22959 /* Restore recog_data (getting the attributes of other insns can
22960 destroy this array, but final.c assumes that it remains intact
22961 across this call). */
22962 extract_constrain_insn_cached (insn);
22963 }
22964 }
22965
22966 /* Output IT instructions. */
22967 void
22968 thumb2_asm_output_opcode (FILE * stream)
22969 {
22970 char buff[5];
22971 int n;
22972
22973 if (arm_condexec_mask)
22974 {
22975 for (n = 0; n < arm_condexec_masklen; n++)
22976 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22977 buff[n] = 0;
22978 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22979 arm_condition_codes[arm_current_cc]);
22980 arm_condexec_mask = 0;
22981 }
22982 }
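
/* For illustration (assumed conditions): with arm_current_cc == ARM_EQ,
   a mask of 0b011 and a mask length of 3, the code above emits
   "itte eq" before the first insn of the block, so the three following
   insns are executed as EQ, EQ and NE respectively.  */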
22983
22984 /* Returns true if REGNO is a valid register
22985 for holding a quantity of type MODE. */
22986 int
22987 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
22988 {
22989 if (GET_MODE_CLASS (mode) == MODE_CC)
22990 return (regno == CC_REGNUM
22991 || (TARGET_HARD_FLOAT && TARGET_VFP
22992 && regno == VFPCC_REGNUM));
22993
22994 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
22995 return false;
22996
22997 if (TARGET_THUMB1)
22998 /* For the Thumb we only allow values bigger than SImode in
22999 registers 0 - 6, so that there is always a second low
23000 register available to hold the upper part of the value.
23001 We probably ought to ensure that the register is the
23002 start of an even numbered register pair. */
23003 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23004
23005 if (TARGET_HARD_FLOAT && TARGET_VFP
23006 && IS_VFP_REGNUM (regno))
23007 {
23008 if (mode == SFmode || mode == SImode)
23009 return VFP_REGNO_OK_FOR_SINGLE (regno);
23010
23011 if (mode == DFmode)
23012 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23013
23014 /* VFP registers can hold HFmode values, but there is no point in
23015 putting them there unless we have hardware conversion insns. */
23016 if (mode == HFmode)
23017 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
23018
23019 if (TARGET_NEON)
23020 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23021 || (VALID_NEON_QREG_MODE (mode)
23022 && NEON_REGNO_OK_FOR_QUAD (regno))
23023 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23024 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23025 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23026 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23027 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23028
23029 return FALSE;
23030 }
23031
23032 if (TARGET_REALLY_IWMMXT)
23033 {
23034 if (IS_IWMMXT_GR_REGNUM (regno))
23035 return mode == SImode;
23036
23037 if (IS_IWMMXT_REGNUM (regno))
23038 return VALID_IWMMXT_REG_MODE (mode);
23039 }
23040
23041 /* We allow almost any value to be stored in the general registers.
23042 Restrict doubleword quantities to even register pairs in ARM state
23043 so that we can use ldrd. Do not allow very large Neon structure
23044 opaque modes in general registers; they would use too many. */
23045 if (regno <= LAST_ARM_REGNUM)
23046 {
23047 if (ARM_NUM_REGS (mode) > 4)
23048 return FALSE;
23049
23050 if (TARGET_THUMB2)
23051 return TRUE;
23052
23053 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23054 }
23055
23056 if (regno == FRAME_POINTER_REGNUM
23057 || regno == ARG_POINTER_REGNUM)
23058 /* We only allow integers in the fake hard registers. */
23059 return GET_MODE_CLASS (mode) == MODE_INT;
23060
23061 return FALSE;
23062 }
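
/* For example, under the checks above a DImode value in ARM state with
   TARGET_LDRD must start on an even-numbered core register (the r2/r3
   pair is accepted, r1 is not), and SFmode values are only allowed in
   the VFP registers that VFP_REGNO_OK_FOR_SINGLE accepts.  */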
23063
23064 /* Implement MODES_TIEABLE_P. */
23065
23066 bool
23067 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23068 {
23069 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23070 return true;
23071
23072 /* We specifically want to allow elements of "structure" modes to
23073 be tieable to the structure. This more general condition allows
23074 other rarer situations too. */
23075 if (TARGET_NEON
23076 && (VALID_NEON_DREG_MODE (mode1)
23077 || VALID_NEON_QREG_MODE (mode1)
23078 || VALID_NEON_STRUCT_MODE (mode1))
23079 && (VALID_NEON_DREG_MODE (mode2)
23080 || VALID_NEON_QREG_MODE (mode2)
23081 || VALID_NEON_STRUCT_MODE (mode2)))
23082 return true;
23083
23084 return false;
23085 }
23086
23087 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23088 not used in arm mode. */
23089
23090 enum reg_class
23091 arm_regno_class (int regno)
23092 {
23093 if (regno == PC_REGNUM)
23094 return NO_REGS;
23095
23096 if (TARGET_THUMB1)
23097 {
23098 if (regno == STACK_POINTER_REGNUM)
23099 return STACK_REG;
23100 if (regno == CC_REGNUM)
23101 return CC_REG;
23102 if (regno < 8)
23103 return LO_REGS;
23104 return HI_REGS;
23105 }
23106
23107 if (TARGET_THUMB2 && regno < 8)
23108 return LO_REGS;
23109
23110 if ( regno <= LAST_ARM_REGNUM
23111 || regno == FRAME_POINTER_REGNUM
23112 || regno == ARG_POINTER_REGNUM)
23113 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23114
23115 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23116 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23117
23118 if (IS_VFP_REGNUM (regno))
23119 {
23120 if (regno <= D7_VFP_REGNUM)
23121 return VFP_D0_D7_REGS;
23122 else if (regno <= LAST_LO_VFP_REGNUM)
23123 return VFP_LO_REGS;
23124 else
23125 return VFP_HI_REGS;
23126 }
23127
23128 if (IS_IWMMXT_REGNUM (regno))
23129 return IWMMXT_REGS;
23130
23131 if (IS_IWMMXT_GR_REGNUM (regno))
23132 return IWMMXT_GR_REGS;
23133
23134 return NO_REGS;
23135 }
23136
23137 /* Handle a special case when computing the offset
23138 of an argument from the frame pointer. */
23139 int
23140 arm_debugger_arg_offset (int value, rtx addr)
23141 {
23142 rtx_insn *insn;
23143
23144 /* We are only interested if dbxout_parms() failed to compute the offset. */
23145 if (value != 0)
23146 return 0;
23147
23148 /* We can only cope with the case where the address is held in a register. */
23149 if (!REG_P (addr))
23150 return 0;
23151
23152 /* If we are using the frame pointer to point at the argument, then
23153 an offset of 0 is correct. */
23154 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23155 return 0;
23156
23157 /* If we are using the stack pointer to point at the
23158 argument, then an offset of 0 is correct. */
23159 /* ??? Check this is consistent with thumb2 frame layout. */
23160 if ((TARGET_THUMB || !frame_pointer_needed)
23161 && REGNO (addr) == SP_REGNUM)
23162 return 0;
23163
23164 /* Oh dear. The argument is pointed to by a register rather
23165 than being held in a register, or being stored at a known
23166 offset from the frame pointer. Since GDB only understands
23167 those two kinds of argument we must translate the address
23168 held in the register into an offset from the frame pointer.
23169 We do this by searching through the insns for the function
23170 looking to see where this register gets its value. If the
23171 register is initialized from the frame pointer plus an offset
23172 then we are in luck and we can continue, otherwise we give up.
23173
23174 This code is exercised by producing debugging information
23175 for a function with arguments like this:
23176
23177 double func (double a, double b, int c, double d) {return d;}
23178
23179 Without this code the stab for parameter 'd' will be set to
23180 an offset of 0 from the frame pointer, rather than 8. */
23181
23182 /* The if() statement says:
23183
23184 If the insn is a normal instruction
23185 and if the insn is setting the value in a register
23186 and if the register being set is the register holding the address of the argument
23187 and if the address is computed by an addition
23188 that involves adding to a register
23189 which is the frame pointer
23190 a constant integer
23191
23192 then... */
23193
23194 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23195 {
23196 if ( NONJUMP_INSN_P (insn)
23197 && GET_CODE (PATTERN (insn)) == SET
23198 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23199 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23200 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23201 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23202 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23203 )
23204 {
23205 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23206
23207 break;
23208 }
23209 }
23210
23211 if (value == 0)
23212 {
23213 debug_rtx (addr);
23214 warning (0, "unable to compute real location of stacked parameter");
23215 value = 8; /* XXX magic hack */
23216 }
23217
23218 return value;
23219 }
23220 \f
23221 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23222
23223 static const char *
23224 arm_invalid_parameter_type (const_tree t)
23225 {
23226 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23227 return N_("function parameters cannot have __fp16 type");
23228 return NULL;
23229 }
23230
23231 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23232
23233 static const char *
23234 arm_invalid_return_type (const_tree t)
23235 {
23236 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23237 return N_("functions cannot return __fp16 type");
23238 return NULL;
23239 }
23240
23241 /* Implement TARGET_PROMOTED_TYPE. */
23242
23243 static tree
23244 arm_promoted_type (const_tree t)
23245 {
23246 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23247 return float_type_node;
23248 return NULL_TREE;
23249 }
23250
23251 /* Implement TARGET_CONVERT_TO_TYPE.
23252 Specifically, this hook implements the peculiarity of the ARM
23253 half-precision floating-point C semantics that requires conversions between
23254 __fp16 to or from double to do an intermediate conversion to float. */
23255
23256 static tree
23257 arm_convert_to_type (tree type, tree expr)
23258 {
23259 tree fromtype = TREE_TYPE (expr);
23260 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
23261 return NULL_TREE;
23262 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
23263 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
23264 return convert (type, convert (float_type_node, expr));
23265 return NULL_TREE;
23266 }
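
/* Conceptually, the hook above turns a conversion such as (double) x,
   where x has type __fp16, into (double) (float) x, and similarly
   narrows a double to __fp16 through an intermediate float.  */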
23267
23268 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23269 This simply adds HFmode as a supported mode; even though we don't
23270 implement arithmetic on this type directly, it's supported by
23271 optabs conversions, much the way the double-word arithmetic is
23272 special-cased in the default hook. */
23273
23274 static bool
23275 arm_scalar_mode_supported_p (machine_mode mode)
23276 {
23277 if (mode == HFmode)
23278 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23279 else if (ALL_FIXED_POINT_MODE_P (mode))
23280 return true;
23281 else
23282 return default_scalar_mode_supported_p (mode);
23283 }
23284
23285 /* Emit code to reinterpret one Neon type as another, without altering bits. */
23286 void
23287 neon_reinterpret (rtx dest, rtx src)
23288 {
23289 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
23290 }
23291
23292 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23293 not to early-clobber SRC registers in the process.
23294
23295 We assume that the operands described by SRC and DEST represent a
23296 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23297 number of components into which the copy has been decomposed. */
23298 void
23299 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23300 {
23301 unsigned int i;
23302
23303 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23304 || REGNO (operands[0]) < REGNO (operands[1]))
23305 {
23306 for (i = 0; i < count; i++)
23307 {
23308 operands[2 * i] = dest[i];
23309 operands[2 * i + 1] = src[i];
23310 }
23311 }
23312 else
23313 {
23314 for (i = 0; i < count; i++)
23315 {
23316 operands[2 * i] = dest[count - i - 1];
23317 operands[2 * i + 1] = src[count - i - 1];
23318 }
23319 }
23320 }
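
/* For example (assumed registers), when copying the pair {d1, d2} from
   {d0, d1}, the destination overlaps the source and has the higher
   starting register number, so the components are ordered d2 <- d1
   first and d1 <- d0 second, avoiding a clobber of d1 before it is
   read.  */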
23321
23322 /* Split operands into moves from op[1] + op[2] into op[0]. */
23323
23324 void
23325 neon_split_vcombine (rtx operands[3])
23326 {
23327 unsigned int dest = REGNO (operands[0]);
23328 unsigned int src1 = REGNO (operands[1]);
23329 unsigned int src2 = REGNO (operands[2]);
23330 machine_mode halfmode = GET_MODE (operands[1]);
23331 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23332 rtx destlo, desthi;
23333
23334 if (src1 == dest && src2 == dest + halfregs)
23335 {
23336 /* No-op move. Can't split to nothing; emit something. */
23337 emit_note (NOTE_INSN_DELETED);
23338 return;
23339 }
23340
23341 /* Preserve register attributes for variable tracking. */
23342 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23343 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23344 GET_MODE_SIZE (halfmode));
23345
23346 /* Special case of reversed high/low parts. Use VSWP. */
23347 if (src2 == dest && src1 == dest + halfregs)
23348 {
23349 rtx x = gen_rtx_SET (VOIDmode, destlo, operands[1]);
23350 rtx y = gen_rtx_SET (VOIDmode, desthi, operands[2]);
23351 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23352 return;
23353 }
23354
23355 if (!reg_overlap_mentioned_p (operands[2], destlo))
23356 {
23357 /* Try to avoid unnecessary moves if part of the result
23358 is in the right place already. */
23359 if (src1 != dest)
23360 emit_move_insn (destlo, operands[1]);
23361 if (src2 != dest + halfregs)
23362 emit_move_insn (desthi, operands[2]);
23363 }
23364 else
23365 {
23366 if (src2 != dest + halfregs)
23367 emit_move_insn (desthi, operands[2]);
23368 if (src1 != dest)
23369 emit_move_insn (destlo, operands[1]);
23370 }
23371 }
23372 \f
23373 /* Return the number (counting from 0) of
23374 the least significant set bit in MASK. */
23375
23376 inline static int
23377 number_of_first_bit_set (unsigned mask)
23378 {
23379 return ctz_hwi (mask);
23380 }
23381
23382 /* Like emit_multi_reg_push, but allowing for a different set of
23383 registers to be described as saved. MASK is the set of registers
23384 to be saved; REAL_REGS is the set of registers to be described as
23385 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23386
23387 static rtx_insn *
23388 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23389 {
23390 unsigned long regno;
23391 rtx par[10], tmp, reg;
23392 rtx_insn *insn;
23393 int i, j;
23394
23395 /* Build the parallel of the registers actually being stored. */
23396 for (i = 0; mask; ++i, mask &= mask - 1)
23397 {
23398 regno = ctz_hwi (mask);
23399 reg = gen_rtx_REG (SImode, regno);
23400
23401 if (i == 0)
23402 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23403 else
23404 tmp = gen_rtx_USE (VOIDmode, reg);
23405
23406 par[i] = tmp;
23407 }
23408
23409 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23410 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23411 tmp = gen_frame_mem (BLKmode, tmp);
23412 tmp = gen_rtx_SET (VOIDmode, tmp, par[0]);
23413 par[0] = tmp;
23414
23415 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23416 insn = emit_insn (tmp);
23417
23418 /* Always build the stack adjustment note for unwind info. */
23419 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23420 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp);
23421 par[0] = tmp;
23422
23423 /* Build the parallel of the registers recorded as saved for unwind. */
23424 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23425 {
23426 regno = ctz_hwi (real_regs);
23427 reg = gen_rtx_REG (SImode, regno);
23428
23429 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23430 tmp = gen_frame_mem (SImode, tmp);
23431 tmp = gen_rtx_SET (VOIDmode, tmp, reg);
23432 RTX_FRAME_RELATED_P (tmp) = 1;
23433 par[j + 1] = tmp;
23434 }
23435
23436 if (j == 0)
23437 tmp = par[0];
23438 else
23439 {
23440 RTX_FRAME_RELATED_P (par[0]) = 1;
23441 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23442 }
23443
23444 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23445
23446 return insn;
23447 }
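
/* For illustration (assumed register mask): a MASK of r4, r5 and lr
   builds a PARALLEL whose first element stores through a PRE_MODIFY of
   the stack pointer (the push itself) and whose remaining elements are
   USEs of the other registers, while the REG_FRAME_RELATED_EXPR note
   separately records the 12-byte stack adjustment and the word-by-word
   saves of the registers in REAL_REGS for the unwinder.  */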
23448
23449 /* Emit code to push or pop registers to or from the stack. F is the
23450 assembly file. MASK is the registers to pop. */
23451 static void
23452 thumb_pop (FILE *f, unsigned long mask)
23453 {
23454 int regno;
23455 int lo_mask = mask & 0xFF;
23456 int pushed_words = 0;
23457
23458 gcc_assert (mask);
23459
23460 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23461 {
23462 /* Special case. Do not generate a POP PC statement here; do it in
23463 thumb_exit(). */
23464 thumb_exit (f, -1);
23465 return;
23466 }
23467
23468 fprintf (f, "\tpop\t{");
23469
23470 /* Look at the low registers first. */
23471 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23472 {
23473 if (lo_mask & 1)
23474 {
23475 asm_fprintf (f, "%r", regno);
23476
23477 if ((lo_mask & ~1) != 0)
23478 fprintf (f, ", ");
23479
23480 pushed_words++;
23481 }
23482 }
23483
23484 if (mask & (1 << PC_REGNUM))
23485 {
23486 /* Catch popping the PC. */
23487 if (TARGET_INTERWORK || TARGET_BACKTRACE
23488 || crtl->calls_eh_return)
23489 {
23490 /* The PC is never popped directly; instead
23491 it is popped into r3 and then BX is used. */
23492 fprintf (f, "}\n");
23493
23494 thumb_exit (f, -1);
23495
23496 return;
23497 }
23498 else
23499 {
23500 if (mask & 0xFF)
23501 fprintf (f, ", ");
23502
23503 asm_fprintf (f, "%r", PC_REGNUM);
23504 }
23505 }
23506
23507 fprintf (f, "}\n");
23508 }
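
/* For example, a MASK containing r4, r5 and the PC (with no
   interworking, backtrace or EH-return requirement) makes the function
   above print "pop {r4, r5, pc}"; if any of those requirements hold,
   the PC part is instead handled by thumb_exit.  */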
23509
23510 /* Generate code to return from a thumb function.
23511 If 'reg_containing_return_addr' is -1, then the return address is
23512 actually on the stack, at the stack pointer. */
23513 static void
23514 thumb_exit (FILE *f, int reg_containing_return_addr)
23515 {
23516 unsigned regs_available_for_popping;
23517 unsigned regs_to_pop;
23518 int pops_needed;
23519 unsigned available;
23520 unsigned required;
23521 machine_mode mode;
23522 int size;
23523 int restore_a4 = FALSE;
23524
23525 /* Compute the registers we need to pop. */
23526 regs_to_pop = 0;
23527 pops_needed = 0;
23528
23529 if (reg_containing_return_addr == -1)
23530 {
23531 regs_to_pop |= 1 << LR_REGNUM;
23532 ++pops_needed;
23533 }
23534
23535 if (TARGET_BACKTRACE)
23536 {
23537 /* Restore the (ARM) frame pointer and stack pointer. */
23538 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23539 pops_needed += 2;
23540 }
23541
23542 /* If there is nothing to pop then just emit the BX instruction and
23543 return. */
23544 if (pops_needed == 0)
23545 {
23546 if (crtl->calls_eh_return)
23547 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23548
23549 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23550 return;
23551 }
23552 /* Otherwise if we are not supporting interworking and we have not created
23553 a backtrace structure and the function was not entered in ARM mode then
23554 just pop the return address straight into the PC. */
23555 else if (!TARGET_INTERWORK
23556 && !TARGET_BACKTRACE
23557 && !is_called_in_ARM_mode (current_function_decl)
23558 && !crtl->calls_eh_return)
23559 {
23560 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23561 return;
23562 }
23563
23564 /* Find out how many of the (return) argument registers we can corrupt. */
23565 regs_available_for_popping = 0;
23566
23567 /* If returning via __builtin_eh_return, the bottom three registers
23568 all contain information needed for the return. */
23569 if (crtl->calls_eh_return)
23570 size = 12;
23571 else
23572 {
23573 /* We can deduce the registers used from the function's
23574 return value. This is more reliable than examining
23575 df_regs_ever_live_p () because that will be set if the register is
23576 ever used in the function, not just if the register is used
23577 to hold a return value. */
23578
23579 if (crtl->return_rtx != 0)
23580 mode = GET_MODE (crtl->return_rtx);
23581 else
23582 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23583
23584 size = GET_MODE_SIZE (mode);
23585
23586 if (size == 0)
23587 {
23588 /* In a void function we can use any argument register.
23589 In a function that returns a structure on the stack
23590 we can use the second and third argument registers. */
23591 if (mode == VOIDmode)
23592 regs_available_for_popping =
23593 (1 << ARG_REGISTER (1))
23594 | (1 << ARG_REGISTER (2))
23595 | (1 << ARG_REGISTER (3));
23596 else
23597 regs_available_for_popping =
23598 (1 << ARG_REGISTER (2))
23599 | (1 << ARG_REGISTER (3));
23600 }
23601 else if (size <= 4)
23602 regs_available_for_popping =
23603 (1 << ARG_REGISTER (2))
23604 | (1 << ARG_REGISTER (3));
23605 else if (size <= 8)
23606 regs_available_for_popping =
23607 (1 << ARG_REGISTER (3));
23608 }
23609
23610 /* Match registers to be popped with registers into which we pop them. */
23611 for (available = regs_available_for_popping,
23612 required = regs_to_pop;
23613 required != 0 && available != 0;
23614 available &= ~(available & - available),
23615 required &= ~(required & - required))
23616 -- pops_needed;
23617
23618 /* If we have any popping registers left over, remove them. */
23619 if (available > 0)
23620 regs_available_for_popping &= ~available;
23621
23622 /* Otherwise if we need another popping register we can use
23623 the fourth argument register. */
23624 else if (pops_needed)
23625 {
23626 /* If we have not found any free argument registers and
23627 reg a4 contains the return address, we must move it. */
23628 if (regs_available_for_popping == 0
23629 && reg_containing_return_addr == LAST_ARG_REGNUM)
23630 {
23631 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23632 reg_containing_return_addr = LR_REGNUM;
23633 }
23634 else if (size > 12)
23635 {
23636 /* Register a4 is being used to hold part of the return value,
23637 but we have dire need of a free, low register. */
23638 restore_a4 = TRUE;
23639
23640 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
23641 }
23642
23643 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23644 {
23645 /* The fourth argument register is available. */
23646 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23647
23648 --pops_needed;
23649 }
23650 }
23651
23652 /* Pop as many registers as we can. */
23653 thumb_pop (f, regs_available_for_popping);
23654
23655 /* Process the registers we popped. */
23656 if (reg_containing_return_addr == -1)
23657 {
23658 /* The return address was popped into the lowest numbered register. */
23659 regs_to_pop &= ~(1 << LR_REGNUM);
23660
23661 reg_containing_return_addr =
23662 number_of_first_bit_set (regs_available_for_popping);
23663
23664 /* Remove this register from the mask of available registers, so that
23665 the return address will not be corrupted by further pops. */
23666 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23667 }
23668
23669 /* If we popped other registers then handle them here. */
23670 if (regs_available_for_popping)
23671 {
23672 int frame_pointer;
23673
23674 /* Work out which register currently contains the frame pointer. */
23675 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23676
23677 /* Move it into the correct place. */
23678 asm_fprintf (f, "\tmov\t%r, %r\n",
23679 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23680
23681 /* (Temporarily) remove it from the mask of popped registers. */
23682 regs_available_for_popping &= ~(1 << frame_pointer);
23683 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23684
23685 if (regs_available_for_popping)
23686 {
23687 int stack_pointer;
23688
23689 /* We popped the stack pointer as well;
23690 find the register that contains it. */
23691 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23692
23693 /* Move it into the stack register. */
23694 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23695
23696 /* At this point we have popped all necessary registers, so
23697 do not worry about restoring regs_available_for_popping
23698 to its correct value:
23699
23700 assert (pops_needed == 0)
23701 assert (regs_available_for_popping == (1 << frame_pointer))
23702 assert (regs_to_pop == (1 << STACK_POINTER)) */
23703 }
23704 else
23705 {
23706 /* Since we have just moved the popped value into the frame
23707 pointer, the popping register is available for reuse, and
23708 we know that we still have the stack pointer left to pop. */
23709 regs_available_for_popping |= (1 << frame_pointer);
23710 }
23711 }
23712
23713 /* If we still have registers left on the stack, but we no longer have
23714 any registers into which we can pop them, then we must move the return
23715 address into the link register and make available the register that
23716 contained it. */
23717 if (regs_available_for_popping == 0 && pops_needed > 0)
23718 {
23719 regs_available_for_popping |= 1 << reg_containing_return_addr;
23720
23721 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23722 reg_containing_return_addr);
23723
23724 reg_containing_return_addr = LR_REGNUM;
23725 }
23726
23727 /* If we have registers left on the stack then pop some more.
23728 We know that at most we will want to pop FP and SP. */
23729 if (pops_needed > 0)
23730 {
23731 int popped_into;
23732 int move_to;
23733
23734 thumb_pop (f, regs_available_for_popping);
23735
23736 /* We have popped either FP or SP.
23737 Move whichever one it is into the correct register. */
23738 popped_into = number_of_first_bit_set (regs_available_for_popping);
23739 move_to = number_of_first_bit_set (regs_to_pop);
23740
23741 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
23742
23743 regs_to_pop &= ~(1 << move_to);
23744
23745 --pops_needed;
23746 }
23747
23748 /* If we still have not popped everything then we must have only
23749 had one register available to us and we are now popping the SP. */
23750 if (pops_needed > 0)
23751 {
23752 int popped_into;
23753
23754 thumb_pop (f, regs_available_for_popping);
23755
23756 popped_into = number_of_first_bit_set (regs_available_for_popping);
23757
23758 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
23759 /*
23760 assert (regs_to_pop == (1 << STACK_POINTER))
23761 assert (pops_needed == 1)
23762 */
23763 }
23764
23765 /* If necessary restore the a4 register. */
23766 if (restore_a4)
23767 {
23768 if (reg_containing_return_addr != LR_REGNUM)
23769 {
23770 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23771 reg_containing_return_addr = LR_REGNUM;
23772 }
23773
23774 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
23775 }
23776
23777 if (crtl->calls_eh_return)
23778 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23779
23780 /* Return to caller. */
23781 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23782 }
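
/* For illustration (assumed registers): in a void function that must
   return with BX (e.g. when interworking is enabled) and has nothing
   else to restore, the code above pops the return address into the
   lowest free argument register and returns through it, roughly

	pop	{r0}
	bx	r0

   whereas the simple non-interworking case earlier in the function
   returns with a single "pop {pc}".  */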
23783 \f
23784 /* Scan INSN just before assembler is output for it.
23785 For Thumb-1, we track the status of the condition codes; this
23786 information is used in the cbranchsi4_insn pattern. */
23787 void
23788 thumb1_final_prescan_insn (rtx_insn *insn)
23789 {
23790 if (flag_print_asm_name)
23791 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
23792 INSN_ADDRESSES (INSN_UID (insn)));
23793 /* Don't overwrite the previous setter when we get to a cbranch. */
23794 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
23795 {
23796 enum attr_conds conds;
23797
23798 if (cfun->machine->thumb1_cc_insn)
23799 {
23800 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
23801 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
23802 CC_STATUS_INIT;
23803 }
23804 conds = get_attr_conds (insn);
23805 if (conds == CONDS_SET)
23806 {
23807 rtx set = single_set (insn);
23808 cfun->machine->thumb1_cc_insn = insn;
23809 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
23810 cfun->machine->thumb1_cc_op1 = const0_rtx;
23811 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
23812 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
23813 {
23814 rtx src1 = XEXP (SET_SRC (set), 1);
23815 if (src1 == const0_rtx)
23816 cfun->machine->thumb1_cc_mode = CCmode;
23817 }
23818 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
23819 {
23820 /* Record the src register operand instead of dest because
23821 cprop_hardreg pass propagates src. */
23822 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
23823 }
23824 }
23825 else if (conds != CONDS_NOCOND)
23826 cfun->machine->thumb1_cc_insn = NULL_RTX;
23827 }
23828
23829 /* Check if unexpected far jump is used. */
23830 if (cfun->machine->lr_save_eliminated
23831 && get_attr_far_jump (insn) == FAR_JUMP_YES)
23832 internal_error("Unexpected thumb1 far jump");
23833 }
23834
23835 int
23836 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
23837 {
23838 unsigned HOST_WIDE_INT mask = 0xff;
23839 int i;
23840
23841 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
23842 if (val == 0) /* XXX */
23843 return 0;
23844
23845 for (i = 0; i < 25; i++)
23846 if ((val & (mask << i)) == val)
23847 return 1;
23848
23849 return 0;
23850 }
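
/* For example, 0x00ff0000 is accepted by the test above (0xff shifted
   left by 16), while 0x00ff00ff is rejected because its set bits do
   not fit inside a single 8-bit window.  */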
23851
23852 /* Returns nonzero if the current function contains,
23853 or might contain a far jump. */
23854 static int
23855 thumb_far_jump_used_p (void)
23856 {
23857 rtx_insn *insn;
23858 bool far_jump = false;
23859 unsigned int func_size = 0;
23860
23861 /* This test is only important for leaf functions. */
23862 /* assert (!leaf_function_p ()); */
23863
23864 /* If we have already decided that far jumps may be used,
23865 do not bother checking again, and always return true even if
23866 it turns out that they are not being used. Once we have made
23867 the decision that far jumps are present (and that hence the link
23868 register will be pushed onto the stack) we cannot go back on it. */
23869 if (cfun->machine->far_jump_used)
23870 return 1;
23871
23872 /* If this function is not being called from the prologue/epilogue
23873 generation code then it must be being called from the
23874 INITIAL_ELIMINATION_OFFSET macro. */
23875 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
23876 {
23877 /* In this case we know that we are being asked about the elimination
23878 of the arg pointer register. If that register is not being used,
23879 then there are no arguments on the stack, and we do not have to
23880 worry that a far jump might force the prologue to push the link
23881 register, changing the stack offsets. In this case we can just
23882 return false, since the presence of far jumps in the function will
23883 not affect stack offsets.
23884
23885 If the arg pointer is live (or if it was live, but has now been
23886 eliminated and so set to dead) then we do have to test to see if
23887 the function might contain a far jump. This test can lead to some
23888 false negatives, since before reload is completed the length of
23889 branch instructions is not known, so gcc defaults to returning their
23890 longest length, which in turn sets the far jump attribute to true.
23891
23892 A false negative will not result in bad code being generated, but it
23893 will result in a needless push and pop of the link register. We
23894 hope that this does not occur too often.
23895
23896 If we need doubleword stack alignment this could affect the other
23897 elimination offsets so we can't risk getting it wrong. */
23898 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
23899 cfun->machine->arg_pointer_live = 1;
23900 else if (!cfun->machine->arg_pointer_live)
23901 return 0;
23902 }
23903
23904 /* We should not change far_jump_used during or after reload, as there is
23905 no chance to change stack frame layout. */
23906 if (reload_in_progress || reload_completed)
23907 return 0;
23908
23909 /* Check to see if the function contains a branch
23910 insn with the far jump attribute set. */
23911 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23912 {
23913 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
23914 {
23915 far_jump = true;
23916 }
23917 func_size += get_attr_length (insn);
23918 }
23919
23920 /* Attribute far_jump will always be true for thumb1 before
23921 shorten_branch pass. So checking the far_jump attribute before
23922 shorten_branch isn't very useful.
23923 
23924 The following heuristic tries to estimate more accurately whether a far
23925 jump may finally be used. The heuristic is very conservative, as there is
23926 no chance to roll back the decision not to use a far jump.
23927 
23928 The Thumb1 long branch offset range is -2048 to 2046. The worst case is
23929 each 2-byte insn being associated with a 4-byte constant pool entry. Using
23930 function size 2048/3 as the threshold is conservative enough. */
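/* Worked example of the threshold above (editorial, illustrative numbers):
   with every 2-byte insn dragging a 4-byte literal pool entry behind it,
   a function whose insn lengths sum to FUNC_SIZE bytes may occupy up to
   3 * FUNC_SIZE bytes once the pools are laid out.  FUNC_SIZE = 600 gives
   a worst case of 1800 bytes, still inside the 2048-byte branch range, so
   short branches are assumed to suffice; FUNC_SIZE = 700 gives 2100 bytes
   and the code below conservatively commits to far jumps.  */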
23931 if (far_jump)
23932 {
23933 if ((func_size * 3) >= 2048)
23934 {
23935 /* Record the fact that we have decided that
23936 the function does use far jumps. */
23937 cfun->machine->far_jump_used = 1;
23938 return 1;
23939 }
23940 }
23941
23942 return 0;
23943 }
23944
23945 /* Return nonzero if FUNC must be entered in ARM mode. */
23946 int
23947 is_called_in_ARM_mode (tree func)
23948 {
23949 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
23950
23951 /* Ignore the problem about functions whose address is taken. */
23952 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
23953 return TRUE;
23954
23955 #ifdef ARM_PE
23956 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
23957 #else
23958 return FALSE;
23959 #endif
23960 }
23961
23962 /* Given the stack offsets and register mask in OFFSETS, decide how
23963 many additional registers to push instead of subtracting a constant
23964 from SP. For epilogues the principle is the same except we use pop.
23965 FOR_PROLOGUE indicates which we're generating. */
23966 static int
23967 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
23968 {
23969 HOST_WIDE_INT amount;
23970 unsigned long live_regs_mask = offsets->saved_regs_mask;
23971 /* Extract a mask of the ones we can give to the Thumb's push/pop
23972 instruction. */
23973 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
23974 /* Then count how many other high registers will need to be pushed. */
23975 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23976 int n_free, reg_base, size;
23977
23978 if (!for_prologue && frame_pointer_needed)
23979 amount = offsets->locals_base - offsets->saved_regs;
23980 else
23981 amount = offsets->outgoing_args - offsets->saved_regs;
23982
23983 /* If the stack frame size is 512 exactly, we can save one load
23984 instruction, which should make this a win even when optimizing
23985 for speed. */
23986 if (!optimize_size && amount != 512)
23987 return 0;
23988
23989 /* Can't do this if there are high registers to push. */
23990 if (high_regs_pushed != 0)
23991 return 0;
23992
23993 /* Shouldn't do it in the prologue if no registers would normally
23994 be pushed at all. In the epilogue, also allow it if we'll have
23995 a pop insn for the PC. */
23996 if (l_mask == 0
23997 && (for_prologue
23998 || TARGET_BACKTRACE
23999 || (live_regs_mask & 1 << LR_REGNUM) == 0
24000 || TARGET_INTERWORK
24001 || crtl->args.pretend_args_size != 0))
24002 return 0;
24003
24004 /* Don't do this if thumb_expand_prologue wants to emit instructions
24005 between the push and the stack frame allocation. */
24006 if (for_prologue
24007 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24008 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24009 return 0;
24010
24011 reg_base = 0;
24012 n_free = 0;
24013 if (!for_prologue)
24014 {
24015 size = arm_size_return_regs ();
24016 reg_base = ARM_NUM_INTS (size);
24017 live_regs_mask >>= reg_base;
24018 }
24019
24020 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24021 && (for_prologue || call_used_regs[reg_base + n_free]))
24022 {
24023 live_regs_mask >>= 1;
24024 n_free++;
24025 }
24026
24027 if (n_free == 0)
24028 return 0;
24029 gcc_assert (amount / 4 * 4 == amount);
24030
24031 if (amount >= 512 && (amount - n_free * 4) < 512)
24032 return (amount - 508) / 4;
24033 if (amount <= n_free * 4)
24034 return amount / 4;
24035 return 0;
24036 }
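/* Worked examples for the function above (editorial; register counts are
   hypothetical): with amount == 512 and at least one free low register,
   (512 - 508) / 4 == 1 is returned, so one extra register joins the
   push/pop and the remaining 508-byte adjustment still fits a single
   "sub sp, #N" / "add sp, #N".  When optimizing for size, amount == 8
   with three free low registers returns 8 / 4 == 2, i.e. the stack
   adjustment is folded entirely into two extra registers in the existing
   push/pop.  */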
24037
24038 /* The bits which aren't usefully expanded as rtl. */
24039 const char *
24040 thumb1_unexpanded_epilogue (void)
24041 {
24042 arm_stack_offsets *offsets;
24043 int regno;
24044 unsigned long live_regs_mask = 0;
24045 int high_regs_pushed = 0;
24046 int extra_pop;
24047 int had_to_push_lr;
24048 int size;
24049
24050 if (cfun->machine->return_used_this_function != 0)
24051 return "";
24052
24053 if (IS_NAKED (arm_current_func_type ()))
24054 return "";
24055
24056 offsets = arm_get_frame_offsets ();
24057 live_regs_mask = offsets->saved_regs_mask;
24058 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24059
24060 /* Deduce the registers used from the function's return value.
24061 This is more reliable than examining df_regs_ever_live_p () because that
24062 will be set if the register is ever used in the function, not just if
24063 the register is used to hold a return value. */
24064 size = arm_size_return_regs ();
24065
24066 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24067 if (extra_pop > 0)
24068 {
24069 unsigned long extra_mask = (1 << extra_pop) - 1;
24070 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24071 }
24072
24073 /* The prolog may have pushed some high registers to use as
24074 work registers, e.g. the testsuite file:
24075 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24076 compiles to produce:
24077 push {r4, r5, r6, r7, lr}
24078 mov r7, r9
24079 mov r6, r8
24080 push {r6, r7}
24081 as part of the prolog. We have to undo that pushing here. */
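/* Editorial sketch of the matching epilogue sequence (register choice is
   illustrative and depends on the return-value size): with r8 and r9
   saved as above and r2/r3 free, the loop below emits roughly

      pop  {r2, r3}
      mov  r8, r2
      mov  r9, r3

   recovering the high registers through whatever low registers remain
   available.  */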
24082
24083 if (high_regs_pushed)
24084 {
24085 unsigned long mask = live_regs_mask & 0xff;
24086 int next_hi_reg;
24087
24088 /* The available low registers depend on the size of the value we are
24089 returning. */
24090 if (size <= 12)
24091 mask |= 1 << 3;
24092 if (size <= 8)
24093 mask |= 1 << 2;
24094
24095 if (mask == 0)
24096 /* Oh dear! We have no low registers into which we can pop
24097 high registers! */
24098 internal_error
24099 ("no low registers available for popping high registers");
24100
24101 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24102 if (live_regs_mask & (1 << next_hi_reg))
24103 break;
24104
24105 while (high_regs_pushed)
24106 {
24107 /* Find lo register(s) into which the high register(s) can
24108 be popped. */
24109 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24110 {
24111 if (mask & (1 << regno))
24112 high_regs_pushed--;
24113 if (high_regs_pushed == 0)
24114 break;
24115 }
24116
24117 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24118
24119 /* Pop the values into the low register(s). */
24120 thumb_pop (asm_out_file, mask);
24121
24122 /* Move the value(s) into the high registers. */
24123 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24124 {
24125 if (mask & (1 << regno))
24126 {
24127 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24128 regno);
24129
24130 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24131 if (live_regs_mask & (1 << next_hi_reg))
24132 break;
24133 }
24134 }
24135 }
24136 live_regs_mask &= ~0x0f00;
24137 }
24138
24139 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24140 live_regs_mask &= 0xff;
24141
24142 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24143 {
24144 /* Pop the return address into the PC. */
24145 if (had_to_push_lr)
24146 live_regs_mask |= 1 << PC_REGNUM;
24147
24148 /* Either no argument registers were pushed or a backtrace
24149 structure was created which includes an adjusted stack
24150 pointer, so just pop everything. */
24151 if (live_regs_mask)
24152 thumb_pop (asm_out_file, live_regs_mask);
24153
24154 /* We have either just popped the return address into the
24155 PC or it was kept in LR for the entire function.
24156 Note that thumb_pop has already called thumb_exit if the
24157 PC was in the list. */
24158 if (!had_to_push_lr)
24159 thumb_exit (asm_out_file, LR_REGNUM);
24160 }
24161 else
24162 {
24163 /* Pop everything but the return address. */
24164 if (live_regs_mask)
24165 thumb_pop (asm_out_file, live_regs_mask);
24166
24167 if (had_to_push_lr)
24168 {
24169 if (size > 12)
24170 {
24171 /* We have no free low regs, so save one. */
24172 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24173 LAST_ARG_REGNUM);
24174 }
24175
24176 /* Get the return address into a temporary register. */
24177 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24178
24179 if (size > 12)
24180 {
24181 /* Move the return address to lr. */
24182 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24183 LAST_ARG_REGNUM);
24184 /* Restore the low register. */
24185 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24186 IP_REGNUM);
24187 regno = LR_REGNUM;
24188 }
24189 else
24190 regno = LAST_ARG_REGNUM;
24191 }
24192 else
24193 regno = LR_REGNUM;
24194
24195 /* Remove the argument registers that were pushed onto the stack. */
24196 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24197 SP_REGNUM, SP_REGNUM,
24198 crtl->args.pretend_args_size);
24199
24200 thumb_exit (asm_out_file, regno);
24201 }
24202
24203 return "";
24204 }
24205
24206 /* Functions to save and restore machine-specific function data. */
24207 static struct machine_function *
24208 arm_init_machine_status (void)
24209 {
24210 struct machine_function *machine;
24211 machine = ggc_cleared_alloc<machine_function> ();
24212
24213 #if ARM_FT_UNKNOWN != 0
24214 machine->func_type = ARM_FT_UNKNOWN;
24215 #endif
24216 return machine;
24217 }
24218
24219 /* Return an RTX indicating where the return address to the
24220 calling function can be found. */
24221 rtx
24222 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24223 {
24224 if (count != 0)
24225 return NULL_RTX;
24226
24227 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24228 }
24229
24230 /* Do anything needed before RTL is emitted for each function. */
24231 void
24232 arm_init_expanders (void)
24233 {
24234 /* Arrange to initialize and mark the machine per-function status. */
24235 init_machine_status = arm_init_machine_status;
24236
24237 /* This is to stop the combine pass optimizing away the alignment
24238 adjustment of va_arg. */
24239 /* ??? It is claimed that this should not be necessary. */
24240 if (cfun)
24241 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24242 }
24243
24244
24245 /* Like arm_compute_initial_elimination_offset. Simpler because there
24246 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24247 to point at the base of the local variables after static stack
24248 space for a function has been allocated. */
24249
24250 HOST_WIDE_INT
24251 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24252 {
24253 arm_stack_offsets *offsets;
24254
24255 offsets = arm_get_frame_offsets ();
24256
24257 switch (from)
24258 {
24259 case ARG_POINTER_REGNUM:
24260 switch (to)
24261 {
24262 case STACK_POINTER_REGNUM:
24263 return offsets->outgoing_args - offsets->saved_args;
24264
24265 case FRAME_POINTER_REGNUM:
24266 return offsets->soft_frame - offsets->saved_args;
24267
24268 case ARM_HARD_FRAME_POINTER_REGNUM:
24269 return offsets->saved_regs - offsets->saved_args;
24270
24271 case THUMB_HARD_FRAME_POINTER_REGNUM:
24272 return offsets->locals_base - offsets->saved_args;
24273
24274 default:
24275 gcc_unreachable ();
24276 }
24277 break;
24278
24279 case FRAME_POINTER_REGNUM:
24280 switch (to)
24281 {
24282 case STACK_POINTER_REGNUM:
24283 return offsets->outgoing_args - offsets->soft_frame;
24284
24285 case ARM_HARD_FRAME_POINTER_REGNUM:
24286 return offsets->saved_regs - offsets->soft_frame;
24287
24288 case THUMB_HARD_FRAME_POINTER_REGNUM:
24289 return offsets->locals_base - offsets->soft_frame;
24290
24291 default:
24292 gcc_unreachable ();
24293 }
24294 break;
24295
24296 default:
24297 gcc_unreachable ();
24298 }
24299 }
24300
24301 /* Generate the function's prologue. */
24302
24303 void
24304 thumb1_expand_prologue (void)
24305 {
24306 rtx_insn *insn;
24307
24308 HOST_WIDE_INT amount;
24309 arm_stack_offsets *offsets;
24310 unsigned long func_type;
24311 int regno;
24312 unsigned long live_regs_mask;
24313 unsigned long l_mask;
24314 unsigned high_regs_pushed = 0;
24315
24316 func_type = arm_current_func_type ();
24317
24318 /* Naked functions don't have prologues. */
24319 if (IS_NAKED (func_type))
24320 return;
24321
24322 if (IS_INTERRUPT (func_type))
24323 {
24324 error ("interrupt Service Routines cannot be coded in Thumb mode");
24325 return;
24326 }
24327
24328 if (is_called_in_ARM_mode (current_function_decl))
24329 emit_insn (gen_prologue_thumb1_interwork ());
24330
24331 offsets = arm_get_frame_offsets ();
24332 live_regs_mask = offsets->saved_regs_mask;
24333
24334 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24335 l_mask = live_regs_mask & 0x40ff;
24336 /* Then count how many other high registers will need to be pushed. */
24337 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24338
24339 if (crtl->args.pretend_args_size)
24340 {
24341 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24342
24343 if (cfun->machine->uses_anonymous_args)
24344 {
24345 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24346 unsigned long mask;
24347
24348 mask = 1ul << (LAST_ARG_REGNUM + 1);
24349 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24350
24351 insn = thumb1_emit_multi_reg_push (mask, 0);
24352 }
24353 else
24354 {
24355 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24356 stack_pointer_rtx, x));
24357 }
24358 RTX_FRAME_RELATED_P (insn) = 1;
24359 }
24360
24361 if (TARGET_BACKTRACE)
24362 {
24363 HOST_WIDE_INT offset = 0;
24364 unsigned work_register;
24365 rtx work_reg, x, arm_hfp_rtx;
24366
24367 /* We have been asked to create a stack backtrace structure.
24368 The code looks like this:
24369
24370 0 .align 2
24371 0 func:
24372 0 sub SP, #16 Reserve space for 4 registers.
24373 2 push {R7} Push low registers.
24374 4 add R7, SP, #20 Get the stack pointer before the push.
24375 6 str R7, [SP, #8] Store the stack pointer
24376 (before reserving the space).
24377 8 mov R7, PC Get hold of the start of this code + 12.
24378 10 str R7, [SP, #16] Store it.
24379 12 mov R7, FP Get hold of the current frame pointer.
24380 14 str R7, [SP, #4] Store it.
24381 16 mov R7, LR Get hold of the current return address.
24382 18 str R7, [SP, #12] Store it.
24383 20 add R7, SP, #16 Point at the start of the
24384 backtrace structure.
24385 22 mov FP, R7 Put this value into the frame pointer. */
24386
24387 work_register = thumb_find_work_register (live_regs_mask);
24388 work_reg = gen_rtx_REG (SImode, work_register);
24389 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24390
24391 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24392 stack_pointer_rtx, GEN_INT (-16)));
24393 RTX_FRAME_RELATED_P (insn) = 1;
24394
24395 if (l_mask)
24396 {
24397 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24398 RTX_FRAME_RELATED_P (insn) = 1;
24399
24400 offset = bit_count (l_mask) * UNITS_PER_WORD;
24401 }
24402
24403 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24404 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24405
24406 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24407 x = gen_frame_mem (SImode, x);
24408 emit_move_insn (x, work_reg);
24409
24410 /* Make sure that the instruction fetching the PC is in the right place
24411 to calculate "start of backtrace creation code + 12". */
24412 /* ??? The stores using the common WORK_REG ought to be enough to
24413 prevent the scheduler from doing anything weird. Failing that
24414 we could always move all of the following into an UNSPEC_VOLATILE. */
24415 if (l_mask)
24416 {
24417 x = gen_rtx_REG (SImode, PC_REGNUM);
24418 emit_move_insn (work_reg, x);
24419
24420 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24421 x = gen_frame_mem (SImode, x);
24422 emit_move_insn (x, work_reg);
24423
24424 emit_move_insn (work_reg, arm_hfp_rtx);
24425
24426 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24427 x = gen_frame_mem (SImode, x);
24428 emit_move_insn (x, work_reg);
24429 }
24430 else
24431 {
24432 emit_move_insn (work_reg, arm_hfp_rtx);
24433
24434 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24435 x = gen_frame_mem (SImode, x);
24436 emit_move_insn (x, work_reg);
24437
24438 x = gen_rtx_REG (SImode, PC_REGNUM);
24439 emit_move_insn (work_reg, x);
24440
24441 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24442 x = gen_frame_mem (SImode, x);
24443 emit_move_insn (x, work_reg);
24444 }
24445
24446 x = gen_rtx_REG (SImode, LR_REGNUM);
24447 emit_move_insn (work_reg, x);
24448
24449 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24450 x = gen_frame_mem (SImode, x);
24451 emit_move_insn (x, work_reg);
24452
24453 x = GEN_INT (offset + 12);
24454 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24455
24456 emit_move_insn (arm_hfp_rtx, work_reg);
24457 }
24458 /* Optimization: If we are not pushing any low registers but we are going
24459 to push some high registers then delay our first push. This will just
24460 be a push of LR and we can combine it with the push of the first high
24461 register. */
24462 else if ((l_mask & 0xff) != 0
24463 || (high_regs_pushed == 0 && l_mask))
24464 {
24465 unsigned long mask = l_mask;
24466 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24467 insn = thumb1_emit_multi_reg_push (mask, mask);
24468 RTX_FRAME_RELATED_P (insn) = 1;
24469 }
24470
24471 if (high_regs_pushed)
24472 {
24473 unsigned pushable_regs;
24474 unsigned next_hi_reg;
24475 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24476 : crtl->args.info.nregs;
24477 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24478
24479 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24480 if (live_regs_mask & (1 << next_hi_reg))
24481 break;
24482
24483 /* Here we need to mask out registers used for passing arguments
24484 even if they can be pushed. This is to avoid using them to stash the high
24485 registers; such stashing could clobber the argument values. */
24486 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
24487
24488 if (pushable_regs == 0)
24489 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24490
24491 while (high_regs_pushed > 0)
24492 {
24493 unsigned long real_regs_mask = 0;
24494
24495 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
24496 {
24497 if (pushable_regs & (1 << regno))
24498 {
24499 emit_move_insn (gen_rtx_REG (SImode, regno),
24500 gen_rtx_REG (SImode, next_hi_reg));
24501
24502 high_regs_pushed --;
24503 real_regs_mask |= (1 << next_hi_reg);
24504
24505 if (high_regs_pushed)
24506 {
24507 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24508 next_hi_reg --)
24509 if (live_regs_mask & (1 << next_hi_reg))
24510 break;
24511 }
24512 else
24513 {
24514 pushable_regs &= ~((1 << regno) - 1);
24515 break;
24516 }
24517 }
24518 }
24519
24520 /* If we had to find a work register and we have not yet
24521 saved the LR then add it to the list of regs to push. */
24522 if (l_mask == (1 << LR_REGNUM))
24523 {
24524 pushable_regs |= l_mask;
24525 real_regs_mask |= l_mask;
24526 l_mask = 0;
24527 }
24528
24529 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
24530 RTX_FRAME_RELATED_P (insn) = 1;
24531 }
24532 }
24533
24534 /* Load the pic register before setting the frame pointer,
24535 so we can use r7 as a temporary work register. */
24536 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24537 arm_load_pic_register (live_regs_mask);
24538
24539 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24540 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24541 stack_pointer_rtx);
24542
24543 if (flag_stack_usage_info)
24544 current_function_static_stack_size
24545 = offsets->outgoing_args - offsets->saved_args;
24546
24547 amount = offsets->outgoing_args - offsets->saved_regs;
24548 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24549 if (amount)
24550 {
24551 if (amount < 512)
24552 {
24553 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24554 GEN_INT (- amount)));
24555 RTX_FRAME_RELATED_P (insn) = 1;
24556 }
24557 else
24558 {
24559 rtx reg, dwarf;
24560
24561 /* The stack decrement is too big for an immediate value in a single
24562 insn. In theory we could issue multiple subtracts, but after
24563 three of them it becomes more space efficient to place the full
24564 value in the constant pool and load into a register. (Also the
24565 ARM debugger really likes to see only one stack decrement per
24566 function). So instead we look for a scratch register into which
24567 we can load the decrement, and then we subtract this from the
24568 stack pointer. Unfortunately on the thumb the only available
24569 scratch registers are the argument registers, and we cannot use
24570 these as they may hold arguments to the function. Instead we
24571 attempt to locate a call preserved register which is used by this
24572 function. If we can find one, then we know that it will have
24573 been pushed at the start of the prologue and so we can corrupt
24574 it now. */
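/* Editorial sketch of the resulting code (the pool label is a
   placeholder): for a 1024-byte frame with r4 saved by the push above,
   the sequence amounts to

      ldr  r4, .Lpool      @ .Lpool holds -1024
      add  sp, sp, r4

   and clobbering r4 is harmless because its saved value is restored in
   the epilogue.  */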
24575 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24576 if (live_regs_mask & (1 << regno))
24577 break;
24578
24579 gcc_assert (regno <= LAST_LO_REGNUM);
24580
24581 reg = gen_rtx_REG (SImode, regno);
24582
24583 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24584
24585 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24586 stack_pointer_rtx, reg));
24587
24588 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
24589 plus_constant (Pmode, stack_pointer_rtx,
24590 -amount));
24591 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24592 RTX_FRAME_RELATED_P (insn) = 1;
24593 }
24594 }
24595
24596 if (frame_pointer_needed)
24597 thumb_set_frame_pointer (offsets);
24598
24599 /* If we are profiling, make sure no instructions are scheduled before
24600 the call to mcount. Similarly if the user has requested no
24601 scheduling in the prolog. Similarly if we want non-call exceptions
24602 using the EABI unwinder, to prevent faulting instructions from being
24603 swapped with a stack adjustment. */
24604 if (crtl->profile || !TARGET_SCHED_PROLOG
24605 || (arm_except_unwind_info (&global_options) == UI_TARGET
24606 && cfun->can_throw_non_call_exceptions))
24607 emit_insn (gen_blockage ());
24608
24609 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24610 if (live_regs_mask & 0xff)
24611 cfun->machine->lr_save_eliminated = 0;
24612 }
24613
24614 /* Generate pattern *pop_multiple_with_stack_update_and_return if a single
24615 POP instruction can be generated. LR should be replaced by PC. All
24616 the required checks are already done by USE_RETURN_INSN (). Hence,
24617 all we really need to check here is whether a single register or
24618 multiple registers are to be popped. */
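/* Editorial illustration of the three shapes this can take (mnemonics
   are approximate): with only one register to restore, the PC is loaded
   by a single post-incrementing pop such as "ldr pc, [sp], #4"; with
   several registers LR is swapped for PC in the mask and the epilogue
   ends in e.g. "pop {r4, r5, pc}"; a simple return emits no pop at
   all.  */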
24619 void
24620 thumb2_expand_return (bool simple_return)
24621 {
24622 int i, num_regs;
24623 unsigned long saved_regs_mask;
24624 arm_stack_offsets *offsets;
24625
24626 offsets = arm_get_frame_offsets ();
24627 saved_regs_mask = offsets->saved_regs_mask;
24628
24629 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
24630 if (saved_regs_mask & (1 << i))
24631 num_regs++;
24632
24633 if (!simple_return && saved_regs_mask)
24634 {
24635 if (num_regs == 1)
24636 {
24637 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24638 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
24639 rtx addr = gen_rtx_MEM (SImode,
24640 gen_rtx_POST_INC (SImode,
24641 stack_pointer_rtx));
24642 set_mem_alias_set (addr, get_frame_alias_set ());
24643 XVECEXP (par, 0, 0) = ret_rtx;
24644 XVECEXP (par, 0, 1) = gen_rtx_SET (SImode, reg, addr);
24645 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
24646 emit_jump_insn (par);
24647 }
24648 else
24649 {
24650 saved_regs_mask &= ~ (1 << LR_REGNUM);
24651 saved_regs_mask |= (1 << PC_REGNUM);
24652 arm_emit_multi_reg_pop (saved_regs_mask);
24653 }
24654 }
24655 else
24656 {
24657 emit_jump_insn (simple_return_rtx);
24658 }
24659 }
24660
24661 void
24662 thumb1_expand_epilogue (void)
24663 {
24664 HOST_WIDE_INT amount;
24665 arm_stack_offsets *offsets;
24666 int regno;
24667
24668 /* Naked functions don't have epilogues. */
24669 if (IS_NAKED (arm_current_func_type ()))
24670 return;
24671
24672 offsets = arm_get_frame_offsets ();
24673 amount = offsets->outgoing_args - offsets->saved_regs;
24674
24675 if (frame_pointer_needed)
24676 {
24677 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
24678 amount = offsets->locals_base - offsets->saved_regs;
24679 }
24680 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
24681
24682 gcc_assert (amount >= 0);
24683 if (amount)
24684 {
24685 emit_insn (gen_blockage ());
24686
24687 if (amount < 512)
24688 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24689 GEN_INT (amount)));
24690 else
24691 {
24692 /* r3 is always free in the epilogue. */
24693 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
24694
24695 emit_insn (gen_movsi (reg, GEN_INT (amount)));
24696 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
24697 }
24698 }
24699
24700 /* Emit a USE (stack_pointer_rtx), so that
24701 the stack adjustment will not be deleted. */
24702 emit_insn (gen_force_register_use (stack_pointer_rtx));
24703
24704 if (crtl->profile || !TARGET_SCHED_PROLOG)
24705 emit_insn (gen_blockage ());
24706
24707 /* Emit a clobber for each register that will be restored in the epilogue,
24708 so that flow2 will get register lifetimes correct. */
24709 for (regno = 0; regno < 13; regno++)
24710 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
24711 emit_clobber (gen_rtx_REG (SImode, regno));
24712
24713 if (! df_regs_ever_live_p (LR_REGNUM))
24714 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
24715 }
24716
24717 /* Epilogue code for APCS frame. */
24718 static void
24719 arm_expand_epilogue_apcs_frame (bool really_return)
24720 {
24721 unsigned long func_type;
24722 unsigned long saved_regs_mask;
24723 int num_regs = 0;
24724 int i;
24725 int floats_from_frame = 0;
24726 arm_stack_offsets *offsets;
24727
24728 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
24729 func_type = arm_current_func_type ();
24730
24731 /* Get frame offsets for ARM. */
24732 offsets = arm_get_frame_offsets ();
24733 saved_regs_mask = offsets->saved_regs_mask;
24734
24735 /* Find the offset of the floating-point save area in the frame. */
24736 floats_from_frame
24737 = (offsets->saved_args
24738 + arm_compute_static_chain_stack_bytes ()
24739 - offsets->frame);
24740
24741 /* Compute how many core registers are saved and how far away the floats are. */
24742 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24743 if (saved_regs_mask & (1 << i))
24744 {
24745 num_regs++;
24746 floats_from_frame += 4;
24747 }
24748
24749 if (TARGET_HARD_FLOAT && TARGET_VFP)
24750 {
24751 int start_reg;
24752 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
24753
24754 /* The offset is from IP_REGNUM. */
24755 int saved_size = arm_get_vfp_saved_size ();
24756 if (saved_size > 0)
24757 {
24758 rtx_insn *insn;
24759 floats_from_frame += saved_size;
24760 insn = emit_insn (gen_addsi3 (ip_rtx,
24761 hard_frame_pointer_rtx,
24762 GEN_INT (-floats_from_frame)));
24763 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
24764 ip_rtx, hard_frame_pointer_rtx);
24765 }
24766
24767 /* Generate VFP register multi-pop. */
24768 start_reg = FIRST_VFP_REGNUM;
24769
24770 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
24771 /* Look for a case where a reg does not need restoring. */
24772 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24773 && (!df_regs_ever_live_p (i + 1)
24774 || call_used_regs[i + 1]))
24775 {
24776 if (start_reg != i)
24777 arm_emit_vfp_multi_reg_pop (start_reg,
24778 (i - start_reg) / 2,
24779 gen_rtx_REG (SImode,
24780 IP_REGNUM));
24781 start_reg = i + 2;
24782 }
24783
24784 /* Restore the remaining regs that we have discovered (or possibly
24785 even all of them, if the conditional in the for loop never
24786 fired). */
24787 if (start_reg != i)
24788 arm_emit_vfp_multi_reg_pop (start_reg,
24789 (i - start_reg) / 2,
24790 gen_rtx_REG (SImode, IP_REGNUM));
24791 }
24792
24793 if (TARGET_IWMMXT)
24794 {
24795 /* The frame pointer is guaranteed to be non-double-word aligned, as
24796 it is set to double-word-aligned old_stack_pointer - 4. */
24797 rtx_insn *insn;
24798 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
24799
24800 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
24801 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24802 {
24803 rtx addr = gen_frame_mem (V2SImode,
24804 plus_constant (Pmode, hard_frame_pointer_rtx,
24805 - lrm_count * 4));
24806 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24807 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24808 gen_rtx_REG (V2SImode, i),
24809 NULL_RTX);
24810 lrm_count += 2;
24811 }
24812 }
24813
24814 /* saved_regs_mask should contain IP, which holds the old stack pointer
24815 from the time the activation record was created. Since SP and IP are adjacent registers,
24816 we can restore the value directly into SP. */
24817 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
24818 saved_regs_mask &= ~(1 << IP_REGNUM);
24819 saved_regs_mask |= (1 << SP_REGNUM);
24820
24821 /* There are two registers left in saved_regs_mask - LR and PC. We
24822 only need to restore LR (the return address), but to
24823 save time we can load it directly into PC, unless we need a
24824 special function exit sequence, or we are not really returning. */
24825 if (really_return
24826 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
24827 && !crtl->calls_eh_return)
24828 /* Delete LR from the register mask, so that the return address saved
24829 on the stack is popped directly into the PC. */
24830 saved_regs_mask &= ~(1 << LR_REGNUM);
24831 else
24832 saved_regs_mask &= ~(1 << PC_REGNUM);
24833
24834 num_regs = bit_count (saved_regs_mask);
24835 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
24836 {
24837 rtx_insn *insn;
24838 emit_insn (gen_blockage ());
24839 /* Unwind the stack to just below the saved registers. */
24840 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24841 hard_frame_pointer_rtx,
24842 GEN_INT (- 4 * num_regs)));
24843
24844 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
24845 stack_pointer_rtx, hard_frame_pointer_rtx);
24846 }
24847
24848 arm_emit_multi_reg_pop (saved_regs_mask);
24849
24850 if (IS_INTERRUPT (func_type))
24851 {
24852 /* Interrupt handlers will have pushed the
24853 IP onto the stack, so restore it now. */
24854 rtx_insn *insn;
24855 rtx addr = gen_rtx_MEM (SImode,
24856 gen_rtx_POST_INC (SImode,
24857 stack_pointer_rtx));
24858 set_mem_alias_set (addr, get_frame_alias_set ());
24859 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
24860 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24861 gen_rtx_REG (SImode, IP_REGNUM),
24862 NULL_RTX);
24863 }
24864
24865 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
24866 return;
24867
24868 if (crtl->calls_eh_return)
24869 emit_insn (gen_addsi3 (stack_pointer_rtx,
24870 stack_pointer_rtx,
24871 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
24872
24873 if (IS_STACKALIGN (func_type))
24874 /* Restore the original stack pointer. Before prologue, the stack was
24875 realigned and the original stack pointer saved in r0. For details,
24876 see comment in arm_expand_prologue. */
24877 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
24878
24879 emit_jump_insn (simple_return_rtx);
24880 }
24881
24882 /* Generate RTL to represent ARM epilogue. Really_return is true if the
24883 function is not a sibcall. */
24884 void
24885 arm_expand_epilogue (bool really_return)
24886 {
24887 unsigned long func_type;
24888 unsigned long saved_regs_mask;
24889 int num_regs = 0;
24890 int i;
24891 int amount;
24892 arm_stack_offsets *offsets;
24893
24894 func_type = arm_current_func_type ();
24895
24896 /* Naked functions don't have epilogues. Hence, generate a return pattern and
24897 let output_return_instruction take care of instruction emission if any. */
24898 if (IS_NAKED (func_type)
24899 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
24900 {
24901 if (really_return)
24902 emit_jump_insn (simple_return_rtx);
24903 return;
24904 }
24905
24906 /* If we are throwing an exception, then we really must be doing a
24907 return, so we can't tail-call. */
24908 gcc_assert (!crtl->calls_eh_return || really_return);
24909
24910 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
24911 {
24912 arm_expand_epilogue_apcs_frame (really_return);
24913 return;
24914 }
24915
24916 /* Get frame offsets for ARM. */
24917 offsets = arm_get_frame_offsets ();
24918 saved_regs_mask = offsets->saved_regs_mask;
24919 num_regs = bit_count (saved_regs_mask);
24920
24921 if (frame_pointer_needed)
24922 {
24923 rtx_insn *insn;
24924 /* Restore stack pointer if necessary. */
24925 if (TARGET_ARM)
24926 {
24927 /* In ARM mode, the frame pointer points to the first saved register.
24928 Restore the stack pointer to the last saved register. */
24929 amount = offsets->frame - offsets->saved_regs;
24930
24931 /* Force out any pending memory operations that reference stacked data
24932 before stack de-allocation occurs. */
24933 emit_insn (gen_blockage ());
24934 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24935 hard_frame_pointer_rtx,
24936 GEN_INT (amount)));
24937 arm_add_cfa_adjust_cfa_note (insn, amount,
24938 stack_pointer_rtx,
24939 hard_frame_pointer_rtx);
24940
24941 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24942 deleted. */
24943 emit_insn (gen_force_register_use (stack_pointer_rtx));
24944 }
24945 else
24946 {
24947 /* In Thumb-2 mode, the frame pointer points to the last saved
24948 register. */
24949 amount = offsets->locals_base - offsets->saved_regs;
24950 if (amount)
24951 {
24952 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
24953 hard_frame_pointer_rtx,
24954 GEN_INT (amount)));
24955 arm_add_cfa_adjust_cfa_note (insn, amount,
24956 hard_frame_pointer_rtx,
24957 hard_frame_pointer_rtx);
24958 }
24959
24960 /* Force out any pending memory operations that reference stacked data
24961 before stack de-allocation occurs. */
24962 emit_insn (gen_blockage ());
24963 insn = emit_insn (gen_movsi (stack_pointer_rtx,
24964 hard_frame_pointer_rtx));
24965 arm_add_cfa_adjust_cfa_note (insn, 0,
24966 stack_pointer_rtx,
24967 hard_frame_pointer_rtx);
24968 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24969 deleted. */
24970 emit_insn (gen_force_register_use (stack_pointer_rtx));
24971 }
24972 }
24973 else
24974 {
24975 /* Pop off outgoing args and local frame to adjust stack pointer to
24976 last saved register. */
24977 amount = offsets->outgoing_args - offsets->saved_regs;
24978 if (amount)
24979 {
24980 rtx_insn *tmp;
24981 /* Force out any pending memory operations that reference stacked data
24982 before stack de-allocation occurs. */
24983 emit_insn (gen_blockage ());
24984 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
24985 stack_pointer_rtx,
24986 GEN_INT (amount)));
24987 arm_add_cfa_adjust_cfa_note (tmp, amount,
24988 stack_pointer_rtx, stack_pointer_rtx);
24989 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
24990 not deleted. */
24991 emit_insn (gen_force_register_use (stack_pointer_rtx));
24992 }
24993 }
24994
24995 if (TARGET_HARD_FLOAT && TARGET_VFP)
24996 {
24997 /* Generate VFP register multi-pop. */
24998 int end_reg = LAST_VFP_REGNUM + 1;
24999
25000 /* Scan the registers in reverse order. We need to match
25001 any groupings made in the prologue and generate matching
25002 vldm operations. The groups must match because,
25003 unlike pop, vldm can only restore consecutive registers. */
25004 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25005 /* Look for a case where a reg does not need restoring. */
25006 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25007 && (!df_regs_ever_live_p (i + 1)
25008 || call_used_regs[i + 1]))
25009 {
25010 /* Restore the regs discovered so far (from reg+2 to
25011 end_reg). */
25012 if (end_reg > i + 2)
25013 arm_emit_vfp_multi_reg_pop (i + 2,
25014 (end_reg - (i + 2)) / 2,
25015 stack_pointer_rtx);
25016 end_reg = i;
25017 }
25018
25019 /* Restore the remaining regs that we have discovered (or possibly
25020 even all of them, if the conditional in the for loop never
25021 fired). */
25022 if (end_reg > i + 2)
25023 arm_emit_vfp_multi_reg_pop (i + 2,
25024 (end_reg - (i + 2)) / 2,
25025 stack_pointer_rtx);
25026 }
25027
25028 if (TARGET_IWMMXT)
25029 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25030 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25031 {
25032 rtx_insn *insn;
25033 rtx addr = gen_rtx_MEM (V2SImode,
25034 gen_rtx_POST_INC (SImode,
25035 stack_pointer_rtx));
25036 set_mem_alias_set (addr, get_frame_alias_set ());
25037 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25038 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25039 gen_rtx_REG (V2SImode, i),
25040 NULL_RTX);
25041 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25042 stack_pointer_rtx, stack_pointer_rtx);
25043 }
25044
25045 if (saved_regs_mask)
25046 {
25047 rtx insn;
25048 bool return_in_pc = false;
25049
25050 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25051 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25052 && !IS_STACKALIGN (func_type)
25053 && really_return
25054 && crtl->args.pretend_args_size == 0
25055 && saved_regs_mask & (1 << LR_REGNUM)
25056 && !crtl->calls_eh_return)
25057 {
25058 saved_regs_mask &= ~(1 << LR_REGNUM);
25059 saved_regs_mask |= (1 << PC_REGNUM);
25060 return_in_pc = true;
25061 }
25062
25063 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25064 {
25065 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25066 if (saved_regs_mask & (1 << i))
25067 {
25068 rtx addr = gen_rtx_MEM (SImode,
25069 gen_rtx_POST_INC (SImode,
25070 stack_pointer_rtx));
25071 set_mem_alias_set (addr, get_frame_alias_set ());
25072
25073 if (i == PC_REGNUM)
25074 {
25075 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25076 XVECEXP (insn, 0, 0) = ret_rtx;
25077 XVECEXP (insn, 0, 1) = gen_rtx_SET (SImode,
25078 gen_rtx_REG (SImode, i),
25079 addr);
25080 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25081 insn = emit_jump_insn (insn);
25082 }
25083 else
25084 {
25085 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25086 addr));
25087 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25088 gen_rtx_REG (SImode, i),
25089 NULL_RTX);
25090 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25091 stack_pointer_rtx,
25092 stack_pointer_rtx);
25093 }
25094 }
25095 }
25096 else
25097 {
25098 if (TARGET_LDRD
25099 && current_tune->prefer_ldrd_strd
25100 && !optimize_function_for_size_p (cfun))
25101 {
25102 if (TARGET_THUMB2)
25103 thumb2_emit_ldrd_pop (saved_regs_mask);
25104 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25105 arm_emit_ldrd_pop (saved_regs_mask);
25106 else
25107 arm_emit_multi_reg_pop (saved_regs_mask);
25108 }
25109 else
25110 arm_emit_multi_reg_pop (saved_regs_mask);
25111 }
25112
25113 if (return_in_pc == true)
25114 return;
25115 }
25116
25117 if (crtl->args.pretend_args_size)
25118 {
25119 int i, j;
25120 rtx dwarf = NULL_RTX;
25121 rtx_insn *tmp =
25122 emit_insn (gen_addsi3 (stack_pointer_rtx,
25123 stack_pointer_rtx,
25124 GEN_INT (crtl->args.pretend_args_size)));
25125
25126 RTX_FRAME_RELATED_P (tmp) = 1;
25127
25128 if (cfun->machine->uses_anonymous_args)
25129 {
25130 /* Restore pretend args. Refer to arm_expand_prologue for how
25131 pretend_args are saved on the stack. */
25132 int num_regs = crtl->args.pretend_args_size / 4;
25133 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25134 for (j = 0, i = 0; j < num_regs; i++)
25135 if (saved_regs_mask & (1 << i))
25136 {
25137 rtx reg = gen_rtx_REG (SImode, i);
25138 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25139 j++;
25140 }
25141 REG_NOTES (tmp) = dwarf;
25142 }
25143 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
25144 stack_pointer_rtx, stack_pointer_rtx);
25145 }
25146
25147 if (!really_return)
25148 return;
25149
25150 if (crtl->calls_eh_return)
25151 emit_insn (gen_addsi3 (stack_pointer_rtx,
25152 stack_pointer_rtx,
25153 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25154
25155 if (IS_STACKALIGN (func_type))
25156 /* Restore the original stack pointer. Before prologue, the stack was
25157 realigned and the original stack pointer saved in r0. For details,
25158 see comment in arm_expand_prologue. */
25159 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, 0)));
25160
25161 emit_jump_insn (simple_return_rtx);
25162 }
25163
25164 /* Implementation of insn prologue_thumb1_interwork. This is the first
25165 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25166
25167 const char *
25168 thumb1_output_interwork (void)
25169 {
25170 const char * name;
25171 FILE *f = asm_out_file;
25172
25173 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25174 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25175 == SYMBOL_REF);
25176 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25177
25178 /* Generate code sequence to switch us into Thumb mode. */
25179 /* The .code 32 directive has already been emitted by
25180 ASM_DECLARE_FUNCTION_NAME. */
25181 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25182 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25183
25184 /* Generate a label, so that the debugger will notice the
25185 change in instruction sets. This label is also used by
25186 the assembler to bypass the ARM code when this function
25187 is called from a Thumb encoded function elsewhere in the
25188 same file. Hence the definition of STUB_NAME here must
25189 agree with the definition in gas/config/tc-arm.c. */
25190
25191 #define STUB_NAME ".real_start_of"
25192
25193 fprintf (f, "\t.code\t16\n");
25194 #ifdef ARM_PE
25195 if (arm_dllexport_name_p (name))
25196 name = arm_strip_name_encoding (name);
25197 #endif
25198 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25199 fprintf (f, "\t.thumb_func\n");
25200 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25201
25202 return "";
25203 }
25204
25205 /* Handle the case of a double word load into a low register from
25206 a computed memory address. The computed address may involve a
25207 register which is overwritten by the load. */
25208 const char *
25209 thumb_load_double_from_address (rtx *operands)
25210 {
25211 rtx addr;
25212 rtx base;
25213 rtx offset;
25214 rtx arg1;
25215 rtx arg2;
25216
25217 gcc_assert (REG_P (operands[0]));
25218 gcc_assert (MEM_P (operands[1]));
25219
25220 /* Get the memory address. */
25221 addr = XEXP (operands[1], 0);
25222
25223 /* Work out how the memory address is computed. */
25224 switch (GET_CODE (addr))
25225 {
25226 case REG:
25227 operands[2] = adjust_address (operands[1], SImode, 4);
25228
25229 if (REGNO (operands[0]) == REGNO (addr))
25230 {
25231 output_asm_insn ("ldr\t%H0, %2", operands);
25232 output_asm_insn ("ldr\t%0, %1", operands);
25233 }
25234 else
25235 {
25236 output_asm_insn ("ldr\t%0, %1", operands);
25237 output_asm_insn ("ldr\t%H0, %2", operands);
25238 }
25239 break;
25240
25241 case CONST:
25242 /* Compute <address> + 4 for the high order load. */
25243 operands[2] = adjust_address (operands[1], SImode, 4);
25244
25245 output_asm_insn ("ldr\t%0, %1", operands);
25246 output_asm_insn ("ldr\t%H0, %2", operands);
25247 break;
25248
25249 case PLUS:
25250 arg1 = XEXP (addr, 0);
25251 arg2 = XEXP (addr, 1);
25252
25253 if (CONSTANT_P (arg1))
25254 base = arg2, offset = arg1;
25255 else
25256 base = arg1, offset = arg2;
25257
25258 gcc_assert (REG_P (base));
25259
25260 /* Catch the case of <address> = <reg> + <reg> */
25261 if (REG_P (offset))
25262 {
25263 int reg_offset = REGNO (offset);
25264 int reg_base = REGNO (base);
25265 int reg_dest = REGNO (operands[0]);
25266
25267 /* Add the base and offset registers together into the
25268 higher destination register. */
25269 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25270 reg_dest + 1, reg_base, reg_offset);
25271
25272 /* Load the lower destination register from the address in
25273 the higher destination register. */
25274 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25275 reg_dest, reg_dest + 1);
25276
25277 /* Load the higher destination register from its own address
25278 plus 4. */
25279 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25280 reg_dest + 1, reg_dest + 1);
25281 }
25282 else
25283 {
25284 /* Compute <address> + 4 for the high order load. */
25285 operands[2] = adjust_address (operands[1], SImode, 4);
25286
25287 /* If the computed address is held in the low order register
25288 then load the high order register first, otherwise always
25289 load the low order register first. */
25290 if (REGNO (operands[0]) == REGNO (base))
25291 {
25292 output_asm_insn ("ldr\t%H0, %2", operands);
25293 output_asm_insn ("ldr\t%0, %1", operands);
25294 }
25295 else
25296 {
25297 output_asm_insn ("ldr\t%0, %1", operands);
25298 output_asm_insn ("ldr\t%H0, %2", operands);
25299 }
25300 }
25301 break;
25302
25303 case LABEL_REF:
25304 /* With no registers to worry about we can just load the value
25305 directly. */
25306 operands[2] = adjust_address (operands[1], SImode, 4);
25307
25308 output_asm_insn ("ldr\t%H0, %2", operands);
25309 output_asm_insn ("ldr\t%0, %1", operands);
25310 break;
25311
25312 default:
25313 gcc_unreachable ();
25314 }
25315
25316 return "";
25317 }
25318
25319 const char *
25320 thumb_output_move_mem_multiple (int n, rtx *operands)
25321 {
25322 rtx tmp;
25323
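  /* Editorial note: ldmia/stmia transfer registers in order of increasing
     register number, so the scratch operands are sorted below before
     being printed as a register list.  */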
25324 switch (n)
25325 {
25326 case 2:
25327 if (REGNO (operands[4]) > REGNO (operands[5]))
25328 {
25329 tmp = operands[4];
25330 operands[4] = operands[5];
25331 operands[5] = tmp;
25332 }
25333 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25334 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25335 break;
25336
25337 case 3:
25338 if (REGNO (operands[4]) > REGNO (operands[5]))
25339 std::swap (operands[4], operands[5]);
25340 if (REGNO (operands[5]) > REGNO (operands[6]))
25341 std::swap (operands[5], operands[6]);
25342 if (REGNO (operands[4]) > REGNO (operands[5]))
25343 std::swap (operands[4], operands[5]);
25344
25345 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25346 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25347 break;
25348
25349 default:
25350 gcc_unreachable ();
25351 }
25352
25353 return "";
25354 }
25355
25356 /* Output a call-via instruction for thumb state. */
25357 const char *
25358 thumb_call_via_reg (rtx reg)
25359 {
25360 int regno = REGNO (reg);
25361 rtx *labelp;
25362
25363 gcc_assert (regno < LR_REGNUM);
25364
25365 /* If we are in the normal text section we can use a single instance
25366 per compilation unit. If we are doing function sections, then we need
25367 an entry per section, since we can't rely on reachability. */
25368 if (in_section == text_section)
25369 {
25370 thumb_call_reg_needed = 1;
25371
25372 if (thumb_call_via_label[regno] == NULL)
25373 thumb_call_via_label[regno] = gen_label_rtx ();
25374 labelp = thumb_call_via_label + regno;
25375 }
25376 else
25377 {
25378 if (cfun->machine->call_via[regno] == NULL)
25379 cfun->machine->call_via[regno] = gen_label_rtx ();
25380 labelp = cfun->machine->call_via + regno;
25381 }
25382
25383 output_asm_insn ("bl\t%a0", labelp);
25384 return "";
25385 }
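/* Editorial illustration (label name is a placeholder): a Thumb call
   through r4 is therefore emitted as "bl .Lcall_r4", where the shared
   stub

      .Lcall_r4:
      bx   r4

   is written out once per text section (see arm_file_end) or once per
   function's own section.  */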
25386
25387 /* Routines for generating rtl. */
25388 void
25389 thumb_expand_movmemqi (rtx *operands)
25390 {
25391 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25392 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25393 HOST_WIDE_INT len = INTVAL (operands[2]);
25394 HOST_WIDE_INT offset = 0;
25395
25396 while (len >= 12)
25397 {
25398 emit_insn (gen_movmem12b (out, in, out, in));
25399 len -= 12;
25400 }
25401
25402 if (len >= 8)
25403 {
25404 emit_insn (gen_movmem8b (out, in, out, in));
25405 len -= 8;
25406 }
25407
25408 if (len >= 4)
25409 {
25410 rtx reg = gen_reg_rtx (SImode);
25411 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25412 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25413 len -= 4;
25414 offset += 4;
25415 }
25416
25417 if (len >= 2)
25418 {
25419 rtx reg = gen_reg_rtx (HImode);
25420 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25421 plus_constant (Pmode, in,
25422 offset))));
25423 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25424 offset)),
25425 reg));
25426 len -= 2;
25427 offset += 2;
25428 }
25429
25430 if (len)
25431 {
25432 rtx reg = gen_reg_rtx (QImode);
25433 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25434 plus_constant (Pmode, in,
25435 offset))));
25436 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25437 offset)),
25438 reg));
25439 }
25440 }
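/* Worked example for the expander above (editorial): a 23-byte copy
   becomes one 12-byte and one 8-byte ldmia/stmia block (each of which
   post-increments the pointer registers), then a halfword move at
   offset 0 and a byte move at offset 2 of the 3-byte tail.  */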
25441
25442 void
25443 thumb_reload_out_hi (rtx *operands)
25444 {
25445 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25446 }
25447
25448 /* Handle reading a half-word from memory during reload. */
25449 void
25450 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
25451 {
25452 gcc_unreachable ();
25453 }
25454
25455 /* Return the length of a function name prefix
25456 that starts with the character 'c'. */
25457 static int
25458 arm_get_strip_length (int c)
25459 {
25460 switch (c)
25461 {
25462 ARM_NAME_ENCODING_LENGTHS
25463 default: return 0;
25464 }
25465 }
25466
25467 /* Return a pointer to a function's name with any
25468 and all prefix encodings stripped from it. */
25469 const char *
25470 arm_strip_name_encoding (const char *name)
25471 {
25472 int skip;
25473
25474 while ((skip = arm_get_strip_length (* name)))
25475 name += skip;
25476
25477 return name;
25478 }
25479
25480 /* If there is a '*' anywhere in the name's prefix, then
25481 emit the stripped name verbatim, otherwise prepend an
25482 underscore if leading underscores are being used. */
25483 void
25484 arm_asm_output_labelref (FILE *stream, const char *name)
25485 {
25486 int skip;
25487 int verbatim = 0;
25488
25489 while ((skip = arm_get_strip_length (* name)))
25490 {
25491 verbatim |= (*name == '*');
25492 name += skip;
25493 }
25494
25495 if (verbatim)
25496 fputs (name, stream);
25497 else
25498 asm_fprintf (stream, "%U%s", name);
25499 }
25500
25501 /* This function is used to emit an EABI tag and its associated value.
25502 We emit the numerical value of the tag in case the assembler does not
25503 support textual tags (e.g. gas prior to 2.20). If requested we include
25504 the tag name in a comment so that anyone reading the assembler output
25505 will know which tag is being set.
25506
25507 This function is not static because arm-c.c needs it too. */
25508
25509 void
25510 arm_emit_eabi_attribute (const char *name, int num, int val)
25511 {
25512 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25513 if (flag_verbose_asm || flag_debug_asm)
25514 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25515 asm_fprintf (asm_out_file, "\n");
25516 }
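/* Editorial example of the output (assuming the usual "@" comment prefix
   for ARM assembly): arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26, 2)
   prints

      .eabi_attribute 26, 2   @ Tag_ABI_enum_size

   with the trailing comment present only under -fverbose-asm or -dA.  */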
25517
25518 static void
25519 arm_file_start (void)
25520 {
25521 int val;
25522
25523 if (TARGET_UNIFIED_ASM)
25524 asm_fprintf (asm_out_file, "\t.syntax unified\n");
25525
25526 if (TARGET_BPABI)
25527 {
25528 const char *fpu_name;
25529 if (arm_selected_arch)
25530 {
25531 /* armv7ve doesn't support any extensions. */
25532 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
25533 {
25534 /* Keep backward compatibility for assemblers
25535 which don't support armv7ve. */
25536 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
25537 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
25538 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
25539 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
25540 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
25541 }
25542 else
25543 {
25544 const char* pos = strchr (arm_selected_arch->name, '+');
25545 if (pos)
25546 {
25547 char buf[15];
25548 gcc_assert (strlen (arm_selected_arch->name)
25549 <= sizeof (buf) / sizeof (*pos));
25550 strncpy (buf, arm_selected_arch->name,
25551 (pos - arm_selected_arch->name) * sizeof (*pos));
25552 buf[pos - arm_selected_arch->name] = '\0';
25553 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
25554 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
25555 }
25556 else
25557 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
25558 }
25559 }
25560 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
25561 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
25562 else
25563 {
25564 const char* truncated_name
25565 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
25566 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
25567 }
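/* Editorial example of the '+' handling above (architecture name chosen
   purely for illustration): an entry such as "armv8-a+crc" is split at
   the '+' and emitted as

      .arch armv8-a
      .arch_extension crc

   i.e. a base architecture directive plus a separate extension
   directive.  */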
25568
25569 if (TARGET_SOFT_FLOAT)
25570 {
25571 fpu_name = "softvfp";
25572 }
25573 else
25574 {
25575 fpu_name = arm_fpu_desc->name;
25576 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
25577 {
25578 if (TARGET_HARD_FLOAT)
25579 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 3);
25580 if (TARGET_HARD_FLOAT_ABI)
25581 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
25582 }
25583 }
25584 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
25585
25586 /* Some of these attributes only apply when the corresponding features
25587 are used. However we don't have any easy way of figuring this out.
25588 Conservatively record the setting that would have been used. */
25589
25590 if (flag_rounding_math)
25591 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
25592
25593 if (!flag_unsafe_math_optimizations)
25594 {
25595 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
25596 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25597 }
25598 if (flag_signaling_nans)
25599 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25600
25601 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25602 flag_finite_math_only ? 1 : 3);
25603
25604 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25605 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25606 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25607 flag_short_enums ? 1 : 2);
25608
25609 /* Tag_ABI_optimization_goals. */
25610 if (optimize_size)
25611 val = 4;
25612 else if (optimize >= 2)
25613 val = 2;
25614 else if (optimize)
25615 val = 1;
25616 else
25617 val = 6;
25618 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
25619
25620 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
25621 unaligned_access);
25622
25623 if (arm_fp16_format)
25624 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
25625 (int) arm_fp16_format);
25626
25627 if (arm_lang_output_object_attributes_hook)
25628 arm_lang_output_object_attributes_hook();
25629 }
25630
25631 default_file_start ();
25632 }
25633
25634 static void
25635 arm_file_end (void)
25636 {
25637 int regno;
25638
25639 if (NEED_INDICATE_EXEC_STACK)
25640 /* Add .note.GNU-stack. */
25641 file_end_indicate_exec_stack ();
25642
25643 if (! thumb_call_reg_needed)
25644 return;
25645
25646 switch_to_section (text_section);
25647 asm_fprintf (asm_out_file, "\t.code 16\n");
25648 ASM_OUTPUT_ALIGN (asm_out_file, 1);
25649
25650 for (regno = 0; regno < LR_REGNUM; regno++)
25651 {
25652 rtx label = thumb_call_via_label[regno];
25653
25654 if (label != 0)
25655 {
25656 targetm.asm_out.internal_label (asm_out_file, "L",
25657 CODE_LABEL_NUMBER (label));
25658 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
25659 }
25660 }
25661 }
25662
25663 #ifndef ARM_PE
25664 /* Symbols in the text segment can be accessed without indirecting via the
25665 constant pool; it may take an extra binary operation, but this is still
25666 faster than indirecting via memory. Don't do this when not optimizing,
25667 since we won't be calculating all of the offsets necessary to do this
25668 simplification. */
25669
25670 static void
25671 arm_encode_section_info (tree decl, rtx rtl, int first)
25672 {
25673 if (optimize > 0 && TREE_CONSTANT (decl))
25674 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
25675
25676 default_encode_section_info (decl, rtl, first);
25677 }
25678 #endif /* !ARM_PE */
25679
25680 static void
25681 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
25682 {
25683 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
25684 && !strcmp (prefix, "L"))
25685 {
25686 arm_ccfsm_state = 0;
25687 arm_target_insn = NULL;
25688 }
25689 default_internal_label (stream, prefix, labelno);
25690 }
25691
25692 /* Output code to add DELTA to the first argument, and then jump
25693 to FUNCTION. Used for C++ multiple inheritance. */
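/* For illustration only: in the non-Thumb-1 path below, a small DELTA is
   folded into add/sub instructions on the "this" register and the thunk
   ends with a direct branch, so a thunk with DELTA == 4 comes out roughly
   as "add r0, r0, #4" followed by "b <function>(PLT)" (assuming a
   non-aggregate return, so "this" is in r0).  */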
25694 static void
25695 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
25696 HOST_WIDE_INT delta,
25697 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
25698 tree function)
25699 {
25700 static int thunk_label = 0;
25701 char label[256];
25702 char labelpc[256];
25703 int mi_delta = delta;
25704 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
25705 int shift = 0;
25706 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
25707 ? 1 : 0);
25708 if (mi_delta < 0)
25709 mi_delta = - mi_delta;
25710
25711 final_start_function (emit_barrier (), file, 1);
25712
25713 if (TARGET_THUMB1)
25714 {
25715 int labelno = thunk_label++;
25716 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
25717 /* Thunks are entered in ARM mode when available. */
25718 if (TARGET_THUMB1_ONLY)
25719 {
25720 /* push r3 so we can use it as a temporary. */
25721 /* TODO: Omit this save if r3 is not used. */
25722 fputs ("\tpush {r3}\n", file);
25723 fputs ("\tldr\tr3, ", file);
25724 }
25725 else
25726 {
25727 fputs ("\tldr\tr12, ", file);
25728 }
25729 assemble_name (file, label);
25730 fputc ('\n', file);
25731 if (flag_pic)
25732 {
25733 /* If we are generating PIC, the ldr instruction below loads
25734 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
25735 the address of the add + 8, so we have:
25736
25737 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25738 = target + 1.
25739
25740 Note that we have "+ 1" because some versions of GNU ld
25741 don't set the low bit of the result for R_ARM_REL32
25742 relocations against thumb function symbols.
25743 On ARMv6M this is +4, not +8. */
25744 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
25745 assemble_name (file, labelpc);
25746 fputs (":\n", file);
25747 if (TARGET_THUMB1_ONLY)
25748 {
25749 /* This is 2 insns after the start of the thunk, so we know it
25750 is 4-byte aligned. */
25751 fputs ("\tadd\tr3, pc, r3\n", file);
25752 fputs ("\tmov r12, r3\n", file);
25753 }
25754 else
25755 fputs ("\tadd\tr12, pc, r12\n", file);
25756 }
25757 else if (TARGET_THUMB1_ONLY)
25758 fputs ("\tmov r12, r3\n", file);
25759 }
25760 if (TARGET_THUMB1_ONLY)
25761 {
25762 if (mi_delta > 255)
25763 {
25764 fputs ("\tldr\tr3, ", file);
25765 assemble_name (file, label);
25766 fputs ("+4\n", file);
25767 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
25768 mi_op, this_regno, this_regno);
25769 }
25770 else if (mi_delta != 0)
25771 {
25772 /* Thumb1 unified syntax requires s suffix in instruction name when
25773 one of the operands is immediate. */
25774 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
25775 mi_op, this_regno, this_regno,
25776 mi_delta);
25777 }
25778 }
25779 else
25780 {
25781 /* TODO: Use movw/movt for large constants when available. */
25782 while (mi_delta != 0)
25783 {
25784 if ((mi_delta & (3 << shift)) == 0)
25785 shift += 2;
25786 else
25787 {
25788 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25789 mi_op, this_regno, this_regno,
25790 mi_delta & (0xff << shift));
25791 mi_delta &= ~(0xff << shift);
25792 shift += 8;
25793 }
25794 }
25795 }
25796 if (TARGET_THUMB1)
25797 {
25798 if (TARGET_THUMB1_ONLY)
25799 fputs ("\tpop\t{r3}\n", file);
25800
25801 fprintf (file, "\tbx\tr12\n");
25802 ASM_OUTPUT_ALIGN (file, 2);
25803 assemble_name (file, label);
25804 fputs (":\n", file);
25805 if (flag_pic)
25806 {
25807 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
25808 rtx tem = XEXP (DECL_RTL (function), 0);
25809 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
25810 pipeline offset is four rather than eight. Adjust the offset
25811 accordingly. */
25812 tem = plus_constant (GET_MODE (tem), tem,
25813 TARGET_THUMB1_ONLY ? -3 : -7);
25814 tem = gen_rtx_MINUS (GET_MODE (tem),
25815 tem,
25816 gen_rtx_SYMBOL_REF (Pmode,
25817 ggc_strdup (labelpc)));
25818 assemble_integer (tem, 4, BITS_PER_WORD, 1);
25819 }
25820 else
25821 /* Output ".word .LTHUNKn". */
25822 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
25823
25824 if (TARGET_THUMB1_ONLY && mi_delta > 255)
25825 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
25826 }
25827 else
25828 {
25829 fputs ("\tb\t", file);
25830 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
25831 if (NEED_PLT_RELOC)
25832 fputs ("(PLT)", file);
25833 fputc ('\n', file);
25834 }
25835
25836 final_end_function ();
25837 }
25838
25839 int
25840 arm_emit_vector_const (FILE *file, rtx x)
25841 {
25842 int i;
25843 const char * pattern;
25844
25845 gcc_assert (GET_CODE (x) == CONST_VECTOR);
25846
25847 switch (GET_MODE (x))
25848 {
25849 case V2SImode: pattern = "%08x"; break;
25850 case V4HImode: pattern = "%04x"; break;
25851 case V8QImode: pattern = "%02x"; break;
25852 default: gcc_unreachable ();
25853 }
25854
25855 fprintf (file, "0x");
25856 for (i = CONST_VECTOR_NUNITS (x); i--;)
25857 {
25858 rtx element;
25859
25860 element = CONST_VECTOR_ELT (x, i);
25861 fprintf (file, pattern, INTVAL (element));
25862 }
25863
25864 return 1;
25865 }
25866
25867 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
25868 HFmode constant pool entries are actually loaded with ldr. */
25869 void
25870 arm_emit_fp16_const (rtx c)
25871 {
25872 REAL_VALUE_TYPE r;
25873 long bits;
25874
25875 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
25876 bits = real_to_target (NULL, &r, HFmode);
25877 if (WORDS_BIG_ENDIAN)
25878 assemble_zeros (2);
25879 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
25880 if (!WORDS_BIG_ENDIAN)
25881 assemble_zeros (2);
25882 }
25883
25884 const char *
25885 arm_output_load_gr (rtx *operands)
25886 {
25887 rtx reg;
25888 rtx offset;
25889 rtx wcgr;
25890 rtx sum;
25891
25892 if (!MEM_P (operands [1])
25893 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
25894 || !REG_P (reg = XEXP (sum, 0))
25895 || !CONST_INT_P (offset = XEXP (sum, 1))
25896 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
25897 return "wldrw%?\t%0, %1";
25898
25899 /* Fix up an out-of-range load of a GR register. */
25900 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
25901 wcgr = operands[0];
25902 operands[0] = reg;
25903 output_asm_insn ("ldr%?\t%0, %1", operands);
25904
25905 operands[0] = wcgr;
25906 operands[1] = reg;
25907 output_asm_insn ("tmcr%?\t%0, %1", operands);
25908 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
25909
25910 return "";
25911 }
25912
25913 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
25914
25915 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
25916 named arg and all anonymous args onto the stack.
25917 XXX I know the prologue shouldn't be pushing registers, but it is faster
25918 that way. */
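/* Illustrative example: for a function such as "void f (int a, ...)" the
   single named argument occupies r0, so nregs is 1 and *pretend_size
   becomes 3 * UNITS_PER_WORD; the prologue then pushes r1-r3 where the
   va_arg machinery can find them.  */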
25919
25920 static void
25921 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
25922 machine_mode mode,
25923 tree type,
25924 int *pretend_size,
25925 int second_time ATTRIBUTE_UNUSED)
25926 {
25927 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
25928 int nregs;
25929
25930 cfun->machine->uses_anonymous_args = 1;
25931 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
25932 {
25933 nregs = pcum->aapcs_ncrn;
25934 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
25935 nregs++;
25936 }
25937 else
25938 nregs = pcum->nregs;
25939
25940 if (nregs < NUM_ARG_REGS)
25941 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
25942 }
25943
25944 /* We can't rely on the caller doing the proper promotion when
25945 using APCS or ATPCS. */
25946
25947 static bool
25948 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
25949 {
25950 return !TARGET_AAPCS_BASED;
25951 }
25952
25953 static machine_mode
25954 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
25955 machine_mode mode,
25956 int *punsignedp ATTRIBUTE_UNUSED,
25957 const_tree fntype ATTRIBUTE_UNUSED,
25958 int for_return ATTRIBUTE_UNUSED)
25959 {
25960 if (GET_MODE_CLASS (mode) == MODE_INT
25961 && GET_MODE_SIZE (mode) < 4)
25962 return SImode;
25963
25964 return mode;
25965 }
25966
25967 /* AAPCS based ABIs use short enums by default. */
25968
25969 static bool
25970 arm_default_short_enums (void)
25971 {
25972 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
25973 }
25974
25975
25976 /* AAPCS requires that anonymous bitfields affect structure alignment. */
25977
25978 static bool
25979 arm_align_anon_bitfield (void)
25980 {
25981 return TARGET_AAPCS_BASED;
25982 }
25983
25984
25985 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
25986
25987 static tree
25988 arm_cxx_guard_type (void)
25989 {
25990 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
25991 }
25992
25993
25994 /* The EABI says test the least significant bit of a guard variable. */
25995
25996 static bool
25997 arm_cxx_guard_mask_bit (void)
25998 {
25999 return TARGET_AAPCS_BASED;
26000 }
26001
26002
26003 /* The EABI specifies that all array cookies are 8 bytes long. */
26004
26005 static tree
26006 arm_get_cookie_size (tree type)
26007 {
26008 tree size;
26009
26010 if (!TARGET_AAPCS_BASED)
26011 return default_cxx_get_cookie_size (type);
26012
26013 size = build_int_cst (sizetype, 8);
26014 return size;
26015 }
26016
26017
26018 /* The EABI says that array cookies should also contain the element size. */
26019
26020 static bool
26021 arm_cookie_has_size (void)
26022 {
26023 return TARGET_AAPCS_BASED;
26024 }
26025
26026
26027 /* The EABI says constructors and destructors should return a pointer to
26028 the object constructed/destroyed. */
26029
26030 static bool
26031 arm_cxx_cdtor_returns_this (void)
26032 {
26033 return TARGET_AAPCS_BASED;
26034 }
26035
26036 /* The EABI says that an inline function may never be the key
26037 method. */
26038
26039 static bool
26040 arm_cxx_key_method_may_be_inline (void)
26041 {
26042 return !TARGET_AAPCS_BASED;
26043 }
26044
26045 static void
26046 arm_cxx_determine_class_data_visibility (tree decl)
26047 {
26048 if (!TARGET_AAPCS_BASED
26049 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26050 return;
26051
26052 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26053 is exported. However, on systems without dynamic vague linkage,
26054 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26055 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26056 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26057 else
26058 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26059 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26060 }
26061
26062 static bool
26063 arm_cxx_class_data_always_comdat (void)
26064 {
26065 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26066 vague linkage if the class has no key function. */
26067 return !TARGET_AAPCS_BASED;
26068 }
26069
26070
26071 /* The EABI says __aeabi_atexit should be used to register static
26072 destructors. */
26073
26074 static bool
26075 arm_cxx_use_aeabi_atexit (void)
26076 {
26077 return TARGET_AAPCS_BASED;
26078 }
26079
26080
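/* Store SOURCE as the return address of the current function: directly in
   LR if LR was not saved, otherwise into the stack slot where the prologue
   saved LR, using SCRATCH as a temporary when that slot is out of
   immediate-offset range.  */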
26081 void
26082 arm_set_return_address (rtx source, rtx scratch)
26083 {
26084 arm_stack_offsets *offsets;
26085 HOST_WIDE_INT delta;
26086 rtx addr;
26087 unsigned long saved_regs;
26088
26089 offsets = arm_get_frame_offsets ();
26090 saved_regs = offsets->saved_regs_mask;
26091
26092 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26093 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26094 else
26095 {
26096 if (frame_pointer_needed)
26097 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26098 else
26099 {
26100 /* LR will be the first saved register. */
26101 delta = offsets->outgoing_args - (offsets->frame + 4);
26102
26103
26104 if (delta >= 4096)
26105 {
26106 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26107 GEN_INT (delta & ~4095)));
26108 addr = scratch;
26109 delta &= 4095;
26110 }
26111 else
26112 addr = stack_pointer_rtx;
26113
26114 addr = plus_constant (Pmode, addr, delta);
26115 }
26116 /* The store needs to be marked as frame related in order to prevent
26117 DSE from deleting it as dead if it is based on fp. */
26118 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26119 RTX_FRAME_RELATED_P (insn) = 1;
26120 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26121 }
26122 }
26123
26124
26125 void
26126 thumb_set_return_address (rtx source, rtx scratch)
26127 {
26128 arm_stack_offsets *offsets;
26129 HOST_WIDE_INT delta;
26130 HOST_WIDE_INT limit;
26131 int reg;
26132 rtx addr;
26133 unsigned long mask;
26134
26135 emit_use (source);
26136
26137 offsets = arm_get_frame_offsets ();
26138 mask = offsets->saved_regs_mask;
26139 if (mask & (1 << LR_REGNUM))
26140 {
26141 limit = 1024;
26142 /* Find the saved regs. */
26143 if (frame_pointer_needed)
26144 {
26145 delta = offsets->soft_frame - offsets->saved_args;
26146 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26147 if (TARGET_THUMB1)
26148 limit = 128;
26149 }
26150 else
26151 {
26152 delta = offsets->outgoing_args - offsets->saved_args;
26153 reg = SP_REGNUM;
26154 }
26155 /* Allow for the stack frame. */
26156 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26157 delta -= 16;
26158 /* The link register is always the first saved register. */
26159 delta -= 4;
26160
26161 /* Construct the address. */
26162 addr = gen_rtx_REG (SImode, reg);
26163 if (delta > limit)
26164 {
26165 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26166 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26167 addr = scratch;
26168 }
26169 else
26170 addr = plus_constant (Pmode, addr, delta);
26171
26172 /* The store needs to be marked as frame related in order to prevent
26173 DSE from deleting it as dead if it is based on fp. */
26174 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26175 RTX_FRAME_RELATED_P (insn) = 1;
26176 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26177 }
26178 else
26179 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26180 }
26181
26182 /* Implements target hook vector_mode_supported_p. */
26183 bool
26184 arm_vector_mode_supported_p (machine_mode mode)
26185 {
26186 /* Neon also supports V2SImode, etc. listed in the clause below. */
26187 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26188 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
26189 return true;
26190
26191 if ((TARGET_NEON || TARGET_IWMMXT)
26192 && ((mode == V2SImode)
26193 || (mode == V4HImode)
26194 || (mode == V8QImode)))
26195 return true;
26196
26197 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26198 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26199 || mode == V2HAmode))
26200 return true;
26201
26202 return false;
26203 }
26204
26205 /* Implements target hook array_mode_supported_p. */
26206
26207 static bool
26208 arm_array_mode_supported_p (machine_mode mode,
26209 unsigned HOST_WIDE_INT nelems)
26210 {
26211 if (TARGET_NEON
26212 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26213 && (nelems >= 2 && nelems <= 4))
26214 return true;
26215
26216 return false;
26217 }
26218
26219 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26220 registers when autovectorizing for Neon, at least until multiple vector
26221 widths are supported properly by the middle-end. */
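/* For example, with Neon enabled the vectorizer is offered V4SImode for
   SImode data (or V2SImode under -mvectorize-with-neon-double), while
   iWMMXt targets only get the 64-bit vector modes.  */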
26222
26223 static machine_mode
26224 arm_preferred_simd_mode (machine_mode mode)
26225 {
26226 if (TARGET_NEON)
26227 switch (mode)
26228 {
26229 case SFmode:
26230 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26231 case SImode:
26232 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26233 case HImode:
26234 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26235 case QImode:
26236 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26237 case DImode:
26238 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26239 return V2DImode;
26240 break;
26241
26242 default:;
26243 }
26244
26245 if (TARGET_REALLY_IWMMXT)
26246 switch (mode)
26247 {
26248 case SImode:
26249 return V2SImode;
26250 case HImode:
26251 return V4HImode;
26252 case QImode:
26253 return V8QImode;
26254
26255 default:;
26256 }
26257
26258 return word_mode;
26259 }
26260
26261 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26262
26263 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26264 using r0-r4 for function arguments, r7 for the stack frame and don't have
26265 enough left over to do doubleword arithmetic. For Thumb-2 all the
26266 potentially problematic instructions accept high registers so this is not
26267 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26268 that require many low registers. */
26269 static bool
26270 arm_class_likely_spilled_p (reg_class_t rclass)
26271 {
26272 if ((TARGET_THUMB1 && rclass == LO_REGS)
26273 || rclass == CC_REG)
26274 return true;
26275
26276 return false;
26277 }
26278
26279 /* Implements target hook small_register_classes_for_mode_p. */
26280 bool
26281 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26282 {
26283 return TARGET_THUMB1;
26284 }
26285
26286 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26287 ARM insns and therefore guarantee that the shift count is modulo 256.
26288 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26289 guarantee no particular behavior for out-of-range counts. */
26290
26291 static unsigned HOST_WIDE_INT
26292 arm_shift_truncation_mask (machine_mode mode)
26293 {
26294 return mode == SImode ? 255 : 0;
26295 }
26296
26297
26298 /* Map internal gcc register numbers to DWARF2 register numbers. */
26299
26300 unsigned int
26301 arm_dbx_register_number (unsigned int regno)
26302 {
26303 if (regno < 16)
26304 return regno;
26305
26306 if (IS_VFP_REGNUM (regno))
26307 {
26308 /* See comment in arm_dwarf_register_span. */
26309 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26310 return 64 + regno - FIRST_VFP_REGNUM;
26311 else
26312 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26313 }
26314
26315 if (IS_IWMMXT_GR_REGNUM (regno))
26316 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26317
26318 if (IS_IWMMXT_REGNUM (regno))
26319 return 112 + regno - FIRST_IWMMXT_REGNUM;
26320
26321 gcc_unreachable ();
26322 }
26323
26324 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26325 GCC models them as 64 32-bit registers, so we need to describe this to
26326 the DWARF generation code. Other registers can use the default. */
26327 static rtx
26328 arm_dwarf_register_span (rtx rtl)
26329 {
26330 machine_mode mode;
26331 unsigned regno;
26332 rtx parts[16];
26333 int nregs;
26334 int i;
26335
26336 regno = REGNO (rtl);
26337 if (!IS_VFP_REGNUM (regno))
26338 return NULL_RTX;
26339
26340 /* XXX FIXME: The EABI defines two VFP register ranges:
26341 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26342 256-287: D0-D31
26343 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26344 corresponding D register. Until GDB supports this, we shall use the
26345 legacy encodings. We also use these encodings for D0-D15 for
26346 compatibility with older debuggers. */
26347 mode = GET_MODE (rtl);
26348 if (GET_MODE_SIZE (mode) < 8)
26349 return NULL_RTX;
26350
26351 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26352 {
26353 nregs = GET_MODE_SIZE (mode) / 4;
26354 for (i = 0; i < nregs; i += 2)
26355 if (TARGET_BIG_END)
26356 {
26357 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26358 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26359 }
26360 else
26361 {
26362 parts[i] = gen_rtx_REG (SImode, regno + i);
26363 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26364 }
26365 }
26366 else
26367 {
26368 nregs = GET_MODE_SIZE (mode) / 8;
26369 for (i = 0; i < nregs; i++)
26370 parts[i] = gen_rtx_REG (DImode, regno + i);
26371 }
26372
26373 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
26374 }
26375
26376 #if ARM_UNWIND_INFO
26377 /* Emit unwind directives for a store-multiple instruction or stack pointer
26378 push during alignment.
26379 These should only ever be generated by the function prologue code, so
26380 expect them to have a particular form.
26381 The store-multiple instruction sometimes pushes pc as the last register,
26382 although it should not be tracked in the unwind information; for -Os it
26383 sometimes pushes dummy registers before the first register that needs
26384 to be tracked in the unwind information. Such dummy registers are there
26385 only to avoid a separate stack adjustment, and will not be restored in
26386 the epilogue. */
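/* The directives emitted below look, for example, like
       .pad #4
       .save {r4, r5, lr}
   or ".vsave {d8, d9}" for VFP register stores (examples are illustrative;
   the exact list depends on the registers actually saved).  */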
26387
26388 static void
26389 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26390 {
26391 int i;
26392 HOST_WIDE_INT offset;
26393 HOST_WIDE_INT nregs;
26394 int reg_size;
26395 unsigned reg;
26396 unsigned lastreg;
26397 unsigned padfirst = 0, padlast = 0;
26398 rtx e;
26399
26400 e = XVECEXP (p, 0, 0);
26401 gcc_assert (GET_CODE (e) == SET);
26402
26403 /* First insn will adjust the stack pointer. */
26404 gcc_assert (GET_CODE (e) == SET
26405 && REG_P (SET_DEST (e))
26406 && REGNO (SET_DEST (e)) == SP_REGNUM
26407 && GET_CODE (SET_SRC (e)) == PLUS);
26408
26409 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26410 nregs = XVECLEN (p, 0) - 1;
26411 gcc_assert (nregs);
26412
26413 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26414 if (reg < 16)
26415 {
26416 /* For -Os dummy registers can be pushed at the beginning to
26417 avoid separate stack pointer adjustment. */
26418 e = XVECEXP (p, 0, 1);
26419 e = XEXP (SET_DEST (e), 0);
26420 if (GET_CODE (e) == PLUS)
26421 padfirst = INTVAL (XEXP (e, 1));
26422 gcc_assert (padfirst == 0 || optimize_size);
26423 /* The function prologue may also push pc, but that push is not annotated,
26424 as pc is never restored. We turn this into a stack pointer adjustment. */
26425 e = XVECEXP (p, 0, nregs);
26426 e = XEXP (SET_DEST (e), 0);
26427 if (GET_CODE (e) == PLUS)
26428 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
26429 else
26430 padlast = offset - 4;
26431 gcc_assert (padlast == 0 || padlast == 4);
26432 if (padlast == 4)
26433 fprintf (asm_out_file, "\t.pad #4\n");
26434 reg_size = 4;
26435 fprintf (asm_out_file, "\t.save {");
26436 }
26437 else if (IS_VFP_REGNUM (reg))
26438 {
26439 reg_size = 8;
26440 fprintf (asm_out_file, "\t.vsave {");
26441 }
26442 else
26443 /* Unknown register type. */
26444 gcc_unreachable ();
26445
26446 /* If the stack increment doesn't match the size of the saved registers,
26447 something has gone horribly wrong. */
26448 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
26449
26450 offset = padfirst;
26451 lastreg = 0;
26452 /* The remaining insns will describe the stores. */
26453 for (i = 1; i <= nregs; i++)
26454 {
26455 /* Expect (set (mem <addr>) (reg)).
26456 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26457 e = XVECEXP (p, 0, i);
26458 gcc_assert (GET_CODE (e) == SET
26459 && MEM_P (SET_DEST (e))
26460 && REG_P (SET_SRC (e)));
26461
26462 reg = REGNO (SET_SRC (e));
26463 gcc_assert (reg >= lastreg);
26464
26465 if (i != 1)
26466 fprintf (asm_out_file, ", ");
26467 /* We can't use %r for vfp because we need to use the
26468 double precision register names. */
26469 if (IS_VFP_REGNUM (reg))
26470 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
26471 else
26472 asm_fprintf (asm_out_file, "%r", reg);
26473
26474 #ifdef ENABLE_CHECKING
26475 /* Check that the addresses are consecutive. */
26476 e = XEXP (SET_DEST (e), 0);
26477 if (GET_CODE (e) == PLUS)
26478 gcc_assert (REG_P (XEXP (e, 0))
26479 && REGNO (XEXP (e, 0)) == SP_REGNUM
26480 && CONST_INT_P (XEXP (e, 1))
26481 && offset == INTVAL (XEXP (e, 1)));
26482 else
26483 gcc_assert (i == 1
26484 && REG_P (e)
26485 && REGNO (e) == SP_REGNUM);
26486 offset += reg_size;
26487 #endif
26488 }
26489 fprintf (asm_out_file, "}\n");
26490 if (padfirst)
26491 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
26492 }
26493
26494 /* Emit unwind directives for a SET. */
26495
26496 static void
26497 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
26498 {
26499 rtx e0;
26500 rtx e1;
26501 unsigned reg;
26502
26503 e0 = XEXP (p, 0);
26504 e1 = XEXP (p, 1);
26505 switch (GET_CODE (e0))
26506 {
26507 case MEM:
26508 /* Pushing a single register. */
26509 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
26510 || !REG_P (XEXP (XEXP (e0, 0), 0))
26511 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
26512 abort ();
26513
26514 asm_fprintf (asm_out_file, "\t.save ");
26515 if (IS_VFP_REGNUM (REGNO (e1)))
26516 asm_fprintf(asm_out_file, "{d%d}\n",
26517 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
26518 else
26519 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
26520 break;
26521
26522 case REG:
26523 if (REGNO (e0) == SP_REGNUM)
26524 {
26525 /* A stack increment. */
26526 if (GET_CODE (e1) != PLUS
26527 || !REG_P (XEXP (e1, 0))
26528 || REGNO (XEXP (e1, 0)) != SP_REGNUM
26529 || !CONST_INT_P (XEXP (e1, 1)))
26530 abort ();
26531
26532 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
26533 -INTVAL (XEXP (e1, 1)));
26534 }
26535 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
26536 {
26537 HOST_WIDE_INT offset;
26538
26539 if (GET_CODE (e1) == PLUS)
26540 {
26541 if (!REG_P (XEXP (e1, 0))
26542 || !CONST_INT_P (XEXP (e1, 1)))
26543 abort ();
26544 reg = REGNO (XEXP (e1, 0));
26545 offset = INTVAL (XEXP (e1, 1));
26546 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
26547 HARD_FRAME_POINTER_REGNUM, reg,
26548 offset);
26549 }
26550 else if (REG_P (e1))
26551 {
26552 reg = REGNO (e1);
26553 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
26554 HARD_FRAME_POINTER_REGNUM, reg);
26555 }
26556 else
26557 abort ();
26558 }
26559 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
26560 {
26561 /* Move from sp to reg. */
26562 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
26563 }
26564 else if (GET_CODE (e1) == PLUS
26565 && REG_P (XEXP (e1, 0))
26566 && REGNO (XEXP (e1, 0)) == SP_REGNUM
26567 && CONST_INT_P (XEXP (e1, 1)))
26568 {
26569 /* Set reg to offset from sp. */
26570 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
26571 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
26572 }
26573 else
26574 abort ();
26575 break;
26576
26577 default:
26578 abort ();
26579 }
26580 }
26581
26582
26583 /* Emit unwind directives for the given insn. */
26584
26585 static void
26586 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
26587 {
26588 rtx note, pat;
26589 bool handled_one = false;
26590
26591 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26592 return;
26593
26594 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26595 && (TREE_NOTHROW (current_function_decl)
26596 || crtl->all_throwers_are_sibcalls))
26597 return;
26598
26599 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
26600 return;
26601
26602 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
26603 {
26604 switch (REG_NOTE_KIND (note))
26605 {
26606 case REG_FRAME_RELATED_EXPR:
26607 pat = XEXP (note, 0);
26608 goto found;
26609
26610 case REG_CFA_REGISTER:
26611 pat = XEXP (note, 0);
26612 if (pat == NULL)
26613 {
26614 pat = PATTERN (insn);
26615 if (GET_CODE (pat) == PARALLEL)
26616 pat = XVECEXP (pat, 0, 0);
26617 }
26618
26619 /* Only emitted for IS_STACKALIGN re-alignment. */
26620 {
26621 rtx dest, src;
26622 unsigned reg;
26623
26624 src = SET_SRC (pat);
26625 dest = SET_DEST (pat);
26626
26627 gcc_assert (src == stack_pointer_rtx);
26628 reg = REGNO (dest);
26629 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26630 reg + 0x90, reg);
26631 }
26632 handled_one = true;
26633 break;
26634
26635 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
26636 to get correct dwarf information for shrink-wrapping. We should not
26637 emit unwind information for it, because such notes are used either for
26638 pretend arguments or to adjust sp and restore registers from the
26639 stack. */
26640 case REG_CFA_DEF_CFA:
26641 case REG_CFA_ADJUST_CFA:
26642 case REG_CFA_RESTORE:
26643 return;
26644
26645 case REG_CFA_EXPRESSION:
26646 case REG_CFA_OFFSET:
26647 /* ??? Only handling here what we actually emit. */
26648 gcc_unreachable ();
26649
26650 default:
26651 break;
26652 }
26653 }
26654 if (handled_one)
26655 return;
26656 pat = PATTERN (insn);
26657 found:
26658
26659 switch (GET_CODE (pat))
26660 {
26661 case SET:
26662 arm_unwind_emit_set (asm_out_file, pat);
26663 break;
26664
26665 case SEQUENCE:
26666 /* Store multiple. */
26667 arm_unwind_emit_sequence (asm_out_file, pat);
26668 break;
26669
26670 default:
26671 abort();
26672 }
26673 }
26674
26675
26676 /* Output a reference from a function exception table to the type_info
26677 object X. The EABI specifies that the symbol should be relocated by
26678 an R_ARM_TARGET2 relocation. */
26679
26680 static bool
26681 arm_output_ttype (rtx x)
26682 {
26683 fputs ("\t.word\t", asm_out_file);
26684 output_addr_const (asm_out_file, x);
26685 /* Use special relocations for symbol references. */
26686 if (!CONST_INT_P (x))
26687 fputs ("(TARGET2)", asm_out_file);
26688 fputc ('\n', asm_out_file);
26689
26690 return TRUE;
26691 }
26692
26693 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
26694
26695 static void
26696 arm_asm_emit_except_personality (rtx personality)
26697 {
26698 fputs ("\t.personality\t", asm_out_file);
26699 output_addr_const (asm_out_file, personality);
26700 fputc ('\n', asm_out_file);
26701 }
26702
26703 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
26704
26705 static void
26706 arm_asm_init_sections (void)
26707 {
26708 exception_section = get_unnamed_section (0, output_section_asm_op,
26709 "\t.handlerdata");
26710 }
26711 #endif /* ARM_UNWIND_INFO */
26712
26713 /* Output unwind directives for the start/end of a function. */
26714
26715 void
26716 arm_output_fn_unwind (FILE * f, bool prologue)
26717 {
26718 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26719 return;
26720
26721 if (prologue)
26722 fputs ("\t.fnstart\n", f);
26723 else
26724 {
26725 /* If this function will never be unwound, then mark it as such.
26726 The same condition is used in arm_unwind_emit to suppress
26727 the frame annotations. */
26728 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26729 && (TREE_NOTHROW (current_function_decl)
26730 || crtl->all_throwers_are_sibcalls))
26731 fputs("\t.cantunwind\n", f);
26732
26733 fputs ("\t.fnend\n", f);
26734 }
26735 }
26736
26737 static bool
26738 arm_emit_tls_decoration (FILE *fp, rtx x)
26739 {
26740 enum tls_reloc reloc;
26741 rtx val;
26742
26743 val = XVECEXP (x, 0, 0);
26744 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
26745
26746 output_addr_const (fp, val);
26747
26748 switch (reloc)
26749 {
26750 case TLS_GD32:
26751 fputs ("(tlsgd)", fp);
26752 break;
26753 case TLS_LDM32:
26754 fputs ("(tlsldm)", fp);
26755 break;
26756 case TLS_LDO32:
26757 fputs ("(tlsldo)", fp);
26758 break;
26759 case TLS_IE32:
26760 fputs ("(gottpoff)", fp);
26761 break;
26762 case TLS_LE32:
26763 fputs ("(tpoff)", fp);
26764 break;
26765 case TLS_DESCSEQ:
26766 fputs ("(tlsdesc)", fp);
26767 break;
26768 default:
26769 gcc_unreachable ();
26770 }
26771
26772 switch (reloc)
26773 {
26774 case TLS_GD32:
26775 case TLS_LDM32:
26776 case TLS_IE32:
26777 case TLS_DESCSEQ:
26778 fputs (" + (. - ", fp);
26779 output_addr_const (fp, XVECEXP (x, 0, 2));
26780 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
26781 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
26782 output_addr_const (fp, XVECEXP (x, 0, 3));
26783 fputc (')', fp);
26784 break;
26785 default:
26786 break;
26787 }
26788
26789 return TRUE;
26790 }
26791
26792 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
26793
26794 static void
26795 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
26796 {
26797 gcc_assert (size == 4);
26798 fputs ("\t.word\t", file);
26799 output_addr_const (file, x);
26800 fputs ("(tlsldo)", file);
26801 }
26802
26803 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
26804
26805 static bool
26806 arm_output_addr_const_extra (FILE *fp, rtx x)
26807 {
26808 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
26809 return arm_emit_tls_decoration (fp, x);
26810 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
26811 {
26812 char label[256];
26813 int labelno = INTVAL (XVECEXP (x, 0, 0));
26814
26815 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
26816 assemble_name_raw (fp, label);
26817
26818 return TRUE;
26819 }
26820 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
26821 {
26822 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
26823 if (GOT_PCREL)
26824 fputs ("+.", fp);
26825 fputs ("-(", fp);
26826 output_addr_const (fp, XVECEXP (x, 0, 0));
26827 fputc (')', fp);
26828 return TRUE;
26829 }
26830 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
26831 {
26832 output_addr_const (fp, XVECEXP (x, 0, 0));
26833 if (GOT_PCREL)
26834 fputs ("+.", fp);
26835 fputs ("-(", fp);
26836 output_addr_const (fp, XVECEXP (x, 0, 1));
26837 fputc (')', fp);
26838 return TRUE;
26839 }
26840 else if (GET_CODE (x) == CONST_VECTOR)
26841 return arm_emit_vector_const (fp, x);
26842
26843 return FALSE;
26844 }
26845
26846 /* Output assembly for a shift instruction.
26847 SET_FLAGS determines how the instruction modifies the condition codes.
26848 0 - Do not set condition codes.
26849 1 - Set condition codes.
26850 2 - Use smallest instruction. */
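/* With unified syntax the output uses the mnemonic returned by shift_op
   together with a conditional/flag-setting modifier and three operands;
   without unified syntax a plain mov with the %S3 shift-operand modifier
   is emitted instead.  */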
26851 const char *
26852 arm_output_shift(rtx * operands, int set_flags)
26853 {
26854 char pattern[100];
26855 static const char flag_chars[3] = {'?', '.', '!'};
26856 const char *shift;
26857 HOST_WIDE_INT val;
26858 char c;
26859
26860 c = flag_chars[set_flags];
26861 if (TARGET_UNIFIED_ASM)
26862 {
26863 shift = shift_op(operands[3], &val);
26864 if (shift)
26865 {
26866 if (val != -1)
26867 operands[2] = GEN_INT(val);
26868 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
26869 }
26870 else
26871 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
26872 }
26873 else
26874 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
26875 output_asm_insn (pattern, operands);
26876 return "";
26877 }
26878
26879 /* Output assembly for a WMMX immediate shift instruction. */
26880 const char *
26881 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
26882 {
26883 int shift = INTVAL (operands[2]);
26884 char templ[50];
26885 machine_mode opmode = GET_MODE (operands[0]);
26886
26887 gcc_assert (shift >= 0);
26888
26889 /* The shift value may exceed the element width: more than 63 for the D
26890 qualifier, 31 for W, or 15 for H; handle those out-of-range cases here. */
26891 if (((opmode == V4HImode) && (shift > 15))
26892 || ((opmode == V2SImode) && (shift > 31))
26893 || ((opmode == DImode) && (shift > 63)))
26894 {
26895 if (wror_or_wsra)
26896 {
26897 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26898 output_asm_insn (templ, operands);
26899 if (opmode == DImode)
26900 {
26901 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
26902 output_asm_insn (templ, operands);
26903 }
26904 }
26905 else
26906 {
26907 /* The destination register will contain all zeros. */
26908 sprintf (templ, "wzero\t%%0");
26909 output_asm_insn (templ, operands);
26910 }
26911 return "";
26912 }
26913
26914 if ((opmode == DImode) && (shift > 32))
26915 {
26916 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26917 output_asm_insn (templ, operands);
26918 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
26919 output_asm_insn (templ, operands);
26920 }
26921 else
26922 {
26923 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
26924 output_asm_insn (templ, operands);
26925 }
26926 return "";
26927 }
26928
26929 /* Output assembly for a WMMX tinsr instruction. */
26930 const char *
26931 arm_output_iwmmxt_tinsr (rtx *operands)
26932 {
26933 int mask = INTVAL (operands[3]);
26934 int i;
26935 char templ[50];
26936 int units = mode_nunits[GET_MODE (operands[0])];
26937 gcc_assert ((mask & (mask - 1)) == 0);
26938 for (i = 0; i < units; ++i)
26939 {
26940 if ((mask & 0x01) == 1)
26941 {
26942 break;
26943 }
26944 mask >>= 1;
26945 }
26946 gcc_assert (i < units);
26947 {
26948 switch (GET_MODE (operands[0]))
26949 {
26950 case V8QImode:
26951 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
26952 break;
26953 case V4HImode:
26954 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
26955 break;
26956 case V2SImode:
26957 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
26958 break;
26959 default:
26960 gcc_unreachable ();
26961 break;
26962 }
26963 output_asm_insn (templ, operands);
26964 }
26965 return "";
26966 }
26967
26968 /* Output a Thumb-1 casesi dispatch sequence. */
26969 const char *
26970 thumb1_output_casesi (rtx *operands)
26971 {
26972 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
26973
26974 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
26975
26976 switch (GET_MODE(diff_vec))
26977 {
26978 case QImode:
26979 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26980 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
26981 case HImode:
26982 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26983 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
26984 case SImode:
26985 return "bl\t%___gnu_thumb1_case_si";
26986 default:
26987 gcc_unreachable ();
26988 }
26989 }
26990
26991 /* Output a Thumb-2 casesi instruction. */
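/* The dispatch sequence is a bounds check (cmp/bhi) followed by a table
   branch: tbb/tbh for byte/halfword offset tables, or an explicit adr/ldr
   sequence for SImode tables (the PIC case needs an extra add).  */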
26992 const char *
26993 thumb2_output_casesi (rtx *operands)
26994 {
26995 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
26996
26997 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
26998
26999 output_asm_insn ("cmp\t%0, %1", operands);
27000 output_asm_insn ("bhi\t%l3", operands);
27001 switch (GET_MODE(diff_vec))
27002 {
27003 case QImode:
27004 return "tbb\t[%|pc, %0]";
27005 case HImode:
27006 return "tbh\t[%|pc, %0, lsl #1]";
27007 case SImode:
27008 if (flag_pic)
27009 {
27010 output_asm_insn ("adr\t%4, %l2", operands);
27011 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27012 output_asm_insn ("add\t%4, %4, %5", operands);
27013 return "bx\t%4";
27014 }
27015 else
27016 {
27017 output_asm_insn ("adr\t%4, %l2", operands);
27018 return "ldr\t%|pc, [%4, %0, lsl #2]";
27019 }
27020 default:
27021 gcc_unreachable ();
27022 }
27023 }
27024
27025 /* Most ARM cores are single issue, but some newer ones can issue two or three
27026 instructions per cycle. The scheduler descriptions rely on this being correct. */
27027 static int
27028 arm_issue_rate (void)
27029 {
27030 switch (arm_tune)
27031 {
27032 case cortexa15:
27033 case cortexa57:
27034 return 3;
27035
27036 case cortexm7:
27037 case cortexr4:
27038 case cortexr4f:
27039 case cortexr5:
27040 case genericv7a:
27041 case cortexa5:
27042 case cortexa7:
27043 case cortexa8:
27044 case cortexa9:
27045 case cortexa12:
27046 case cortexa17:
27047 case cortexa53:
27048 case fa726te:
27049 case marvell_pj4:
27050 return 2;
27051
27052 default:
27053 return 1;
27054 }
27055 }
27056
27057 const char *
27058 arm_mangle_type (const_tree type)
27059 {
27060 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27061 has to be mangled as if it is in the "std" namespace. */
27062 if (TARGET_AAPCS_BASED
27063 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27064 return "St9__va_list";
27065
27066 /* Half-precision float. */
27067 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27068 return "Dh";
27069
27070 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27071 builtin type. */
27072 if (TYPE_NAME (type) != NULL)
27073 return arm_mangle_builtin_type (type);
27074
27075 /* Use the default mangling. */
27076 return NULL;
27077 }
27078
27079 /* Order of allocation of core registers for Thumb: this allocation is
27080 written over the corresponding initial entries of the array
27081 initialized with REG_ALLOC_ORDER. We allocate all low registers
27082 first. Saving and restoring a low register is usually cheaper than
27083 using a call-clobbered high register. */
27084
27085 static const int thumb_core_reg_alloc_order[] =
27086 {
27087 3, 2, 1, 0, 4, 5, 6, 7,
27088 14, 12, 8, 9, 10, 11
27089 };
27090
27091 /* Adjust register allocation order when compiling for Thumb. */
27092
27093 void
27094 arm_order_regs_for_local_alloc (void)
27095 {
27096 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27097 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27098 if (TARGET_THUMB)
27099 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27100 sizeof (thumb_core_reg_alloc_order));
27101 }
27102
27103 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27104
27105 bool
27106 arm_frame_pointer_required (void)
27107 {
27108 return (cfun->has_nonlocal_label
27109 || SUBTARGET_FRAME_POINTER_REQUIRED
27110 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
27111 }
27112
27113 /* Thumb-1 is the only target that lacks conditional execution, so return
27114 true unless the target is Thumb-1. */
27115 static bool
27116 arm_have_conditional_execution (void)
27117 {
27118 return !TARGET_THUMB1;
27119 }
27120
27121 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27122 static HOST_WIDE_INT
27123 arm_vector_alignment (const_tree type)
27124 {
27125 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27126
27127 if (TARGET_AAPCS_BASED)
27128 align = MIN (align, 64);
27129
27130 return align;
27131 }
27132
27133 static unsigned int
27134 arm_autovectorize_vector_sizes (void)
27135 {
27136 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27137 }
27138
27139 static bool
27140 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27141 {
27142 /* Vectors which aren't in packed structures will not be less aligned than
27143 the natural alignment of their element type, so this is safe. */
27144 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27145 return !is_packed;
27146
27147 return default_builtin_vector_alignment_reachable (type, is_packed);
27148 }
27149
27150 static bool
27151 arm_builtin_support_vector_misalignment (machine_mode mode,
27152 const_tree type, int misalignment,
27153 bool is_packed)
27154 {
27155 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27156 {
27157 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27158
27159 if (is_packed)
27160 return align == 1;
27161
27162 /* If the misalignment is unknown, we should be able to handle the access
27163 so long as it is not to a member of a packed data structure. */
27164 if (misalignment == -1)
27165 return true;
27166
27167 /* Return true if the misalignment is a multiple of the natural alignment
27168 of the vector's element type. This is probably always going to be
27169 true in practice, since we've already established that this isn't a
27170 packed access. */
27171 return ((misalignment % align) == 0);
27172 }
27173
27174 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27175 is_packed);
27176 }
27177
27178 static void
27179 arm_conditional_register_usage (void)
27180 {
27181 int regno;
27182
27183 if (TARGET_THUMB1 && optimize_size)
27184 {
27185 /* When optimizing for size on Thumb-1, it's better not
27186 to use the HI regs, because of the overhead of
27187 stacking them. */
27188 for (regno = FIRST_HI_REGNUM;
27189 regno <= LAST_HI_REGNUM; ++regno)
27190 fixed_regs[regno] = call_used_regs[regno] = 1;
27191 }
27192
27193 /* The link register can be clobbered by any branch insn,
27194 but we have no way to track that at present, so mark
27195 it as unavailable. */
27196 if (TARGET_THUMB1)
27197 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27198
27199 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
27200 {
27201 /* VFPv3 registers are disabled when earlier VFP
27202 versions are selected due to the definition of
27203 LAST_VFP_REGNUM. */
27204 for (regno = FIRST_VFP_REGNUM;
27205 regno <= LAST_VFP_REGNUM; ++ regno)
27206 {
27207 fixed_regs[regno] = 0;
27208 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27209 || regno >= FIRST_VFP_REGNUM + 32;
27210 }
27211 }
27212
27213 if (TARGET_REALLY_IWMMXT)
27214 {
27215 regno = FIRST_IWMMXT_GR_REGNUM;
27216 /* The 2002/10/09 revision of the XScale ABI has wCG0
27217 and wCG1 as call-preserved registers. The 2002/11/21
27218 revision changed this so that all wCG registers are
27219 scratch registers. */
27220 for (regno = FIRST_IWMMXT_GR_REGNUM;
27221 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27222 fixed_regs[regno] = 0;
27223 /* The XScale ABI has wR0 - wR9 as scratch registers,
27224 the rest as call-preserved registers. */
27225 for (regno = FIRST_IWMMXT_REGNUM;
27226 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27227 {
27228 fixed_regs[regno] = 0;
27229 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27230 }
27231 }
27232
27233 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27234 {
27235 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27236 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27237 }
27238 else if (TARGET_APCS_STACK)
27239 {
27240 fixed_regs[10] = 1;
27241 call_used_regs[10] = 1;
27242 }
27243 /* -mcaller-super-interworking reserves r11 for calls to
27244 _interwork_r11_call_via_rN(). Making the register global
27245 is an easy way of ensuring that it remains valid for all
27246 calls. */
27247 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27248 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27249 {
27250 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27251 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27252 if (TARGET_CALLER_INTERWORKING)
27253 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27254 }
27255 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27256 }
27257
27258 static reg_class_t
27259 arm_preferred_rename_class (reg_class_t rclass)
27260 {
27261 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27262 using GENERAL_REGS, so during the register rename pass we prefer LO_REGS,
27263 which can reduce code size. */
27264 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27265 return LO_REGS;
27266 else
27267 return NO_REGS;
27268 }
27269
27270 /* Compute the attribute "length" of insn "*push_multi".
27271 So this function MUST be kept in sync with that insn pattern. */
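/* For example, "push {r4, r5, lr}" can use the 16-bit Thumb-2 encoding
   (length 2), while pushing any high register other than LR, e.g.
   "push {r4, r8}", forces the 32-bit encoding (length 4).  */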
27272 int
27273 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27274 {
27275 int i, regno, hi_reg;
27276 int num_saves = XVECLEN (parallel_op, 0);
27277
27278 /* ARM mode. */
27279 if (TARGET_ARM)
27280 return 4;
27281 /* Thumb1 mode. */
27282 if (TARGET_THUMB1)
27283 return 2;
27284
27285 /* Thumb2 mode. */
27286 regno = REGNO (first_op);
27287 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27288 for (i = 1; i < num_saves && !hi_reg; i++)
27289 {
27290 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27291 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27292 }
27293
27294 if (!hi_reg)
27295 return 2;
27296 return 4;
27297 }
27298
27299 /* Compute the number of instructions emitted by output_move_double. */
27300 int
27301 arm_count_output_move_double_insns (rtx *operands)
27302 {
27303 int count;
27304 rtx ops[2];
27305 /* output_move_double may modify the operands array, so call it
27306 here on a copy of the array. */
27307 ops[0] = operands[0];
27308 ops[1] = operands[1];
27309 output_move_double (ops, false, &count);
27310 return count;
27311 }
27312
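/* Return the number of fraction bits N if OPERAND is an exact power-of-two
   reciprocal 1/2^N (presumably for the VFPv3 fixed-point conversion
   patterns), or 0 if it is not of that form.  */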
27313 int
27314 vfp3_const_double_for_fract_bits (rtx operand)
27315 {
27316 REAL_VALUE_TYPE r0;
27317
27318 if (!CONST_DOUBLE_P (operand))
27319 return 0;
27320
27321 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27322 if (exact_real_inverse (DFmode, &r0))
27323 {
27324 if (exact_real_truncate (DFmode, &r0))
27325 {
27326 HOST_WIDE_INT value = real_to_integer (&r0);
27327 value = value & 0xffffffff;
27328 if ((value != 0) && ( (value & (value - 1)) == 0))
27329 return int_log2 (value);
27330 }
27331 }
27332 return 0;
27333 }
27334
27335 int
27336 vfp3_const_double_for_bits (rtx operand)
27337 {
27338 REAL_VALUE_TYPE r0;
27339
27340 if (!CONST_DOUBLE_P (operand))
27341 return 0;
27342
27343 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27344 if (exact_real_truncate (DFmode, &r0))
27345 {
27346 HOST_WIDE_INT value = real_to_integer (&r0);
27347 value = value & 0xffffffff;
27348 if ((value != 0) && ( (value & (value - 1)) == 0))
27349 return int_log2 (value);
27350 }
27351
27352 return 0;
27353 }
27354 \f
27355 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27356
27357 static void
27358 arm_pre_atomic_barrier (enum memmodel model)
27359 {
27360 if (need_atomic_barrier_p (model, true))
27361 emit_insn (gen_memory_barrier ());
27362 }
27363
27364 static void
27365 arm_post_atomic_barrier (enum memmodel model)
27366 {
27367 if (need_atomic_barrier_p (model, false))
27368 emit_insn (gen_memory_barrier ());
27369 }
27370
27371 /* Emit the load-exclusive and store-exclusive instructions.
27372 Use acquire and release versions if necessary. */
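/* On targets with the load-acquire/store-release extension this selects
   the ldaex/stlex family; otherwise the plain ldrex/strex forms are used
   (mnemonics given for illustration).  */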
27373
27374 static void
27375 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
27376 {
27377 rtx (*gen) (rtx, rtx);
27378
27379 if (acq)
27380 {
27381 switch (mode)
27382 {
27383 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
27384 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
27385 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
27386 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
27387 default:
27388 gcc_unreachable ();
27389 }
27390 }
27391 else
27392 {
27393 switch (mode)
27394 {
27395 case QImode: gen = gen_arm_load_exclusiveqi; break;
27396 case HImode: gen = gen_arm_load_exclusivehi; break;
27397 case SImode: gen = gen_arm_load_exclusivesi; break;
27398 case DImode: gen = gen_arm_load_exclusivedi; break;
27399 default:
27400 gcc_unreachable ();
27401 }
27402 }
27403
27404 emit_insn (gen (rval, mem));
27405 }
27406
27407 static void
27408 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
27409 rtx mem, bool rel)
27410 {
27411 rtx (*gen) (rtx, rtx, rtx);
27412
27413 if (rel)
27414 {
27415 switch (mode)
27416 {
27417 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
27418 case HImode: gen = gen_arm_store_release_exclusivehi; break;
27419 case SImode: gen = gen_arm_store_release_exclusivesi; break;
27420 case DImode: gen = gen_arm_store_release_exclusivedi; break;
27421 default:
27422 gcc_unreachable ();
27423 }
27424 }
27425 else
27426 {
27427 switch (mode)
27428 {
27429 case QImode: gen = gen_arm_store_exclusiveqi; break;
27430 case HImode: gen = gen_arm_store_exclusivehi; break;
27431 case SImode: gen = gen_arm_store_exclusivesi; break;
27432 case DImode: gen = gen_arm_store_exclusivedi; break;
27433 default:
27434 gcc_unreachable ();
27435 }
27436 }
27437
27438 emit_insn (gen (bval, rval, mem));
27439 }
27440
27441 /* Mark the previous jump instruction as unlikely. */
27442
27443 static void
27444 emit_unlikely_jump (rtx insn)
27445 {
27446 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
27447
27448 insn = emit_jump_insn (insn);
27449 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
27450 }
27451
27452 /* Expand a compare and swap pattern. */
27453
27454 void
27455 arm_expand_compare_and_swap (rtx operands[])
27456 {
27457 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
27458 machine_mode mode;
27459 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
27460
27461 bval = operands[0];
27462 rval = operands[1];
27463 mem = operands[2];
27464 oldval = operands[3];
27465 newval = operands[4];
27466 is_weak = operands[5];
27467 mod_s = operands[6];
27468 mod_f = operands[7];
27469 mode = GET_MODE (mem);
27470
27471 /* Normally the succ memory model must be stronger than fail, but in the
27472 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27473 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27474
27475 if (TARGET_HAVE_LDACQ
27476 && INTVAL (mod_f) == MEMMODEL_ACQUIRE
27477 && INTVAL (mod_s) == MEMMODEL_RELEASE)
27478 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
27479
27480 switch (mode)
27481 {
27482 case QImode:
27483 case HImode:
27484 /* For narrow modes, we're going to perform the comparison in SImode,
27485 so do the zero-extension now. */
27486 rval = gen_reg_rtx (SImode);
27487 oldval = convert_modes (SImode, mode, oldval, true);
27488 /* FALLTHRU */
27489
27490 case SImode:
27491 /* Force the value into a register if needed. We waited until after
27492 the zero-extension above to do this properly. */
27493 if (!arm_add_operand (oldval, SImode))
27494 oldval = force_reg (SImode, oldval);
27495 break;
27496
27497 case DImode:
27498 if (!cmpdi_operand (oldval, mode))
27499 oldval = force_reg (mode, oldval);
27500 break;
27501
27502 default:
27503 gcc_unreachable ();
27504 }
27505
27506 switch (mode)
27507 {
27508 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
27509 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
27510 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
27511 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
27512 default:
27513 gcc_unreachable ();
27514 }
27515
27516 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
27517
27518 if (mode == QImode || mode == HImode)
27519 emit_move_insn (operands[1], gen_lowpart (mode, rval));
27520
27521 /* In all cases, we arrange for success to be signaled by Z set.
27522 This arrangement allows for the boolean result to be used directly
27523 in a subsequent branch, post optimization. */
27524 x = gen_rtx_REG (CCmode, CC_REGNUM);
27525 x = gen_rtx_EQ (SImode, x, const0_rtx);
27526 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
27527 }
27528
27529 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27530 another memory store between the load-exclusive and store-exclusive can
27531 reset the monitor from Exclusive to Open state. This means we must wait
27532 until after reload to split the pattern, lest we get a register spill in
27533 the middle of the atomic sequence. */
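/* The split form is, schematically (strong version shown; this is only a
   sketch, the real sequence is produced by the code below):

       1: ldrex  rval, [mem]
          cmp    rval, oldval
          bne    2f
          strex  scratch, newval, [mem]
          cmp    scratch, #0
          bne    1b
       2:

   with barriers or acquire/release variants chosen from the memory model.  */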
27534
27535 void
27536 arm_split_compare_and_swap (rtx operands[])
27537 {
27538 rtx rval, mem, oldval, newval, scratch;
27539 machine_mode mode;
27540 enum memmodel mod_s, mod_f;
27541 bool is_weak;
27542 rtx_code_label *label1, *label2;
27543 rtx x, cond;
27544
27545 rval = operands[0];
27546 mem = operands[1];
27547 oldval = operands[2];
27548 newval = operands[3];
27549 is_weak = (operands[4] != const0_rtx);
27550 mod_s = (enum memmodel) INTVAL (operands[5]);
27551 mod_f = (enum memmodel) INTVAL (operands[6]);
27552 scratch = operands[7];
27553 mode = GET_MODE (mem);
27554
27555 bool use_acquire = TARGET_HAVE_LDACQ
27556 && !(mod_s == MEMMODEL_RELAXED
27557 || mod_s == MEMMODEL_CONSUME
27558 || mod_s == MEMMODEL_RELEASE);
27559
27560 bool use_release = TARGET_HAVE_LDACQ
27561 && !(mod_s == MEMMODEL_RELAXED
27562 || mod_s == MEMMODEL_CONSUME
27563 || mod_s == MEMMODEL_ACQUIRE);
27564
27565 /* Checks whether a barrier is needed and emits one accordingly. */
27566 if (!(use_acquire || use_release))
27567 arm_pre_atomic_barrier (mod_s);
27568
27569 label1 = NULL;
27570 if (!is_weak)
27571 {
27572 label1 = gen_label_rtx ();
27573 emit_label (label1);
27574 }
27575 label2 = gen_label_rtx ();
27576
27577 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
27578
27579 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
27580 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27581 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27582 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
27583 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27584
27585 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
27586
27587 /* Weak or strong, we want EQ to be true for success, so that we
27588 match the flags that we got from the compare above. */
27589 cond = gen_rtx_REG (CCmode, CC_REGNUM);
27590 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
27591 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
27592
27593 if (!is_weak)
27594 {
27595 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27596 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27597 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
27598 emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
27599 }
27600
27601 if (mod_f != MEMMODEL_RELAXED)
27602 emit_label (label2);
27603
27604 /* Checks whether a barrier is needed and emits one accordingly. */
27605 if (!(use_acquire || use_release))
27606 arm_post_atomic_barrier (mod_s);
27607
27608 if (mod_f == MEMMODEL_RELAXED)
27609 emit_label (label2);
27610 }
27611
27612 void
27613 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
27614 rtx value, rtx model_rtx, rtx cond)
27615 {
27616 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
27617 machine_mode mode = GET_MODE (mem);
27618 machine_mode wmode = (mode == DImode ? DImode : SImode);
27619 rtx_code_label *label;
27620 rtx x;
27621
27622 bool use_acquire = TARGET_HAVE_LDACQ
27623 && !(model == MEMMODEL_RELAXED
27624 || model == MEMMODEL_CONSUME
27625 || model == MEMMODEL_RELEASE);
27626
27627 bool use_release = TARGET_HAVE_LDACQ
27628 && !(model == MEMMODEL_RELAXED
27629 || model == MEMMODEL_CONSUME
27630 || model == MEMMODEL_ACQUIRE);
27631
27632 /* Checks whether a barrier is needed and emits one accordingly. */
27633 if (!(use_acquire || use_release))
27634 arm_pre_atomic_barrier (model);
27635
27636 label = gen_label_rtx ();
27637 emit_label (label);
27638
27639 if (new_out)
27640 new_out = gen_lowpart (wmode, new_out);
27641 if (old_out)
27642 old_out = gen_lowpart (wmode, old_out);
27643 else
27644 old_out = new_out;
27645 value = simplify_gen_subreg (wmode, value, mode, 0);
27646
27647 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
27648
27649 switch (code)
27650 {
27651 case SET:
27652 new_out = value;
27653 break;
27654
27655 case NOT:
27656 x = gen_rtx_AND (wmode, old_out, value);
27657 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27658 x = gen_rtx_NOT (wmode, new_out);
27659 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27660 break;
27661
27662 case MINUS:
27663 if (CONST_INT_P (value))
27664 {
27665 value = GEN_INT (-INTVAL (value));
27666 code = PLUS;
27667 }
27668 /* FALLTHRU */
27669
27670 case PLUS:
27671 if (mode == DImode)
27672 {
27673 /* DImode plus/minus need to clobber flags. */
27674 /* The adddi3 and subdi3 patterns are incorrectly written so that
27675 they require matching operands, even when we could easily support
27676 three operands. Thankfully, this can be fixed up post-splitting,
27677 as the individual add+adc patterns do accept three operands and
27678 post-reload cprop can make these moves go away. */
27679 emit_move_insn (new_out, old_out);
27680 if (code == PLUS)
27681 x = gen_adddi3 (new_out, new_out, value);
27682 else
27683 x = gen_subdi3 (new_out, new_out, value);
27684 emit_insn (x);
27685 break;
27686 }
27687 /* FALLTHRU */
27688
27689 default:
27690 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
27691 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
27692 break;
27693 }
27694
27695 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
27696 use_release);
27697
27698 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27699 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
27700
27701 /* Checks whether a barrier is needed and emits one accordingly. */
27702 if (!(use_acquire || use_release))
27703 arm_post_atomic_barrier (model);
27704 }
27705 \f
27706 #define MAX_VECT_LEN 16
27707
27708 struct expand_vec_perm_d
27709 {
27710 rtx target, op0, op1;
27711 unsigned char perm[MAX_VECT_LEN];
27712 machine_mode vmode;
27713 unsigned char nelt;
27714 bool one_vector_p;
27715 bool testing_p;
27716 };
27717
27718 /* Generate a variable permutation. */
27719
27720 static void
27721 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
27722 {
27723 machine_mode vmode = GET_MODE (target);
27724 bool one_vector_p = rtx_equal_p (op0, op1);
27725
27726 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
27727 gcc_checking_assert (GET_MODE (op0) == vmode);
27728 gcc_checking_assert (GET_MODE (op1) == vmode);
27729 gcc_checking_assert (GET_MODE (sel) == vmode);
27730 gcc_checking_assert (TARGET_NEON);
27731
27732 if (one_vector_p)
27733 {
27734 if (vmode == V8QImode)
27735 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
27736 else
27737 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
27738 }
27739 else
27740 {
27741 rtx pair;
27742
27743 if (vmode == V8QImode)
27744 {
27745 pair = gen_reg_rtx (V16QImode);
27746 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
27747 pair = gen_lowpart (TImode, pair);
27748 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
27749 }
27750 else
27751 {
27752 pair = gen_reg_rtx (OImode);
27753 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
27754 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
27755 }
27756 }
27757 }
27758
27759 void
27760 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
27761 {
27762 machine_mode vmode = GET_MODE (target);
27763 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
27764 bool one_vector_p = rtx_equal_p (op0, op1);
27765 rtx rmask[MAX_VECT_LEN], mask;
27766
27767 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27768 numbering of elements for big-endian, we must reverse the order. */
27769 gcc_checking_assert (!BYTES_BIG_ENDIAN);
27770
27771 /* The VTBL instruction does not use a modulo index, so we must take care
27772 of that ourselves. */
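/* For example, with a single input vector in V8QImode (nelt == 8) a selector
   element of 11 is reduced to 11 & 7 == 3, matching VEC_PERM_EXPR's
   requirement that indices wrap modulo the number of elements.  */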
27773 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
27774 for (i = 0; i < nelt; ++i)
27775 rmask[i] = mask;
27776 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
27777 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
27778
27779 arm_expand_vec_perm_1 (target, op0, op1, sel);
27780 }
27781
27782 /* Generate or test for an insn that supports a constant permutation. */
27783
27784 /* Recognize patterns for the VUZP insns. */
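/* For example, in V4SImode with two distinct inputs the accepted selectors
   are { 0, 2, 4, 6 } (odd == 0) and { 1, 3, 5, 7 } (odd == 1), i.e. the
   even- or odd-numbered elements of the two concatenated inputs; the
   required half is obtained by binding d->target to the appropriate output
   of the VUZP emitted below.  */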
27785
27786 static bool
27787 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
27788 {
27789 unsigned int i, odd, mask, nelt = d->nelt;
27790 rtx out0, out1, in0, in1, x;
27791 rtx (*gen)(rtx, rtx, rtx, rtx);
27792
27793 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27794 return false;
27795
27796 /* Note that these are little-endian tests. Adjust for big-endian later. */
27797 if (d->perm[0] == 0)
27798 odd = 0;
27799 else if (d->perm[0] == 1)
27800 odd = 1;
27801 else
27802 return false;
27803 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27804
27805 for (i = 0; i < nelt; i++)
27806 {
27807 unsigned elt = (i * 2 + odd) & mask;
27808 if (d->perm[i] != elt)
27809 return false;
27810 }
27811
27812 /* Success! */
27813 if (d->testing_p)
27814 return true;
27815
27816 switch (d->vmode)
27817 {
27818 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
27819 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
27820 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
27821 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
27822 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
27823 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
27824 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
27825 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
27826 default:
27827 gcc_unreachable ();
27828 }
27829
27830 in0 = d->op0;
27831 in1 = d->op1;
27832 if (BYTES_BIG_ENDIAN)
27833 {
27834 x = in0, in0 = in1, in1 = x;
27835 odd = !odd;
27836 }
27837
27838 out0 = d->target;
27839 out1 = gen_reg_rtx (d->vmode);
27840 if (odd)
27841 x = out0, out0 = out1, out1 = x;
27842
27843 emit_insn (gen (out0, in0, in1, out1));
27844 return true;
27845 }
27846
27847 /* Recognize patterns for the VZIP insns. */
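/* For example, in V4SImode with two distinct inputs the accepted selectors
   are { 0, 4, 1, 5 } (high == 0) and { 2, 6, 3, 7 } (high == 2), i.e. the
   interleaving of the low or high halves of the two inputs.  */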
27848
27849 static bool
27850 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
27851 {
27852 unsigned int i, high, mask, nelt = d->nelt;
27853 rtx out0, out1, in0, in1, x;
27854 rtx (*gen)(rtx, rtx, rtx, rtx);
27855
27856 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27857 return false;
27858
27859 /* Note that these are little-endian tests. Adjust for big-endian later. */
27860 high = nelt / 2;
27861 if (d->perm[0] == high)
27862 ;
27863 else if (d->perm[0] == 0)
27864 high = 0;
27865 else
27866 return false;
27867 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27868
27869 for (i = 0; i < nelt / 2; i++)
27870 {
27871 unsigned elt = (i + high) & mask;
27872 if (d->perm[i * 2] != elt)
27873 return false;
27874 elt = (elt + nelt) & mask;
27875 if (d->perm[i * 2 + 1] != elt)
27876 return false;
27877 }
27878
27879 /* Success! */
27880 if (d->testing_p)
27881 return true;
27882
27883 switch (d->vmode)
27884 {
27885 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
27886 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
27887 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
27888 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
27889 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
27890 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
27891 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
27892 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
27893 default:
27894 gcc_unreachable ();
27895 }
27896
27897 in0 = d->op0;
27898 in1 = d->op1;
27899 if (BYTES_BIG_ENDIAN)
27900 {
27901 x = in0, in0 = in1, in1 = x;
27902 high = !high;
27903 }
27904
27905 out0 = d->target;
27906 out1 = gen_reg_rtx (d->vmode);
27907 if (high)
27908 x = out0, out0 = out1, out1 = x;
27909
27910 emit_insn (gen (out0, in0, in1, out1));
27911 return true;
27912 }
27913
27914 /* Recognize patterns for the VREV insns. */
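/* For example, a V8QImode selector of { 3, 2, 1, 0, 7, 6, 5, 4 } has
   diff == 3 and is matched below by VREV32.8, which reverses the bytes
   within each 32-bit group.  */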
27915
27916 static bool
27917 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
27918 {
27919 unsigned int i, j, diff, nelt = d->nelt;
27920 rtx (*gen)(rtx, rtx);
27921
27922 if (!d->one_vector_p)
27923 return false;
27924
27925 diff = d->perm[0];
27926 switch (diff)
27927 {
27928 case 7:
27929 switch (d->vmode)
27930 {
27931 case V16QImode: gen = gen_neon_vrev64v16qi; break;
27932 case V8QImode: gen = gen_neon_vrev64v8qi; break;
27933 default:
27934 return false;
27935 }
27936 break;
27937 case 3:
27938 switch (d->vmode)
27939 {
27940 case V16QImode: gen = gen_neon_vrev32v16qi; break;
27941 case V8QImode: gen = gen_neon_vrev32v8qi; break;
27942 case V8HImode: gen = gen_neon_vrev64v8hi; break;
27943 case V4HImode: gen = gen_neon_vrev64v4hi; break;
27944 default:
27945 return false;
27946 }
27947 break;
27948 case 1:
27949 switch (d->vmode)
27950 {
27951 case V16QImode: gen = gen_neon_vrev16v16qi; break;
27952 case V8QImode: gen = gen_neon_vrev16v8qi; break;
27953 case V8HImode: gen = gen_neon_vrev32v8hi; break;
27954 case V4HImode: gen = gen_neon_vrev32v4hi; break;
27955 case V4SImode: gen = gen_neon_vrev64v4si; break;
27956 case V2SImode: gen = gen_neon_vrev64v2si; break;
27957 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
27958 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
27959 default:
27960 return false;
27961 }
27962 break;
27963 default:
27964 return false;
27965 }
27966
27967 for (i = 0; i < nelt ; i += diff + 1)
27968 for (j = 0; j <= diff; j += 1)
27969 {
27970 /* This is guaranteed to be true as the value of diff
27971 is 7, 3 or 1 and we should have enough elements in the
27972 queue to generate this. Getting a vector mask with any
27973 other value of diff implies that something is wrong by
27974 the time we get here. */
27975 gcc_assert (i + j < nelt);
27976 if (d->perm[i + j] != i + diff - j)
27977 return false;
27978 }
27979
27980 /* Success! */
27981 if (d->testing_p)
27982 return true;
27983
27984 emit_insn (gen (d->target, d->op0));
27985 return true;
27986 }
27987
27988 /* Recognize patterns for the VTRN insns. */
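/* For example, in V4SImode with two distinct inputs the accepted selectors
   are { 0, 4, 2, 6 } (odd == 0) and { 1, 5, 3, 7 } (odd == 1), i.e. a
   transposition of the 2x2 element blocks formed by the two inputs.  */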
27989
27990 static bool
27991 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
27992 {
27993 unsigned int i, odd, mask, nelt = d->nelt;
27994 rtx out0, out1, in0, in1, x;
27995 rtx (*gen)(rtx, rtx, rtx, rtx);
27996
27997 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27998 return false;
27999
28000 /* Note that these are little-endian tests. Adjust for big-endian later. */
28001 if (d->perm[0] == 0)
28002 odd = 0;
28003 else if (d->perm[0] == 1)
28004 odd = 1;
28005 else
28006 return false;
28007 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28008
28009 for (i = 0; i < nelt; i += 2)
28010 {
28011 if (d->perm[i] != i + odd)
28012 return false;
28013 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28014 return false;
28015 }
28016
28017 /* Success! */
28018 if (d->testing_p)
28019 return true;
28020
28021 switch (d->vmode)
28022 {
28023 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28024 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28025 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28026 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28027 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28028 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28029 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28030 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28031 default:
28032 gcc_unreachable ();
28033 }
28034
28035 in0 = d->op0;
28036 in1 = d->op1;
28037 if (BYTES_BIG_ENDIAN)
28038 {
28039 x = in0, in0 = in1, in1 = x;
28040 odd = !odd;
28041 }
28042
28043 out0 = d->target;
28044 out1 = gen_reg_rtx (d->vmode);
28045 if (odd)
28046 x = out0, out0 = out1, out1 = x;
28047
28048 emit_insn (gen (out0, in0, in1, out1));
28049 return true;
28050 }
28051
28052 /* Recognize patterns for the VEXT insns. */
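/* For example, a V4SImode selector of { 1, 2, 3, 4 } selects a contiguous
   run starting at element 1 of the concatenated inputs and is matched
   below, producing a VEXT with an element offset of 1.  */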
28053
28054 static bool
28055 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28056 {
28057 unsigned int i, nelt = d->nelt;
28058 rtx (*gen) (rtx, rtx, rtx, rtx);
28059 rtx offset;
28060
28061 unsigned int location;
28062
28063 unsigned int next = d->perm[0] + 1;
28064
28065 /* TODO: Handle GCC's numbering of elements for big-endian. */
28066 if (BYTES_BIG_ENDIAN)
28067 return false;
28068
28069 /* Check if the extracted indexes are increasing by one. */
28070 for (i = 1; i < nelt; next++, i++)
28071 {
28072 /* If we hit the most significant element of the 2nd vector in
28073 the previous iteration, no need to test further. */
28074 if (next == 2 * nelt)
28075 return false;
28076
28077 /* If we are operating on only one vector: it could be a
28078 rotation. If there are only two elements of size < 64, let
28079 arm_evpc_neon_vrev catch it. */
28080 if (d->one_vector_p && (next == nelt))
28081 {
28082 if ((nelt == 2) && (d->vmode != V2DImode))
28083 return false;
28084 else
28085 next = 0;
28086 }
28087
28088 if (d->perm[i] != next)
28089 return false;
28090 }
28091
28092 location = d->perm[0];
28093
28094 switch (d->vmode)
28095 {
28096 case V16QImode: gen = gen_neon_vextv16qi; break;
28097 case V8QImode: gen = gen_neon_vextv8qi; break;
28098 case V4HImode: gen = gen_neon_vextv4hi; break;
28099 case V8HImode: gen = gen_neon_vextv8hi; break;
28100 case V2SImode: gen = gen_neon_vextv2si; break;
28101 case V4SImode: gen = gen_neon_vextv4si; break;
28102 case V2SFmode: gen = gen_neon_vextv2sf; break;
28103 case V4SFmode: gen = gen_neon_vextv4sf; break;
28104 case V2DImode: gen = gen_neon_vextv2di; break;
28105 default:
28106 return false;
28107 }
28108
28109 /* Success! */
28110 if (d->testing_p)
28111 return true;
28112
28113 offset = GEN_INT (location);
28114 emit_insn (gen (d->target, d->op0, d->op1, offset));
28115 return true;
28116 }
28117
28118 /* The NEON VTBL instruction is a fully variable permutation that's even
28119 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28120 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28121 can do slightly better by expanding this as a constant where we don't
28122 have to apply a mask. */
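/* For example, a constant V8QImode selector such as { 0, 7, 3, 12, 1, 14,
   6, 9 } has already been reduced into range by arm_expand_vec_perm_const,
   so it can be loaded into a register and passed straight to the VTBL
   expansion without the run-time AND that arm_expand_vec_perm must emit
   for a variable selector.  */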
28123
28124 static bool
28125 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28126 {
28127 rtx rperm[MAX_VECT_LEN], sel;
28128 machine_mode vmode = d->vmode;
28129 unsigned int i, nelt = d->nelt;
28130
28131 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28132 numbering of elements for big-endian, we must reverse the order. */
28133 if (BYTES_BIG_ENDIAN)
28134 return false;
28135
28136 if (d->testing_p)
28137 return true;
28138
28139 /* Generic code will try constant permutation twice: once with the
28140 original mode and again with the elements lowered to QImode.
28141 So wait and don't do the selector expansion ourselves. */
28142 if (vmode != V8QImode && vmode != V16QImode)
28143 return false;
28144
28145 for (i = 0; i < nelt; ++i)
28146 rperm[i] = GEN_INT (d->perm[i]);
28147 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28148 sel = force_reg (vmode, sel);
28149
28150 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28151 return true;
28152 }
28153
28154 static bool
28155 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28156 {
28157 /* Check if the input mask matches vext before reordering the
28158 operands. */
28159 if (TARGET_NEON)
28160 if (arm_evpc_neon_vext (d))
28161 return true;
28162
28163 /* The pattern matching functions above are written to look for a small
28164 number to begin the sequence (0, 1, N/2). If we begin with an index
28165 from the second operand, we can swap the operands. */
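/* For example, a V4SImode selector of { 5, 1, 7, 3 } starts with an index
   into the second operand; adding nelt modulo 2*nelt rewrites it as
   { 1, 5, 3, 7 } with the operands exchanged, which VTRN (odd == 1) can
   then match.  */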
28166 if (d->perm[0] >= d->nelt)
28167 {
28168 unsigned i, nelt = d->nelt;
28169 rtx x;
28170
28171 for (i = 0; i < nelt; ++i)
28172 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28173
28174 x = d->op0;
28175 d->op0 = d->op1;
28176 d->op1 = x;
28177 }
28178
28179 if (TARGET_NEON)
28180 {
28181 if (arm_evpc_neon_vuzp (d))
28182 return true;
28183 if (arm_evpc_neon_vzip (d))
28184 return true;
28185 if (arm_evpc_neon_vrev (d))
28186 return true;
28187 if (arm_evpc_neon_vtrn (d))
28188 return true;
28189 return arm_evpc_neon_vtbl (d);
28190 }
28191 return false;
28192 }
28193
28194 /* Expand a vec_perm_const pattern. */
28195
28196 bool
28197 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28198 {
28199 struct expand_vec_perm_d d;
28200 int i, nelt, which;
28201
28202 d.target = target;
28203 d.op0 = op0;
28204 d.op1 = op1;
28205
28206 d.vmode = GET_MODE (target);
28207 gcc_assert (VECTOR_MODE_P (d.vmode));
28208 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28209 d.testing_p = false;
28210
28211 for (i = which = 0; i < nelt; ++i)
28212 {
28213 rtx e = XVECEXP (sel, 0, i);
28214 int ei = INTVAL (e) & (2 * nelt - 1);
28215 which |= (ei < nelt ? 1 : 2);
28216 d.perm[i] = ei;
28217 }
28218
28219 switch (which)
28220 {
28221 default:
28222 gcc_unreachable();
28223
28224 case 3:
28225 d.one_vector_p = false;
28226 if (!rtx_equal_p (op0, op1))
28227 break;
28228
28229 /* The elements of PERM do not suggest that only the first operand
28230 is used, but both operands are identical. Allow easier matching
28231 of the permutation by folding the permutation into the single
28232 input vector. */
28233 /* FALLTHRU */
28234 case 2:
28235 for (i = 0; i < nelt; ++i)
28236 d.perm[i] &= nelt - 1;
28237 d.op0 = op1;
28238 d.one_vector_p = true;
28239 break;
28240
28241 case 1:
28242 d.op1 = op0;
28243 d.one_vector_p = true;
28244 break;
28245 }
28246
28247 return arm_expand_vec_perm_const_1 (&d);
28248 }
28249
28250 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28251
28252 static bool
28253 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
28254 const unsigned char *sel)
28255 {
28256 struct expand_vec_perm_d d;
28257 unsigned int i, nelt, which;
28258 bool ret;
28259
28260 d.vmode = vmode;
28261 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28262 d.testing_p = true;
28263 memcpy (d.perm, sel, nelt);
28264
28265 /* Categorize the set of elements in the selector. */
28266 for (i = which = 0; i < nelt; ++i)
28267 {
28268 unsigned char e = d.perm[i];
28269 gcc_assert (e < 2 * nelt);
28270 which |= (e < nelt ? 1 : 2);
28271 }
28272
28273 /* For all elements from second vector, fold the elements to first. */
28274 if (which == 2)
28275 for (i = 0; i < nelt; ++i)
28276 d.perm[i] -= nelt;
28277
28278 /* Check whether the mask can be applied to the vector type. */
28279 d.one_vector_p = (which != 3);
28280
28281 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
28282 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
28283 if (!d.one_vector_p)
28284 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
28285
28286 start_sequence ();
28287 ret = arm_expand_vec_perm_const_1 (&d);
28288 end_sequence ();
28289
28290 return ret;
28291 }
28292
28293 bool
28294 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
28295 {
28296 /* If we are soft float and we do not have ldrd
28297 then all auto increment forms are ok. */
28298 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
28299 return true;
28300
28301 switch (code)
28302 {
28303 /* Post increment and Pre Decrement are supported for all
28304 instruction forms except for vector forms. */
28305 case ARM_POST_INC:
28306 case ARM_PRE_DEC:
28307 if (VECTOR_MODE_P (mode))
28308 {
28309 if (code != ARM_PRE_DEC)
28310 return true;
28311 else
28312 return false;
28313 }
28314
28315 return true;
28316
28317 case ARM_POST_DEC:
28318 case ARM_PRE_INC:
28319 /* Without LDRD, if the mode size is greater than the
28320 word size there is no point in auto-incrementing
28321 because ldm and stm will not have these forms. */
28322 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
28323 return false;
28324
28325 /* Vector and floating point modes do not support
28326 these auto increment forms. */
28327 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
28328 return false;
28329
28330 return true;
28331
28332 default:
28333 return false;
28334
28335 }
28336
28337 return false;
28338 }
28339
28340 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
28341 on ARM, since we know that shifts by negative amounts are no-ops.
28342 Additionally, the default expansion code is not available or suitable
28343 for post-reload insn splits (this can occur when the register allocator
28344 chooses not to do a shift in NEON).
28345
28346 This function is used in both initial expand and post-reload splits, and
28347 handles all kinds of 64-bit shifts.
28348
28349 Input requirements:
28350 - It is safe for the input and output to be the same register, but
28351 early-clobber rules apply for the shift amount and scratch registers.
28352 - Shift by register requires both scratch registers. In all other cases
28353 the scratch registers may be NULL.
28354 - Ashiftrt by a register also clobbers the CC register. */
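/* For example, for a constant logical right shift by 40 the code below
   produces out_low = in_high >> 8 and out_high = 0, while a constant left
   shift by 8 produces out_high = (in_high << 8) | (in_low >> 24) and
   out_low = in_low << 8.  */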
28355 void
28356 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
28357 rtx amount, rtx scratch1, rtx scratch2)
28358 {
28359 rtx out_high = gen_highpart (SImode, out);
28360 rtx out_low = gen_lowpart (SImode, out);
28361 rtx in_high = gen_highpart (SImode, in);
28362 rtx in_low = gen_lowpart (SImode, in);
28363
28364 /* Terminology:
28365 in = the register pair containing the input value.
28366 out = the destination register pair.
28367 up = the high- or low-part of each pair.
28368 down = the opposite part to "up".
28369 In a shift, we can consider bits to shift from "up"-stream to
28370 "down"-stream, so in a left-shift "up" is the low-part and "down"
28371 is the high-part of each register pair. */
28372
28373 rtx out_up = code == ASHIFT ? out_low : out_high;
28374 rtx out_down = code == ASHIFT ? out_high : out_low;
28375 rtx in_up = code == ASHIFT ? in_low : in_high;
28376 rtx in_down = code == ASHIFT ? in_high : in_low;
28377
28378 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
28379 gcc_assert (out
28380 && (REG_P (out) || GET_CODE (out) == SUBREG)
28381 && GET_MODE (out) == DImode);
28382 gcc_assert (in
28383 && (REG_P (in) || GET_CODE (in) == SUBREG)
28384 && GET_MODE (in) == DImode);
28385 gcc_assert (amount
28386 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28387 && GET_MODE (amount) == SImode)
28388 || CONST_INT_P (amount)));
28389 gcc_assert (scratch1 == NULL
28390 || (GET_CODE (scratch1) == SCRATCH)
28391 || (GET_MODE (scratch1) == SImode
28392 && REG_P (scratch1)));
28393 gcc_assert (scratch2 == NULL
28394 || (GET_CODE (scratch2) == SCRATCH)
28395 || (GET_MODE (scratch2) == SImode
28396 && REG_P (scratch2)));
28397 gcc_assert (!REG_P (out) || !REG_P (amount)
28398 || !HARD_REGISTER_P (out)
28399 || (REGNO (out) != REGNO (amount)
28400 && REGNO (out) + 1 != REGNO (amount)));
28401
28402 /* Macros to make following code more readable. */
28403 #define SUB_32(DEST,SRC) \
28404 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28405 #define RSB_32(DEST,SRC) \
28406 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28407 #define SUB_S_32(DEST,SRC) \
28408 gen_addsi3_compare0 ((DEST), (SRC), \
28409 GEN_INT (-32))
28410 #define SET(DEST,SRC) \
28411 gen_rtx_SET (SImode, (DEST), (SRC))
28412 #define SHIFT(CODE,SRC,AMOUNT) \
28413 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28414 #define LSHIFT(CODE,SRC,AMOUNT) \
28415 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28416 SImode, (SRC), (AMOUNT))
28417 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28418 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28419 SImode, (SRC), (AMOUNT))
28420 #define ORR(A,B) \
28421 gen_rtx_IOR (SImode, (A), (B))
28422 #define BRANCH(COND,LABEL) \
28423 gen_arm_cond_branch ((LABEL), \
28424 gen_rtx_ ## COND (CCmode, cc_reg, \
28425 const0_rtx), \
28426 cc_reg)
28427
28428 /* Shifts by register and shifts by constant are handled separately. */
28429 if (CONST_INT_P (amount))
28430 {
28431 /* We have a shift-by-constant. */
28432
28433 /* First, handle out-of-range shift amounts.
28434 In both cases we try to match the result an ARM instruction in a
28435 shift-by-register would give. This helps reduce execution
28436 differences between optimization levels, but it won't stop other
28437 parts of the compiler doing different things. This is "undefined
28438 behaviour", in any case. */
28439 if (INTVAL (amount) <= 0)
28440 emit_insn (gen_movdi (out, in));
28441 else if (INTVAL (amount) >= 64)
28442 {
28443 if (code == ASHIFTRT)
28444 {
28445 rtx const31_rtx = GEN_INT (31);
28446 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
28447 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
28448 }
28449 else
28450 emit_insn (gen_movdi (out, const0_rtx));
28451 }
28452
28453 /* Now handle valid shifts. */
28454 else if (INTVAL (amount) < 32)
28455 {
28456 /* Shifts by a constant less than 32. */
28457 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
28458
28459 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28460 emit_insn (SET (out_down,
28461 ORR (REV_LSHIFT (code, in_up, reverse_amount),
28462 out_down)));
28463 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28464 }
28465 else
28466 {
28467 /* Shifts by a constant greater than 31. */
28468 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
28469
28470 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
28471 if (code == ASHIFTRT)
28472 emit_insn (gen_ashrsi3 (out_up, in_up,
28473 GEN_INT (31)));
28474 else
28475 emit_insn (SET (out_up, const0_rtx));
28476 }
28477 }
28478 else
28479 {
28480 /* We have a shift-by-register. */
28481 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
28482
28483 /* This alternative requires the scratch registers. */
28484 gcc_assert (scratch1 && REG_P (scratch1));
28485 gcc_assert (scratch2 && REG_P (scratch2));
28486
28487 /* We will need the values "amount-32" and "32-amount" later.
28488 Swapping them around now allows the later code to be more general. */
28489 switch (code)
28490 {
28491 case ASHIFT:
28492 emit_insn (SUB_32 (scratch1, amount));
28493 emit_insn (RSB_32 (scratch2, amount));
28494 break;
28495 case ASHIFTRT:
28496 emit_insn (RSB_32 (scratch1, amount));
28497 /* Also set CC = amount > 32. */
28498 emit_insn (SUB_S_32 (scratch2, amount));
28499 break;
28500 case LSHIFTRT:
28501 emit_insn (RSB_32 (scratch1, amount));
28502 emit_insn (SUB_32 (scratch2, amount));
28503 break;
28504 default:
28505 gcc_unreachable ();
28506 }
28507
28508 /* Emit code like this:
28509
28510 arithmetic-left:
28511 out_down = in_down << amount;
28512 out_down = (in_up << (amount - 32)) | out_down;
28513 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28514 out_up = in_up << amount;
28515
28516 arithmetic-right:
28517 out_down = in_down >> amount;
28518 out_down = (in_up << (32 - amount)) | out_down;
28519 if (amount < 32)
28520 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28521 out_up = in_up << amount;
28522
28523 logical-right:
28524 out_down = in_down >> amount;
28525 out_down = (in_up << (32 - amount)) | out_down;
28526 if (amount < 32)
28527 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28528 out_up = in_up << amount;
28529
28530 The ARM and Thumb2 variants are the same but implemented slightly
28531 differently. If this were only called during expand we could just
28532 use the Thumb2 case and let combine do the right thing, but this
28533 can also be called from post-reload splitters. */
28534
28535 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28536
28537 if (!TARGET_THUMB2)
28538 {
28539 /* Emit code for ARM mode. */
28540 emit_insn (SET (out_down,
28541 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
28542 if (code == ASHIFTRT)
28543 {
28544 rtx_code_label *done_label = gen_label_rtx ();
28545 emit_jump_insn (BRANCH (LT, done_label));
28546 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
28547 out_down)));
28548 emit_label (done_label);
28549 }
28550 else
28551 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
28552 out_down)));
28553 }
28554 else
28555 {
28556 /* Emit code for Thumb2 mode.
28557 Thumb2 can't do shift and or in one insn. */
28558 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
28559 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
28560
28561 if (code == ASHIFTRT)
28562 {
28563 rtx_code_label *done_label = gen_label_rtx ();
28564 emit_jump_insn (BRANCH (LT, done_label));
28565 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
28566 emit_insn (SET (out_down, ORR (out_down, scratch2)));
28567 emit_label (done_label);
28568 }
28569 else
28570 {
28571 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
28572 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
28573 }
28574 }
28575
28576 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28577 }
28578
28579 #undef SUB_32
28580 #undef RSB_32
28581 #undef SUB_S_32
28582 #undef SET
28583 #undef SHIFT
28584 #undef LSHIFT
28585 #undef REV_LSHIFT
28586 #undef ORR
28587 #undef BRANCH
28588 }
28589
28590
28591 /* Return TRUE if *COMPARISON is a valid comparison operation, and put
28592 the operands into a form that is valid for it. */
28593 bool
28594 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
28595 {
28596 enum rtx_code code = GET_CODE (*comparison);
28597 int code_int;
28598 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
28599 ? GET_MODE (*op2) : GET_MODE (*op1);
28600
28601 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
28602
28603 if (code == UNEQ || code == LTGT)
28604 return false;
28605
28606 code_int = (int)code;
28607 arm_canonicalize_comparison (&code_int, op1, op2, 0);
28608 PUT_CODE (*comparison, (enum rtx_code)code_int);
28609
28610 switch (mode)
28611 {
28612 case SImode:
28613 if (!arm_add_operand (*op1, mode))
28614 *op1 = force_reg (mode, *op1);
28615 if (!arm_add_operand (*op2, mode))
28616 *op2 = force_reg (mode, *op2);
28617 return true;
28618
28619 case DImode:
28620 if (!cmpdi_operand (*op1, mode))
28621 *op1 = force_reg (mode, *op1);
28622 if (!cmpdi_operand (*op2, mode))
28623 *op2 = force_reg (mode, *op2);
28624 return true;
28625
28626 case SFmode:
28627 case DFmode:
28628 if (!arm_float_compare_operand (*op1, mode))
28629 *op1 = force_reg (mode, *op1);
28630 if (!arm_float_compare_operand (*op2, mode))
28631 *op2 = force_reg (mode, *op2);
28632 return true;
28633 default:
28634 break;
28635 }
28636
28637 return false;
28638
28639 }
28640
28641 /* Return the maximum number of instructions to use when setting a block of memory. */
28642 static int
28643 arm_block_set_max_insns (void)
28644 {
28645 if (optimize_function_for_size_p (cfun))
28646 return 4;
28647 else
28648 return current_tune->max_insns_inline_memset;
28649 }
28650
28651 /* Return TRUE if it's profitable to set block of memory for
28652 non-vectorized case. VAL is the value to set the memory
28653 with. LENGTH is the number of bytes to set. ALIGN is the
28654 alignment of the destination memory in bytes. UNALIGNED_P
28655 is TRUE if we can only set the memory with instructions
28656 meeting alignment requirements. USE_STRD_P is TRUE if we
28657 can use strd to set the memory. */
28658 static bool
28659 arm_block_set_non_vect_profit_p (rtx val,
28660 unsigned HOST_WIDE_INT length,
28661 unsigned HOST_WIDE_INT align,
28662 bool unaligned_p, bool use_strd_p)
28663 {
28664 int num = 0;
28665 /* For a leftover of 0-7 bytes, this table gives the minimum number of
28666 strb/strh/str instructions needed to store it. */
28667 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
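/* For example, for LENGTH == 23 the word-sized path below counts
   23 >> 2 == 5 str instructions plus leftover[3] == 2 (strh + strb),
   reduced by one when unaligned access allows the final strh/strb pair
   to be merged into a single overlapping str.  */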
28668
28669 if (unaligned_p)
28670 {
28671 num = arm_const_inline_cost (SET, val);
28672 num += length / align + length % align;
28673 }
28674 else if (use_strd_p)
28675 {
28676 num = arm_const_double_inline_cost (val);
28677 num += (length >> 3) + leftover[length & 7];
28678 }
28679 else
28680 {
28681 num = arm_const_inline_cost (SET, val);
28682 num += (length >> 2) + leftover[length & 3];
28683 }
28684
28685 /* We may be able to combine the last STRH/STRB pair into a single STR
28686 by shifting one byte back. */
28687 if (unaligned_access && length > 3 && (length & 3) == 3)
28688 num--;
28689
28690 return (num <= arm_block_set_max_insns ());
28691 }
28692
28693 /* Return TRUE if it's profitable to set block of memory for
28694 vectorized case. LENGTH is the number of bytes to set.
28695 ALIGN is the alignment of destination memory in bytes.
28696 MODE is the vector mode used to set the memory. */
28697 static bool
28698 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
28699 unsigned HOST_WIDE_INT align,
28700 machine_mode mode)
28701 {
28702 int num;
28703 bool unaligned_p = ((align & 3) != 0);
28704 unsigned int nelt = GET_MODE_NUNITS (mode);
28705
28706 /* Instruction loading constant value. */
28707 num = 1;
28708 /* Instructions storing the memory. */
28709 num += (length + nelt - 1) / nelt;
28710 /* Instructions adjusting the address expression. We only need to
28711 adjust the address expression if it's 4-byte aligned and the leftover
28712 bytes can only be stored by a misaligned store instruction. */
28713 if (!unaligned_p && (length & 3) != 0)
28714 num++;
28715
28716 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
28717 if (!unaligned_p && mode == V16QImode)
28718 num--;
28719
28720 return (num <= arm_block_set_max_insns ());
28721 }
28722
28723 /* Set a block of memory using vectorization instructions for the
28724 unaligned case. We fill the first LENGTH bytes of the memory
28725 area starting from DSTBASE with byte constant VALUE. ALIGN is
28726 the alignment requirement of memory. Return TRUE if succeeded. */
28727 static bool
28728 arm_block_set_unaligned_vect (rtx dstbase,
28729 unsigned HOST_WIDE_INT length,
28730 unsigned HOST_WIDE_INT value,
28731 unsigned HOST_WIDE_INT align)
28732 {
28733 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
28734 rtx dst, mem;
28735 rtx val_elt, val_vec, reg;
28736 rtx rval[MAX_VECT_LEN];
28737 rtx (*gen_func) (rtx, rtx);
28738 machine_mode mode;
28739 unsigned HOST_WIDE_INT v = value;
28740
28741 gcc_assert ((align & 0x3) != 0);
28742 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28743 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28744 if (length >= nelt_v16)
28745 {
28746 mode = V16QImode;
28747 gen_func = gen_movmisalignv16qi;
28748 }
28749 else
28750 {
28751 mode = V8QImode;
28752 gen_func = gen_movmisalignv8qi;
28753 }
28754 nelt_mode = GET_MODE_NUNITS (mode);
28755 gcc_assert (length >= nelt_mode);
28756 /* Skip if it isn't profitable. */
28757 if (!arm_block_set_vect_profit_p (length, align, mode))
28758 return false;
28759
28760 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28761 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28762
28763 v = sext_hwi (v, BITS_PER_WORD);
28764 val_elt = GEN_INT (v);
28765 for (j = 0; j < nelt_mode; j++)
28766 rval[j] = val_elt;
28767
28768 reg = gen_reg_rtx (mode);
28769 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28770 /* Emit instruction loading the constant value. */
28771 emit_move_insn (reg, val_vec);
28772
28773 /* Handle nelt_mode bytes in a vector. */
28774 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
28775 {
28776 emit_insn ((*gen_func) (mem, reg));
28777 if (i + 2 * nelt_mode <= length)
28778 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
28779 }
28780
28781 /* If at least nelt_v8 bytes are left over, we must be in
28782 V16QImode. */
28783 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
28784
28785 /* Handle (8, 16) bytes leftover. */
28786 if (i + nelt_v8 < length)
28787 {
28788 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
28789 /* We are shifting bytes back, set the alignment accordingly. */
28790 if ((length & 1) != 0 && align >= 2)
28791 set_mem_align (mem, BITS_PER_UNIT);
28792
28793 emit_insn (gen_movmisalignv16qi (mem, reg));
28794 }
28795 /* Handle (0, 8] bytes leftover. */
28796 else if (i < length && i + nelt_v8 >= length)
28797 {
28798 if (mode == V16QImode)
28799 {
28800 reg = gen_lowpart (V8QImode, reg);
28801 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
28802 }
28803 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
28804 + (nelt_mode - nelt_v8))));
28805 /* We are shifting bytes back, set the alignment accordingly. */
28806 if ((length & 1) != 0 && align >= 2)
28807 set_mem_align (mem, BITS_PER_UNIT);
28808
28809 emit_insn (gen_movmisalignv8qi (mem, reg));
28810 }
28811
28812 return true;
28813 }
28814
28815 /* Set a block of memory using vectorization instructions for the
28816 aligned case. We fill the first LENGTH bytes of the memory area
28817 starting from DSTBASE with byte constant VALUE. ALIGN is the
28818 alignment requirement of memory. Return TRUE if succeeded. */
28819 static bool
28820 arm_block_set_aligned_vect (rtx dstbase,
28821 unsigned HOST_WIDE_INT length,
28822 unsigned HOST_WIDE_INT value,
28823 unsigned HOST_WIDE_INT align)
28824 {
28825 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
28826 rtx dst, addr, mem;
28827 rtx val_elt, val_vec, reg;
28828 rtx rval[MAX_VECT_LEN];
28829 machine_mode mode;
28830 unsigned HOST_WIDE_INT v = value;
28831
28832 gcc_assert ((align & 0x3) == 0);
28833 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28834 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28835 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
28836 mode = V16QImode;
28837 else
28838 mode = V8QImode;
28839
28840 nelt_mode = GET_MODE_NUNITS (mode);
28841 gcc_assert (length >= nelt_mode);
28842 /* Skip if it isn't profitable. */
28843 if (!arm_block_set_vect_profit_p (length, align, mode))
28844 return false;
28845
28846 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28847
28848 v = sext_hwi (v, BITS_PER_WORD);
28849 val_elt = GEN_INT (v);
28850 for (j = 0; j < nelt_mode; j++)
28851 rval[j] = val_elt;
28852
28853 reg = gen_reg_rtx (mode);
28854 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28855 /* Emit instruction loading the constant value. */
28856 emit_move_insn (reg, val_vec);
28857
28858 i = 0;
28859 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
28860 if (mode == V16QImode)
28861 {
28862 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28863 emit_insn (gen_movmisalignv16qi (mem, reg));
28864 i += nelt_mode;
28865 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
28866 if (i + nelt_v8 < length && i + nelt_v16 > length)
28867 {
28868 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28869 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28870 /* We are shifting bytes back, set the alignment accordingly. */
28871 if ((length & 0x3) == 0)
28872 set_mem_align (mem, BITS_PER_UNIT * 4);
28873 else if ((length & 0x1) == 0)
28874 set_mem_align (mem, BITS_PER_UNIT * 2);
28875 else
28876 set_mem_align (mem, BITS_PER_UNIT);
28877
28878 emit_insn (gen_movmisalignv16qi (mem, reg));
28879 return true;
28880 }
28881 /* Fall through for bytes leftover. */
28882 mode = V8QImode;
28883 nelt_mode = GET_MODE_NUNITS (mode);
28884 reg = gen_lowpart (V8QImode, reg);
28885 }
28886
28887 /* Handle 8 bytes in a vector. */
28888 for (; (i + nelt_mode <= length); i += nelt_mode)
28889 {
28890 addr = plus_constant (Pmode, dst, i);
28891 mem = adjust_automodify_address (dstbase, mode, addr, i);
28892 emit_move_insn (mem, reg);
28893 }
28894
28895 /* Handle single word leftover by shifting 4 bytes back. We can
28896 use aligned access for this case. */
28897 if (i + UNITS_PER_WORD == length)
28898 {
28899 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
28900 mem = adjust_automodify_address (dstbase, mode,
28901 addr, i - UNITS_PER_WORD);
28902 /* We are shifting 4 bytes back, set the alignment accordingly. */
28903 if (align > UNITS_PER_WORD)
28904 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
28905
28906 emit_move_insn (mem, reg);
28907 }
28908 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
28909 We have to use unaligned access for this case. */
28910 else if (i < length)
28911 {
28912 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28913 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28914 /* We are shifting bytes back, set the alignment accordingly. */
28915 if ((length & 1) == 0)
28916 set_mem_align (mem, BITS_PER_UNIT * 2);
28917 else
28918 set_mem_align (mem, BITS_PER_UNIT);
28919
28920 emit_insn (gen_movmisalignv8qi (mem, reg));
28921 }
28922
28923 return true;
28924 }
28925
28926 /* Set a block of memory using plain strh/strb instructions, only
28927 using instructions allowed by ALIGN on the processor. We fill the
28928 first LENGTH bytes of the memory area starting from DSTBASE
28929 with byte constant VALUE. ALIGN is the alignment requirement
28930 of memory. */
28931 static bool
28932 arm_block_set_unaligned_non_vect (rtx dstbase,
28933 unsigned HOST_WIDE_INT length,
28934 unsigned HOST_WIDE_INT value,
28935 unsigned HOST_WIDE_INT align)
28936 {
28937 unsigned int i;
28938 rtx dst, addr, mem;
28939 rtx val_exp, val_reg, reg;
28940 machine_mode mode;
28941 HOST_WIDE_INT v = value;
28942
28943 gcc_assert (align == 1 || align == 2);
28944
28945 if (align == 2)
28946 v |= (value << BITS_PER_UNIT);
28947
28948 v = sext_hwi (v, BITS_PER_WORD);
28949 val_exp = GEN_INT (v);
28950 /* Skip if it isn't profitable. */
28951 if (!arm_block_set_non_vect_profit_p (val_exp, length,
28952 align, true, false))
28953 return false;
28954
28955 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28956 mode = (align == 2 ? HImode : QImode);
28957 val_reg = force_reg (SImode, val_exp);
28958 reg = gen_lowpart (mode, val_reg);
28959
28960 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
28961 {
28962 addr = plus_constant (Pmode, dst, i);
28963 mem = adjust_automodify_address (dstbase, mode, addr, i);
28964 emit_move_insn (mem, reg);
28965 }
28966
28967 /* Handle single byte leftover. */
28968 if (i + 1 == length)
28969 {
28970 reg = gen_lowpart (QImode, val_reg);
28971 addr = plus_constant (Pmode, dst, i);
28972 mem = adjust_automodify_address (dstbase, QImode, addr, i);
28973 emit_move_insn (mem, reg);
28974 i++;
28975 }
28976
28977 gcc_assert (i == length);
28978 return true;
28979 }
28980
28981 /* Set a block of memory using plain strd/str/strh/strb instructions,
28982 with unaligned stores permitted on processors which support unaligned
28983 semantics for those instructions. We fill the first LENGTH bytes
28984 of the memory area starting from DSTBASE with byte constant VALUE.
28985 ALIGN is the alignment requirement of memory. */
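/* For example, with LENGTH == 15, VALUE == 0x55 and 4-byte alignment, and
   assuming the tuning does not prefer strd and unaligned access is enabled,
   this emits three word stores of 0x55555555 at offsets 0, 4 and 8 followed
   by one unaligned word store at offset 11 covering the remaining bytes.  */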
28986 static bool
28987 arm_block_set_aligned_non_vect (rtx dstbase,
28988 unsigned HOST_WIDE_INT length,
28989 unsigned HOST_WIDE_INT value,
28990 unsigned HOST_WIDE_INT align)
28991 {
28992 unsigned int i;
28993 rtx dst, addr, mem;
28994 rtx val_exp, val_reg, reg;
28995 unsigned HOST_WIDE_INT v;
28996 bool use_strd_p;
28997
28998 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
28999 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29000
29001 v = (value | (value << 8) | (value << 16) | (value << 24));
29002 if (length < UNITS_PER_WORD)
29003 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29004
29005 if (use_strd_p)
29006 v |= (v << BITS_PER_WORD);
29007 else
29008 v = sext_hwi (v, BITS_PER_WORD);
29009
29010 val_exp = GEN_INT (v);
29011 /* Skip if it isn't profitable. */
29012 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29013 align, false, use_strd_p))
29014 {
29015 if (!use_strd_p)
29016 return false;
29017
29018 /* Try without strd. */
29019 v = (v >> BITS_PER_WORD);
29020 v = sext_hwi (v, BITS_PER_WORD);
29021 val_exp = GEN_INT (v);
29022 use_strd_p = false;
29023 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29024 align, false, use_strd_p))
29025 return false;
29026 }
29027
29028 i = 0;
29029 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29030 /* Handle double words using strd if possible. */
29031 if (use_strd_p)
29032 {
29033 val_reg = force_reg (DImode, val_exp);
29034 reg = val_reg;
29035 for (; (i + 8 <= length); i += 8)
29036 {
29037 addr = plus_constant (Pmode, dst, i);
29038 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29039 emit_move_insn (mem, reg);
29040 }
29041 }
29042 else
29043 val_reg = force_reg (SImode, val_exp);
29044
29045 /* Handle words. */
29046 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
29047 for (; (i + 4 <= length); i += 4)
29048 {
29049 addr = plus_constant (Pmode, dst, i);
29050 mem = adjust_automodify_address (dstbase, SImode, addr, i);
29051 if ((align & 3) == 0)
29052 emit_move_insn (mem, reg);
29053 else
29054 emit_insn (gen_unaligned_storesi (mem, reg));
29055 }
29056
29057 /* Merge last pair of STRH and STRB into a STR if possible. */
29058 if (unaligned_access && i > 0 && (i + 3) == length)
29059 {
29060 addr = plus_constant (Pmode, dst, i - 1);
29061 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29062 /* We are shifting one byte back, set the alignment accordingly. */
29063 if ((align & 1) == 0)
29064 set_mem_align (mem, BITS_PER_UNIT);
29065
29066 /* Most likely this is an unaligned access, and we can't tell at
29067 compilation time. */
29068 emit_insn (gen_unaligned_storesi (mem, reg));
29069 return true;
29070 }
29071
29072 /* Handle half word leftover. */
29073 if (i + 2 <= length)
29074 {
29075 reg = gen_lowpart (HImode, val_reg);
29076 addr = plus_constant (Pmode, dst, i);
29077 mem = adjust_automodify_address (dstbase, HImode, addr, i);
29078 if ((align & 1) == 0)
29079 emit_move_insn (mem, reg);
29080 else
29081 emit_insn (gen_unaligned_storehi (mem, reg));
29082
29083 i += 2;
29084 }
29085
29086 /* Handle single byte leftover. */
29087 if (i + 1 == length)
29088 {
29089 reg = gen_lowpart (QImode, val_reg);
29090 addr = plus_constant (Pmode, dst, i);
29091 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29092 emit_move_insn (mem, reg);
29093 }
29094
29095 return true;
29096 }
29097
29098 /* Set a block of memory using vectorization instructions for both
29099 aligned and unaligned cases. We fill the first LENGTH bytes of
29100 the memory area starting from DSTBASE with byte constant VALUE.
29101 ALIGN is the alignment requirement of memory. */
29102 static bool
29103 arm_block_set_vect (rtx dstbase,
29104 unsigned HOST_WIDE_INT length,
29105 unsigned HOST_WIDE_INT value,
29106 unsigned HOST_WIDE_INT align)
29107 {
29108 /* Check whether we need to use unaligned store instruction. */
29109 if (((align & 3) != 0 || (length & 3) != 0)
29110 /* Check whether unaligned store instruction is available. */
29111 && (!unaligned_access || BYTES_BIG_ENDIAN))
29112 return false;
29113
29114 if ((align & 3) == 0)
29115 return arm_block_set_aligned_vect (dstbase, length, value, align);
29116 else
29117 return arm_block_set_unaligned_vect (dstbase, length, value, align);
29118 }
29119
29120 /* Expand a block memory-set operation. First we try to do it using
29121 vectorization instructions, then fall back to ARM unaligned access and
29122 double-word stores if profitable. OPERANDS[0] is the destination,
29123 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value with which
29124 to initialize the memory, OPERANDS[3] is the known alignment of the
29125 destination. */
29126 bool
29127 arm_gen_setmem (rtx *operands)
29128 {
29129 rtx dstbase = operands[0];
29130 unsigned HOST_WIDE_INT length;
29131 unsigned HOST_WIDE_INT value;
29132 unsigned HOST_WIDE_INT align;
29133
29134 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
29135 return false;
29136
29137 length = UINTVAL (operands[1]);
29138 if (length > 64)
29139 return false;
29140
29141 value = (UINTVAL (operands[2]) & 0xFF);
29142 align = UINTVAL (operands[3]);
29143 if (TARGET_NEON && length >= 8
29144 && current_tune->string_ops_prefer_neon
29145 && arm_block_set_vect (dstbase, length, value, align))
29146 return true;
29147
29148 if (!unaligned_access && (align & 3) != 0)
29149 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
29150
29151 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
29152 }
29153
29154 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
29155
29156 static unsigned HOST_WIDE_INT
29157 arm_asan_shadow_offset (void)
29158 {
29159 return (unsigned HOST_WIDE_INT) 1 << 29;
29160 }
29161
29162
29163 /* This is a temporary fix for PR60655. Ideally we need
29164 to handle most of these cases in the generic part but
29165 currently we reject minus (..) (sym_ref). We try to
29166 ameliorate the case with minus (sym_ref1) (sym_ref2)
29167 where they are in the same section. */
29168
29169 static bool
29170 arm_const_not_ok_for_debug_p (rtx p)
29171 {
29172 tree decl_op0 = NULL;
29173 tree decl_op1 = NULL;
29174
29175 if (GET_CODE (p) == MINUS)
29176 {
29177 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
29178 {
29179 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
29180 if (decl_op1
29181 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
29182 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
29183 {
29184 if ((TREE_CODE (decl_op1) == VAR_DECL
29185 || TREE_CODE (decl_op1) == CONST_DECL)
29186 && (TREE_CODE (decl_op0) == VAR_DECL
29187 || TREE_CODE (decl_op0) == CONST_DECL))
29188 return (get_variable_section (decl_op1, false)
29189 != get_variable_section (decl_op0, false));
29190
29191 if (TREE_CODE (decl_op1) == LABEL_DECL
29192 && TREE_CODE (decl_op0) == LABEL_DECL)
29193 return (DECL_CONTEXT (decl_op1)
29194 != DECL_CONTEXT (decl_op0));
29195 }
29196
29197 return true;
29198 }
29199 }
29200
29201 return false;
29202 }
29203
29204 /* Return TRUE if X is a reference to a value in a constant pool. */
29205 extern bool
29206 arm_is_constant_pool_ref (rtx x)
29207 {
29208 return (MEM_P (x)
29209 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
29210 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
29211 }
29212
29213 /* If MEM is in the form of [base+offset], extract the two parts
29214 of the address and store them in BASE and OFFSET; otherwise return
29215 FALSE after clearing BASE and OFFSET. */
29216
29217 static bool
29218 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
29219 {
29220 rtx addr;
29221
29222 gcc_assert (MEM_P (mem));
29223
29224 addr = XEXP (mem, 0);
29225
29226 /* Strip off const from addresses like (const (addr)). */
29227 if (GET_CODE (addr) == CONST)
29228 addr = XEXP (addr, 0);
29229
29230 if (GET_CODE (addr) == REG)
29231 {
29232 *base = addr;
29233 *offset = const0_rtx;
29234 return true;
29235 }
29236
29237 if (GET_CODE (addr) == PLUS
29238 && GET_CODE (XEXP (addr, 0)) == REG
29239 && CONST_INT_P (XEXP (addr, 1)))
29240 {
29241 *base = XEXP (addr, 0);
29242 *offset = XEXP (addr, 1);
29243 return true;
29244 }
29245
29246 *base = NULL_RTX;
29247 *offset = NULL_RTX;
29248
29249 return false;
29250 }
29251
29252 /* If INSN is a load or store whose address is in the form [base+offset],
29253 extract the two parts and store them in BASE and OFFSET. IS_LOAD is set
29254 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
29255 otherwise return FALSE. */
29256
29257 static bool
29258 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
29259 {
29260 rtx x, dest, src;
29261
29262 gcc_assert (INSN_P (insn));
29263 x = PATTERN (insn);
29264 if (GET_CODE (x) != SET)
29265 return false;
29266
29267 src = SET_SRC (x);
29268 dest = SET_DEST (x);
29269 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
29270 {
29271 *is_load = false;
29272 extract_base_offset_in_addr (dest, base, offset);
29273 }
29274 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
29275 {
29276 *is_load = true;
29277 extract_base_offset_in_addr (src, base, offset);
29278 }
29279 else
29280 return false;
29281
29282 return (*base != NULL_RTX && *offset != NULL_RTX);
29283 }
29284
29285 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
29286
29287 Currently we only support fusing ldr and str instructions, so FUSION_PRI
29288 and PRI are only calculated for these instructions. For other instructions,
29289 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
29290 instruction fusion can be supported by returning different priorities.
29291
29292 It's important that irrelevant instructions get the largest FUSION_PRI. */
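/* For example, given "ldr r0, [r4, #4]" and "ldr r1, [r4, #8]", both get
   FUSION_PRI == max_pri - 2, and the smaller offset yields the larger PRI,
   so the scheduler will try to keep the two accesses together, ordered by
   offset.  */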
29293
29294 static void
29295 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
29296 int *fusion_pri, int *pri)
29297 {
29298 int tmp, off_val;
29299 bool is_load;
29300 rtx base, offset;
29301
29302 gcc_assert (INSN_P (insn));
29303
29304 tmp = max_pri - 1;
29305 if (!fusion_load_store (insn, &base, &offset, &is_load))
29306 {
29307 *pri = tmp;
29308 *fusion_pri = tmp;
29309 return;
29310 }
29311
29312 /* Load goes first. */
29313 if (is_load)
29314 *fusion_pri = tmp - 1;
29315 else
29316 *fusion_pri = tmp - 2;
29317
29318 tmp /= 2;
29319
29320 /* INSN with smaller base register goes first. */
29321 tmp -= ((REGNO (base) & 0xff) << 20);
29322
29323 /* INSN with smaller offset goes first. */
29324 off_val = (int)(INTVAL (offset));
29325 if (off_val >= 0)
29326 tmp -= (off_val & 0xfffff);
29327 else
29328 tmp += ((- off_val) & 0xfffff);
29329
29330 *pri = tmp;
29331 return;
29332 }
29333 #include "gt-arm.h"