/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2015 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "double-int.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "dominance.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "hard-reg-set.h"
#include "statistics.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "target-def.h"
#include "targhooks.h"
#include "langhooks.h"
#include "diagnostic-core.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "gimple-expr.h"
#include "tree-vectorizer.h"
#include "aarch64-cost-tables.h"
#include "tm-constrs.h"
/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
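/* For example, POINTER_BYTES evaluates to 64 / 8 == 8 under LP64 and to 4
   under ILP32.  */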
/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type
{
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};
struct aarch64_address_info
{
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};
struct simd_immediate_info
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
  bool msl;
};

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif
static bool aarch64_lra_p (void);
static bool aarch64_composite_type_p (const_tree, machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
                                                     const_tree,
                                                     machine_mode *, int *,
                                                     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
                                                 const unsigned char *sel);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
/* Major revision number of the ARM Architecture implemented by the target.  */
unsigned aarch64_architecture_version;

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* The current tuning set.  */
const struct tune_params *aarch64_tune_params;

/* Mask to specify which instructions we are allowed to generate.  */
unsigned long aarch64_isa_flags = 0;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;
/* Tuning parameters.  */

#if HAVE_DESIGNATED_INITIALIZERS
#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
#else
#define NAMED_PARAM(NAME, VAL) (VAL)
#endif
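/* With designated initializers NAMED_PARAM (issue_rate, 2) expands to
   ".issue_rate = (2)"; without them it expands to a plain "(2)", so the
   positional order of the entries in the tables below must match the field
   order of the corresponding cost and tuning structures.  */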
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif

static const struct cpu_addrcost_table generic_addrcost_table =
{
#if HAVE_DESIGNATED_INITIALIZERS
  .addr_scale_costs =
#endif
    {
      NAMED_PARAM (hi, 0),
      NAMED_PARAM (si, 0),
      NAMED_PARAM (di, 0),
      NAMED_PARAM (ti, 0),
    },
  NAMED_PARAM (pre_modify, 0),
  NAMED_PARAM (post_modify, 0),
  NAMED_PARAM (register_offset, 0),
  NAMED_PARAM (register_extend, 0),
  NAMED_PARAM (imm_offset, 0)
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif

static const struct cpu_addrcost_table cortexa57_addrcost_table =
{
#if HAVE_DESIGNATED_INITIALIZERS
#endif
  NAMED_PARAM (pre_modify, 0),
  NAMED_PARAM (post_modify, 0),
  NAMED_PARAM (register_offset, 0),
  NAMED_PARAM (register_extend, 0),
  NAMED_PARAM (imm_offset, 0),
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif

static const struct cpu_regmove_cost generic_regmove_cost =
{
  NAMED_PARAM (GP2GP, 1),
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  NAMED_PARAM (GP2FP, 5),
  NAMED_PARAM (FP2GP, 5),
  NAMED_PARAM (FP2FP, 2)
};
static const struct cpu_regmove_cost cortexa57_regmove_cost =
{
  NAMED_PARAM (GP2GP, 1),
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  NAMED_PARAM (GP2FP, 5),
  NAMED_PARAM (FP2GP, 5),
  NAMED_PARAM (FP2FP, 2)
};
static const struct cpu_regmove_cost cortexa53_regmove_cost =
{
  NAMED_PARAM (GP2GP, 1),
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  NAMED_PARAM (GP2FP, 5),
  NAMED_PARAM (FP2GP, 5),
  NAMED_PARAM (FP2FP, 2)
};
static const struct cpu_regmove_cost thunderx_regmove_cost =
{
  NAMED_PARAM (GP2GP, 2),
  NAMED_PARAM (GP2FP, 2),
  NAMED_PARAM (FP2GP, 6),
  NAMED_PARAM (FP2FP, 4)
};
/* Generic costs for vector insn classes.  */
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif

static const struct cpu_vector_cost generic_vector_cost =
{
  NAMED_PARAM (scalar_stmt_cost, 1),
  NAMED_PARAM (scalar_load_cost, 1),
  NAMED_PARAM (scalar_store_cost, 1),
  NAMED_PARAM (vec_stmt_cost, 1),
  NAMED_PARAM (vec_to_scalar_cost, 1),
  NAMED_PARAM (scalar_to_vec_cost, 1),
  NAMED_PARAM (vec_align_load_cost, 1),
  NAMED_PARAM (vec_unalign_load_cost, 1),
  NAMED_PARAM (vec_unalign_store_cost, 1),
  NAMED_PARAM (vec_store_cost, 1),
  NAMED_PARAM (cond_taken_branch_cost, 3),
  NAMED_PARAM (cond_not_taken_branch_cost, 1)
};
/* Costs for the vector insn classes of the Cortex-A57.  */
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif

static const struct cpu_vector_cost cortexa57_vector_cost =
{
  NAMED_PARAM (scalar_stmt_cost, 1),
  NAMED_PARAM (scalar_load_cost, 4),
  NAMED_PARAM (scalar_store_cost, 1),
  NAMED_PARAM (vec_stmt_cost, 3),
  NAMED_PARAM (vec_to_scalar_cost, 8),
  NAMED_PARAM (scalar_to_vec_cost, 8),
  NAMED_PARAM (vec_align_load_cost, 5),
  NAMED_PARAM (vec_unalign_load_cost, 5),
  NAMED_PARAM (vec_unalign_store_cost, 1),
  NAMED_PARAM (vec_store_cost, 1),
  NAMED_PARAM (cond_taken_branch_cost, 1),
  NAMED_PARAM (cond_not_taken_branch_cost, 1)
};
#define AARCH64_FUSE_NOTHING	(0)
#define AARCH64_FUSE_MOV_MOVK	(1 << 0)
#define AARCH64_FUSE_ADRP_ADD	(1 << 1)
#define AARCH64_FUSE_MOVK_MOVK	(1 << 2)
#define AARCH64_FUSE_ADRP_LDR	(1 << 3)
#define AARCH64_FUSE_CMP_BRANCH	(1 << 4)
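/* Each AARCH64_FUSE_* bit names a pair of adjacent instructions that the
   target core can macro-fuse; the bits are OR'd into the fuseable_ops field
   of the tune_params structures below so the scheduler keeps such pairs
   together.  */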
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif

static const struct tune_params generic_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  NAMED_PARAM (memmov_cost, 4),
  NAMED_PARAM (issue_rate, 2),
  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING),
  8,	/* function_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1	/* vec_reassoc_width.  */
};
static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  NAMED_PARAM (memmov_cost, 4),
  NAMED_PARAM (issue_rate, 2),
  NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
			      | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR)),
  8,	/* function_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1	/* vec_reassoc_width.  */
};
static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  NAMED_PARAM (memmov_cost, 4),
  NAMED_PARAM (issue_rate, 3),
  NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
			      | AARCH64_FUSE_MOVK_MOVK)),
  16,	/* function_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1	/* vec_reassoc_width.  */
};
static const struct tune_params thunderx_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &generic_vector_cost,
  NAMED_PARAM (memmov_cost, 6),
  NAMED_PARAM (issue_rate, 2),
  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_CMP_BRANCH),
  8,	/* function_align.  */
  2,	/* int_reassoc_width.  */
  4,	/* fp_reassoc_width.  */
  1	/* vec_reassoc_width.  */
};
/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor core;
  const char *arch;
  unsigned architecture_version;
  const unsigned long flags;
  const struct tune_params *const tune;
};
/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS) \
  {NAME, SCHED, #ARCH, ARCH, FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
  {"generic", cortexa53, "8", 8, AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, NULL, 0, 0, NULL}
};
/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, ARCH, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
  {NULL, aarch64_none, NULL, 0, 0, NULL}
};
/* Target specification.  These are populated as commandline arguments
   are processed, or NULL if not specified.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};
/* ISA extensions in AArch64.  */
static const struct aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
  {NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION
  {NULL, 0, 0}
};
/* Used to track the size of an address when generating a pre/post
   increment address.  */
static machine_mode aarch64_memory_reference_mode;

/* Used to force GTY into this file.  */
static GTY(()) int gty_dummy;
/* A table of valid AArch64 "bitmask immediate" values for
   logical instructions.  */

#define AARCH64_NUM_BITMASKS  5334
static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
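/* Bitmask immediates are the values the immediate forms of AND, ORR and EOR
   can encode: a 2-, 4-, 8-, 16-, 32- or 64-bit element holding a rotated run
   of contiguous ones, replicated to fill 64 bits (e.g. 0x00ff00ff00ff00ff or
   0x0000ffff0000ffff).  There are 5334 such values; the table is filled in
   once at initialization and searched when immediates are synthesized.  */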
typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
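/* The enumeration lists each condition code next to its logical inverse, so
   flipping the low bit inverts the condition: for example
   AARCH64_INVERSE_CONDITION_CODE (AARCH64_EQ) is AARCH64_NE, and
   AARCH64_INVERSE_CONDITION_CODE (AARCH64_GE) is AARCH64_LT.  */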
/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
492 aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED
)
498 aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED
,
499 enum machine_mode mode
)
501 if (VECTOR_MODE_P (mode
))
502 return aarch64_tune_params
->vec_reassoc_width
;
503 if (INTEGRAL_MODE_P (mode
))
504 return aarch64_tune_params
->int_reassoc_width
;
505 if (FLOAT_MODE_P (mode
))
506 return aarch64_tune_params
->fp_reassoc_width
;
510 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
512 aarch64_dbx_register_number (unsigned regno
)
514 if (GP_REGNUM_P (regno
))
515 return AARCH64_DWARF_R0
+ regno
- R0_REGNUM
;
516 else if (regno
== SP_REGNUM
)
517 return AARCH64_DWARF_SP
;
518 else if (FP_REGNUM_P (regno
))
519 return AARCH64_DWARF_V0
+ regno
- V0_REGNUM
;
521 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
522 equivalent DWARF register. */
523 return DWARF_FRAME_REGISTERS
;
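/* For example, x3 maps to DWARF register 3, sp to 31 and v7 to 71: the
   AArch64 DWARF numbering places the general registers at 0-30, SP at 31 and
   the vector registers at 64 onwards.  */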
/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}
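/* OImode, CImode and XImode are the 256-, 384- and 512-bit opaque integer
   modes used for AdvSIMD structure types, i.e. the operands of the
   multi-register ld2/st2, ld3/st3 and ld4/st4 forms.  */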
533 /* Return TRUE if MODE is any of the vector modes. */
535 aarch64_vector_mode_p (machine_mode mode
)
537 return aarch64_vector_mode_supported_p (mode
)
538 || aarch64_vect_struct_mode_p (mode
);
541 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
543 aarch64_array_mode_supported_p (machine_mode mode
,
544 unsigned HOST_WIDE_INT nelems
)
547 && AARCH64_VALID_SIMD_QREG_MODE (mode
)
548 && (nelems
>= 2 && nelems
<= 4))
554 /* Implement HARD_REGNO_NREGS. */
557 aarch64_hard_regno_nregs (unsigned regno
, machine_mode mode
)
559 switch (aarch64_regno_regclass (regno
))
563 return (GET_MODE_SIZE (mode
) + UNITS_PER_VREG
- 1) / UNITS_PER_VREG
;
565 return (GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
570 /* Implement HARD_REGNO_MODE_OK. */
573 aarch64_hard_regno_mode_ok (unsigned regno
, machine_mode mode
)
575 if (GET_MODE_CLASS (mode
) == MODE_CC
)
576 return regno
== CC_REGNUM
;
578 if (regno
== SP_REGNUM
)
579 /* The purpose of comparing with ptr_mode is to support the
580 global register variable associated with the stack pointer
581 register via the syntax of asm ("wsp") in ILP32. */
582 return mode
== Pmode
|| mode
== ptr_mode
;
584 if (regno
== FRAME_POINTER_REGNUM
|| regno
== ARG_POINTER_REGNUM
)
585 return mode
== Pmode
;
587 if (GP_REGNUM_P (regno
) && ! aarch64_vect_struct_mode_p (mode
))
590 if (FP_REGNUM_P (regno
))
592 if (aarch64_vect_struct_mode_p (mode
))
594 (regno
+ aarch64_hard_regno_nregs (regno
, mode
) - 1) <= V31_REGNUM
;
602 /* Implement HARD_REGNO_CALLER_SAVE_MODE. */
604 aarch64_hard_regno_caller_save_mode (unsigned regno
, unsigned nregs
,
607 /* Handle modes that fit within single registers. */
608 if (nregs
== 1 && GET_MODE_SIZE (mode
) <= 16)
610 if (GET_MODE_SIZE (mode
) >= 4)
615 /* Fall back to generic for multi-reg and very large modes. */
617 return choose_hard_reg_mode (regno
, nregs
, false);
/* Return true if calls to DECL should be treated as
   long-calls (i.e. called via a register).  */
623 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED
)
/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (i.e. called via a register).  */
631 aarch64_is_long_call_p (rtx sym
)
633 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym
));
/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

   (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
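/* For example, MULT_IMM == 4 together with EXTRACT_IMM == 34 corresponds to
   a 32-bit value that has been extended and multiplied by 4, i.e. an extend
   combined with a left shift by 2, which is the shape checked for by
   aarch64_is_extend_from_extract below.  */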
642 aarch64_is_extend_from_extract (machine_mode mode
, rtx mult_imm
,
645 HOST_WIDE_INT mult_val
, extract_val
;
647 if (! CONST_INT_P (mult_imm
) || ! CONST_INT_P (extract_imm
))
650 mult_val
= INTVAL (mult_imm
);
651 extract_val
= INTVAL (extract_imm
);
654 && extract_val
< GET_MODE_BITSIZE (mode
)
655 && exact_log2 (extract_val
& ~7) > 0
656 && (extract_val
& 7) <= 4
657 && mult_val
== (1 << (extract_val
& 7)))
663 /* Emit an insn that's a simple single-set. Both the operands must be
664 known to be valid. */
666 emit_set_insn (rtx x
, rtx y
)
668 return emit_insn (gen_rtx_SET (VOIDmode
, x
, y
));
671 /* X and Y are two things to compare using CODE. Emit the compare insn and
672 return the rtx for register 0 in the proper mode. */
674 aarch64_gen_compare_reg (RTX_CODE code
, rtx x
, rtx y
)
676 machine_mode mode
= SELECT_CC_MODE (code
, x
, y
);
677 rtx cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
679 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
683 /* Build the SYMBOL_REF for __tls_get_addr. */
685 static GTY(()) rtx tls_get_addr_libfunc
;
688 aarch64_tls_get_addr (void)
690 if (!tls_get_addr_libfunc
)
691 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
692 return tls_get_addr_libfunc
;
695 /* Return the TLS model to use for ADDR. */
697 static enum tls_model
698 tls_symbolic_operand_type (rtx addr
)
700 enum tls_model tls_kind
= TLS_MODEL_NONE
;
703 if (GET_CODE (addr
) == CONST
)
705 split_const (addr
, &sym
, &addend
);
706 if (GET_CODE (sym
) == SYMBOL_REF
)
707 tls_kind
= SYMBOL_REF_TLS_MODEL (sym
);
709 else if (GET_CODE (addr
) == SYMBOL_REF
)
710 tls_kind
= SYMBOL_REF_TLS_MODEL (addr
);
/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as:

   RTL                               Absolute
   tmp = hi (symbol_ref);            adrp  x1, foo
   dest = lo_sum (tmp, symbol_ref);  add   dest, x1, :lo_12:foo

   PIC                               TLS
   adrp x1, :got:foo                 adrp  tmp, :tlsgd:foo
   ldr  x1, [:got_lo12:foo]          add   dest, tmp, :tlsgd_lo12:foo

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
   add   dest, tmp, #:tlsgd_lo12:imm

   Global Dynamic - TLS Descriptors:
   adrp  dest, :tlsdesc:imm
   ldr   tmp, [dest, #:tlsdesc_lo12:imm]
   add   dest, dest, #:tlsdesc_lo12:imm

   Initial Exec:
   adrp  tmp, :gottprel:imm
   ldr   dest, [tmp, #:gottprel_lo12:imm]

   Local Exec:
   add   t0, tp, #:tprel_hi12:imm
   add   t0, #:tprel_lo12_nc:imm  */
758 aarch64_load_symref_appropriately (rtx dest
, rtx imm
,
759 enum aarch64_symbol_type type
)
763 case SYMBOL_SMALL_ABSOLUTE
:
765 /* In ILP32, the mode of dest can be either SImode or DImode. */
767 machine_mode mode
= GET_MODE (dest
);
769 gcc_assert (mode
== Pmode
|| mode
== ptr_mode
);
771 if (can_create_pseudo_p ())
772 tmp_reg
= gen_reg_rtx (mode
);
774 emit_move_insn (tmp_reg
, gen_rtx_HIGH (mode
, imm
));
775 emit_insn (gen_add_losym (dest
, tmp_reg
, imm
));
779 case SYMBOL_TINY_ABSOLUTE
:
780 emit_insn (gen_rtx_SET (Pmode
, dest
, imm
));
783 case SYMBOL_SMALL_GOT
:
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different ldr_got_small
	   patterns here (two patterns for ILP32).  */
793 machine_mode mode
= GET_MODE (dest
);
795 if (can_create_pseudo_p ())
796 tmp_reg
= gen_reg_rtx (mode
);
798 emit_move_insn (tmp_reg
, gen_rtx_HIGH (mode
, imm
));
799 if (mode
== ptr_mode
)
802 emit_insn (gen_ldr_got_small_di (dest
, tmp_reg
, imm
));
804 emit_insn (gen_ldr_got_small_si (dest
, tmp_reg
, imm
));
808 gcc_assert (mode
== Pmode
);
809 emit_insn (gen_ldr_got_small_sidi (dest
, tmp_reg
, imm
));
815 case SYMBOL_SMALL_TLSGD
:
818 rtx result
= gen_rtx_REG (Pmode
, R0_REGNUM
);
821 aarch64_emit_call_insn (gen_tlsgd_small (result
, imm
));
822 insns
= get_insns ();
825 RTL_CONST_CALL_P (insns
) = 1;
826 emit_libcall_block (insns
, dest
, result
, imm
);
830 case SYMBOL_SMALL_TLSDESC
:
832 machine_mode mode
= GET_MODE (dest
);
833 rtx x0
= gen_rtx_REG (mode
, R0_REGNUM
);
836 gcc_assert (mode
== Pmode
|| mode
== ptr_mode
);
838 /* In ILP32, the got entry is always of SImode size. Unlike
839 small GOT, the dest is fixed at reg 0. */
841 emit_insn (gen_tlsdesc_small_si (imm
));
843 emit_insn (gen_tlsdesc_small_di (imm
));
844 tp
= aarch64_load_tp (NULL
);
847 tp
= gen_lowpart (mode
, tp
);
849 emit_insn (gen_rtx_SET (mode
, dest
, gen_rtx_PLUS (mode
, tp
, x0
)));
850 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
854 case SYMBOL_SMALL_GOTTPREL
:
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different tlsie_small
	   patterns here (two patterns for ILP32).  */
863 machine_mode mode
= GET_MODE (dest
);
864 rtx tmp_reg
= gen_reg_rtx (mode
);
865 rtx tp
= aarch64_load_tp (NULL
);
867 if (mode
== ptr_mode
)
870 emit_insn (gen_tlsie_small_di (tmp_reg
, imm
));
873 emit_insn (gen_tlsie_small_si (tmp_reg
, imm
));
874 tp
= gen_lowpart (mode
, tp
);
879 gcc_assert (mode
== Pmode
);
880 emit_insn (gen_tlsie_small_sidi (tmp_reg
, imm
));
883 emit_insn (gen_rtx_SET (mode
, dest
, gen_rtx_PLUS (mode
, tp
, tmp_reg
)));
884 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
888 case SYMBOL_SMALL_TPREL
:
890 rtx tp
= aarch64_load_tp (NULL
);
891 emit_insn (gen_tlsle_small (dest
, tp
, imm
));
892 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
896 case SYMBOL_TINY_GOT
:
897 emit_insn (gen_ldr_got_tiny (dest
, imm
));
905 /* Emit a move from SRC to DEST. Assume that the move expanders can
906 handle all moves if !can_create_pseudo_p (). The distinction is
907 important because, unlike emit_move_insn, the move expanders know
908 how to force Pmode objects into the constant pool even when the
909 constant pool address is not itself legitimate. */
911 aarch64_emit_move (rtx dest
, rtx src
)
913 return (can_create_pseudo_p ()
914 ? emit_move_insn (dest
, src
)
915 : emit_move_insn_1 (dest
, src
));
918 /* Split a 128-bit move operation into two 64-bit move operations,
919 taking care to handle partial overlap of register to register
920 copies. Special cases are needed when moving between GP regs and
921 FP regs. SRC can be a register, constant or memory; DST a register
922 or memory. If either operand is memory it must not have any side
925 aarch64_split_128bit_move (rtx dst
, rtx src
)
930 machine_mode mode
= GET_MODE (dst
);
932 gcc_assert (mode
== TImode
|| mode
== TFmode
);
933 gcc_assert (!(side_effects_p (src
) || side_effects_p (dst
)));
934 gcc_assert (mode
== GET_MODE (src
) || GET_MODE (src
) == VOIDmode
);
936 if (REG_P (dst
) && REG_P (src
))
938 int src_regno
= REGNO (src
);
939 int dst_regno
= REGNO (dst
);
941 /* Handle FP <-> GP regs. */
942 if (FP_REGNUM_P (dst_regno
) && GP_REGNUM_P (src_regno
))
944 src_lo
= gen_lowpart (word_mode
, src
);
945 src_hi
= gen_highpart (word_mode
, src
);
949 emit_insn (gen_aarch64_movtilow_di (dst
, src_lo
));
950 emit_insn (gen_aarch64_movtihigh_di (dst
, src_hi
));
954 emit_insn (gen_aarch64_movtflow_di (dst
, src_lo
));
955 emit_insn (gen_aarch64_movtfhigh_di (dst
, src_hi
));
959 else if (GP_REGNUM_P (dst_regno
) && FP_REGNUM_P (src_regno
))
961 dst_lo
= gen_lowpart (word_mode
, dst
);
962 dst_hi
= gen_highpart (word_mode
, dst
);
966 emit_insn (gen_aarch64_movdi_tilow (dst_lo
, src
));
967 emit_insn (gen_aarch64_movdi_tihigh (dst_hi
, src
));
971 emit_insn (gen_aarch64_movdi_tflow (dst_lo
, src
));
972 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi
, src
));
978 dst_lo
= gen_lowpart (word_mode
, dst
);
979 dst_hi
= gen_highpart (word_mode
, dst
);
980 src_lo
= gen_lowpart (word_mode
, src
);
981 src_hi
= gen_highpart_mode (word_mode
, mode
, src
);
  /* At most one pairing may overlap.  */
  if (reg_overlap_mentioned_p (dst_lo, src_hi))
    {
      aarch64_emit_move (dst_hi, src_hi);
      aarch64_emit_move (dst_lo, src_lo);
    }
  else
    {
      aarch64_emit_move (dst_lo, src_lo);
      aarch64_emit_move (dst_hi, src_hi);
    }
997 aarch64_split_128bit_move_p (rtx dst
, rtx src
)
999 return (! REG_P (src
)
1000 || ! (FP_REGNUM_P (REGNO (dst
)) && FP_REGNUM_P (REGNO (src
))));
1003 /* Split a complex SIMD combine. */
1006 aarch64_split_simd_combine (rtx dst
, rtx src1
, rtx src2
)
1008 machine_mode src_mode
= GET_MODE (src1
);
1009 machine_mode dst_mode
= GET_MODE (dst
);
1011 gcc_assert (VECTOR_MODE_P (dst_mode
));
1013 if (REG_P (dst
) && REG_P (src1
) && REG_P (src2
))
1015 rtx (*gen
) (rtx
, rtx
, rtx
);
1020 gen
= gen_aarch64_simd_combinev8qi
;
1023 gen
= gen_aarch64_simd_combinev4hi
;
1026 gen
= gen_aarch64_simd_combinev2si
;
1029 gen
= gen_aarch64_simd_combinev2sf
;
1032 gen
= gen_aarch64_simd_combinedi
;
1035 gen
= gen_aarch64_simd_combinedf
;
1041 emit_insn (gen (dst
, src1
, src2
));
1046 /* Split a complex SIMD move. */
1049 aarch64_split_simd_move (rtx dst
, rtx src
)
1051 machine_mode src_mode
= GET_MODE (src
);
1052 machine_mode dst_mode
= GET_MODE (dst
);
1054 gcc_assert (VECTOR_MODE_P (dst_mode
));
1056 if (REG_P (dst
) && REG_P (src
))
1058 rtx (*gen
) (rtx
, rtx
);
1060 gcc_assert (VECTOR_MODE_P (src_mode
));
1065 gen
= gen_aarch64_split_simd_movv16qi
;
1068 gen
= gen_aarch64_split_simd_movv8hi
;
1071 gen
= gen_aarch64_split_simd_movv4si
;
1074 gen
= gen_aarch64_split_simd_movv2di
;
1077 gen
= gen_aarch64_split_simd_movv4sf
;
1080 gen
= gen_aarch64_split_simd_movv2df
;
1086 emit_insn (gen (dst
, src
));
1092 aarch64_force_temporary (machine_mode mode
, rtx x
, rtx value
)
1094 if (can_create_pseudo_p ())
1095 return force_reg (mode
, value
);
1098 x
= aarch64_emit_move (x
, value
);
1105 aarch64_add_offset (machine_mode mode
, rtx temp
, rtx reg
, HOST_WIDE_INT offset
)
1107 if (!aarch64_plus_immediate (GEN_INT (offset
), mode
))
1110 /* Load the full offset into a register. This
1111 might be improvable in the future. */
1112 high
= GEN_INT (offset
);
1114 high
= aarch64_force_temporary (mode
, temp
, high
);
1115 reg
= aarch64_force_temporary (mode
, temp
,
1116 gen_rtx_PLUS (mode
, high
, reg
));
1118 return plus_constant (mode
, reg
, offset
);
1122 aarch64_internal_mov_immediate (rtx dest
, rtx imm
, bool generate
,
1125 unsigned HOST_WIDE_INT mask
;
1128 unsigned HOST_WIDE_INT val
;
1131 int one_match
, zero_match
, first_not_ffff_match
;
1134 if (CONST_INT_P (imm
) && aarch64_move_imm (INTVAL (imm
), mode
))
1137 emit_insn (gen_rtx_SET (VOIDmode
, dest
, imm
));
      /* We know we can't do this in 1 insn, and we must be able to do it
	 in two; so don't mess around looking for sequences that don't buy
	 us anything.  */
1149 emit_insn (gen_rtx_SET (VOIDmode
, dest
,
1150 GEN_INT (INTVAL (imm
) & 0xffff)));
1151 emit_insn (gen_insv_immsi (dest
, GEN_INT (16),
1152 GEN_INT ((INTVAL (imm
) >> 16) & 0xffff)));
1158 /* Remaining cases are all for DImode. */
1161 subtargets
= optimize
&& can_create_pseudo_p ();
1166 first_not_ffff_match
= -1;
1168 for (i
= 0; i
< 64; i
+= 16, mask
<<= 16)
1170 if ((val
& mask
) == mask
)
1174 if (first_not_ffff_match
< 0)
1175 first_not_ffff_match
= i
;
1176 if ((val
& mask
) == 0)
1183 /* Set one of the quarters and then insert back into result. */
1184 mask
= 0xffffll
<< first_not_ffff_match
;
1187 emit_insn (gen_rtx_SET (VOIDmode
, dest
, GEN_INT (val
| mask
)));
1188 emit_insn (gen_insv_immdi (dest
, GEN_INT (first_not_ffff_match
),
1189 GEN_INT ((val
>> first_not_ffff_match
)
1196 if (zero_match
== 2)
1197 goto simple_sequence
;
1199 mask
= 0x0ffff0000UL
;
1200 for (i
= 16; i
< 64; i
+= 16, mask
<<= 16)
1202 HOST_WIDE_INT comp
= mask
& ~(mask
- 1);
1204 if (aarch64_uimm12_shift (val
- (val
& mask
)))
1208 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1209 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1210 GEN_INT (val
& mask
)));
1211 emit_insn (gen_adddi3 (dest
, subtarget
,
1212 GEN_INT (val
- (val
& mask
))));
1217 else if (aarch64_uimm12_shift (-(val
- ((val
+ comp
) & mask
))))
1221 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1222 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1223 GEN_INT ((val
+ comp
) & mask
)));
1224 emit_insn (gen_adddi3 (dest
, subtarget
,
1225 GEN_INT (val
- ((val
+ comp
) & mask
))));
1230 else if (aarch64_uimm12_shift (val
- ((val
- comp
) | ~mask
)))
1234 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1235 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1236 GEN_INT ((val
- comp
) | ~mask
)));
1237 emit_insn (gen_adddi3 (dest
, subtarget
,
1238 GEN_INT (val
- ((val
- comp
) | ~mask
))));
1243 else if (aarch64_uimm12_shift (-(val
- (val
| ~mask
))))
1247 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1248 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1249 GEN_INT (val
| ~mask
)));
1250 emit_insn (gen_adddi3 (dest
, subtarget
,
1251 GEN_INT (val
- (val
| ~mask
))));
1258 /* See if we can do it by arithmetically combining two
1260 for (i
= 0; i
< AARCH64_NUM_BITMASKS
; i
++)
1265 if (aarch64_uimm12_shift (val
- aarch64_bitmasks
[i
])
1266 || aarch64_uimm12_shift (-val
+ aarch64_bitmasks
[i
]))
1270 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1271 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1272 GEN_INT (aarch64_bitmasks
[i
])));
1273 emit_insn (gen_adddi3 (dest
, subtarget
,
1274 GEN_INT (val
- aarch64_bitmasks
[i
])));
1280 for (j
= 0; j
< 64; j
+= 16, mask
<<= 16)
1282 if ((aarch64_bitmasks
[i
] & ~mask
) == (val
& ~mask
))
1286 emit_insn (gen_rtx_SET (VOIDmode
, dest
,
1287 GEN_INT (aarch64_bitmasks
[i
])));
1288 emit_insn (gen_insv_immdi (dest
, GEN_INT (j
),
1289 GEN_INT ((val
>> j
) & 0xffff)));
1297 /* See if we can do it by logically combining two immediates. */
1298 for (i
= 0; i
< AARCH64_NUM_BITMASKS
; i
++)
1300 if ((aarch64_bitmasks
[i
] & val
) == aarch64_bitmasks
[i
])
1304 for (j
= i
+ 1; j
< AARCH64_NUM_BITMASKS
; j
++)
1305 if (val
== (aarch64_bitmasks
[i
] | aarch64_bitmasks
[j
]))
1309 subtarget
= subtargets
? gen_reg_rtx (mode
) : dest
;
1310 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1311 GEN_INT (aarch64_bitmasks
[i
])));
1312 emit_insn (gen_iordi3 (dest
, subtarget
,
1313 GEN_INT (aarch64_bitmasks
[j
])));
1319 else if ((val
& aarch64_bitmasks
[i
]) == val
)
1323 for (j
= i
+ 1; j
< AARCH64_NUM_BITMASKS
; j
++)
1324 if (val
== (aarch64_bitmasks
[j
] & aarch64_bitmasks
[i
]))
1328 subtarget
= subtargets
? gen_reg_rtx (mode
) : dest
;
1329 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1330 GEN_INT (aarch64_bitmasks
[j
])));
1331 emit_insn (gen_anddi3 (dest
, subtarget
,
1332 GEN_INT (aarch64_bitmasks
[i
])));
1340 if (one_match
> zero_match
)
1342 /* Set either first three quarters or all but the third. */
1343 mask
= 0xffffll
<< (16 - first_not_ffff_match
);
1345 emit_insn (gen_rtx_SET (VOIDmode
, dest
,
1346 GEN_INT (val
| mask
| 0xffffffff00000000ull
)));
1349 /* Now insert other two quarters. */
1350 for (i
= first_not_ffff_match
+ 16, mask
<<= (first_not_ffff_match
<< 1);
1351 i
< 64; i
+= 16, mask
<<= 16)
1353 if ((val
& mask
) != mask
)
1356 emit_insn (gen_insv_immdi (dest
, GEN_INT (i
),
1357 GEN_INT ((val
>> i
) & 0xffff)));
1367 for (i
= 0; i
< 64; i
+= 16, mask
<<= 16)
1369 if ((val
& mask
) != 0)
1374 emit_insn (gen_rtx_SET (VOIDmode
, dest
,
1375 GEN_INT (val
& mask
)));
1382 emit_insn (gen_insv_immdi (dest
, GEN_INT (i
),
1383 GEN_INT ((val
>> i
) & 0xffff)));
1394 aarch64_expand_mov_immediate (rtx dest
, rtx imm
)
1396 machine_mode mode
= GET_MODE (dest
);
1398 gcc_assert (mode
== SImode
|| mode
== DImode
);
1400 /* Check on what type of symbol it is. */
1401 if (GET_CODE (imm
) == SYMBOL_REF
1402 || GET_CODE (imm
) == LABEL_REF
1403 || GET_CODE (imm
) == CONST
)
1405 rtx mem
, base
, offset
;
1406 enum aarch64_symbol_type sty
;
1408 /* If we have (const (plus symbol offset)), separate out the offset
1409 before we start classifying the symbol. */
1410 split_const (imm
, &base
, &offset
);
1412 sty
= aarch64_classify_symbol (base
, offset
, SYMBOL_CONTEXT_ADR
);
1415 case SYMBOL_FORCE_TO_MEM
:
1416 if (offset
!= const0_rtx
1417 && targetm
.cannot_force_const_mem (mode
, imm
))
1419 gcc_assert (can_create_pseudo_p ());
1420 base
= aarch64_force_temporary (mode
, dest
, base
);
1421 base
= aarch64_add_offset (mode
, NULL
, base
, INTVAL (offset
));
1422 aarch64_emit_move (dest
, base
);
1425 mem
= force_const_mem (ptr_mode
, imm
);
1427 if (mode
!= ptr_mode
)
1428 mem
= gen_rtx_ZERO_EXTEND (mode
, mem
);
1429 emit_insn (gen_rtx_SET (VOIDmode
, dest
, mem
));
1432 case SYMBOL_SMALL_TLSGD
:
1433 case SYMBOL_SMALL_TLSDESC
:
1434 case SYMBOL_SMALL_GOTTPREL
:
1435 case SYMBOL_SMALL_GOT
:
1436 case SYMBOL_TINY_GOT
:
1437 if (offset
!= const0_rtx
)
1439 gcc_assert(can_create_pseudo_p ());
1440 base
= aarch64_force_temporary (mode
, dest
, base
);
1441 base
= aarch64_add_offset (mode
, NULL
, base
, INTVAL (offset
));
1442 aarch64_emit_move (dest
, base
);
1447 case SYMBOL_SMALL_TPREL
:
1448 case SYMBOL_SMALL_ABSOLUTE
:
1449 case SYMBOL_TINY_ABSOLUTE
:
1450 aarch64_load_symref_appropriately (dest
, imm
, sty
);
1458 if (!CONST_INT_P (imm
))
1460 if (GET_CODE (imm
) == HIGH
)
1461 emit_insn (gen_rtx_SET (VOIDmode
, dest
, imm
));
1464 rtx mem
= force_const_mem (mode
, imm
);
1466 emit_insn (gen_rtx_SET (VOIDmode
, dest
, mem
));
1472 aarch64_internal_mov_immediate (dest
, imm
, true, GET_MODE (dest
));
1476 aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED
,
1477 tree exp ATTRIBUTE_UNUSED
)
1479 /* Currently, always true. */
1483 /* Implement TARGET_PASS_BY_REFERENCE. */
1486 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED
,
1489 bool named ATTRIBUTE_UNUSED
)
1492 machine_mode dummymode
;
1495 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1496 size
= (mode
== BLKmode
&& type
)
1497 ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1499 /* Aggregates are passed by reference based on their size. */
1500 if (type
&& AGGREGATE_TYPE_P (type
))
1502 size
= int_size_in_bytes (type
);
1505 /* Variable sized arguments are always returned by reference. */
1509 /* Can this be a candidate to be passed in fp/simd register(s)? */
1510 if (aarch64_vfp_is_call_or_return_candidate (mode
, type
,
  /* Arguments which are variable sized or larger than 2 registers are
     passed by reference unless they are a homogeneous floating-point
     aggregate.  */
1518 return size
> 2 * UNITS_PER_WORD
;
1521 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1523 aarch64_return_in_msb (const_tree valtype
)
1525 machine_mode dummy_mode
;
1528 /* Never happens in little-endian mode. */
1529 if (!BYTES_BIG_ENDIAN
)
1532 /* Only composite types smaller than or equal to 16 bytes can
1533 be potentially returned in registers. */
1534 if (!aarch64_composite_type_p (valtype
, TYPE_MODE (valtype
))
1535 || int_size_in_bytes (valtype
) <= 0
1536 || int_size_in_bytes (valtype
) > 16)
1539 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1540 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1541 is always passed/returned in the least significant bits of fp/simd
1543 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype
), valtype
,
1544 &dummy_mode
, &dummy_int
, NULL
))
1550 /* Implement TARGET_FUNCTION_VALUE.
1551 Define how to find the value returned by a function. */
1554 aarch64_function_value (const_tree type
, const_tree func
,
1555 bool outgoing ATTRIBUTE_UNUSED
)
1560 machine_mode ag_mode
;
1562 mode
= TYPE_MODE (type
);
1563 if (INTEGRAL_TYPE_P (type
))
1564 mode
= promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
1566 if (aarch64_return_in_msb (type
))
1568 HOST_WIDE_INT size
= int_size_in_bytes (type
);
1570 if (size
% UNITS_PER_WORD
!= 0)
1572 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
1573 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
1577 if (aarch64_vfp_is_call_or_return_candidate (mode
, type
,
1578 &ag_mode
, &count
, NULL
))
1580 if (!aarch64_composite_type_p (type
, mode
))
1582 gcc_assert (count
== 1 && mode
== ag_mode
);
1583 return gen_rtx_REG (mode
, V0_REGNUM
);
1590 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
1591 for (i
= 0; i
< count
; i
++)
1593 rtx tmp
= gen_rtx_REG (ag_mode
, V0_REGNUM
+ i
);
1594 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
1595 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
1596 XVECEXP (par
, 0, i
) = tmp
;
1602 return gen_rtx_REG (mode
, R0_REGNUM
);
1605 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1606 Return true if REGNO is the number of a hard register in which the values
1607 of called function may come back. */
1610 aarch64_function_value_regno_p (const unsigned int regno
)
1612 /* Maximum of 16 bytes can be returned in the general registers. Examples
1613 of 16-byte return values are: 128-bit integers and 16-byte small
1614 structures (excluding homogeneous floating-point aggregates). */
1615 if (regno
== R0_REGNUM
|| regno
== R1_REGNUM
)
1618 /* Up to four fp/simd registers can return a function value, e.g. a
1619 homogeneous floating-point aggregate having four members. */
1620 if (regno
>= V0_REGNUM
&& regno
< V0_REGNUM
+ HA_MAX_NUM_FLDS
)
1621 return !TARGET_GENERAL_REGS_ONLY
;
1626 /* Implement TARGET_RETURN_IN_MEMORY.
1628 If the type T of the result of a function is such that
1630 would require that arg be passed as a value in a register (or set of
1631 registers) according to the parameter passing rules, then the result
1632 is returned in the same registers as would be used for such an
1636 aarch64_return_in_memory (const_tree type
, const_tree fndecl ATTRIBUTE_UNUSED
)
1639 machine_mode ag_mode
;
1642 if (!AGGREGATE_TYPE_P (type
)
1643 && TREE_CODE (type
) != COMPLEX_TYPE
1644 && TREE_CODE (type
) != VECTOR_TYPE
)
1645 /* Simple scalar types always returned in registers. */
1648 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type
),
1655 /* Types larger than 2 registers returned in memory. */
1656 size
= int_size_in_bytes (type
);
1657 return (size
< 0 || size
> 2 * UNITS_PER_WORD
);
1661 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v
, machine_mode mode
,
1662 const_tree type
, int *nregs
)
1664 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1665 return aarch64_vfp_is_call_or_return_candidate (mode
,
1667 &pcum
->aapcs_vfp_rmode
,
1672 /* Given MODE and TYPE of a function argument, return the alignment in
1673 bits. The idea is to suppress any stronger alignment requested by
1674 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1675 This is a helper function for local use only. */
1678 aarch64_function_arg_alignment (machine_mode mode
, const_tree type
)
1680 unsigned int alignment
;
1684 if (!integer_zerop (TYPE_SIZE (type
)))
1686 if (TYPE_MODE (type
) == mode
)
1687 alignment
= TYPE_ALIGN (type
);
1689 alignment
= GET_MODE_ALIGNMENT (mode
);
1695 alignment
= GET_MODE_ALIGNMENT (mode
);
1700 /* Layout a function argument according to the AAPCS64 rules. The rule
1701 numbers refer to the rule numbers in the AAPCS64. */
1704 aarch64_layout_arg (cumulative_args_t pcum_v
, machine_mode mode
,
1706 bool named ATTRIBUTE_UNUSED
)
1708 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1709 int ncrn
, nvrn
, nregs
;
1710 bool allocate_ncrn
, allocate_nvrn
;
1713 /* We need to do this once per argument. */
1714 if (pcum
->aapcs_arg_processed
)
1717 pcum
->aapcs_arg_processed
= true;
1719 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
1721 = AARCH64_ROUND_UP (type
? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
),
1724 allocate_ncrn
= (type
) ? !(FLOAT_TYPE_P (type
)) : !FLOAT_MODE_P (mode
);
1725 allocate_nvrn
= aarch64_vfp_is_call_candidate (pcum_v
,
1730 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1731 The following code thus handles passing by SIMD/FP registers first. */
1733 nvrn
= pcum
->aapcs_nvrn
;
  /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
     and homogeneous short-vector aggregates (HVA).  */
1739 if (nvrn
+ nregs
<= NUM_FP_ARG_REGS
)
1741 pcum
->aapcs_nextnvrn
= nvrn
+ nregs
;
1742 if (!aarch64_composite_type_p (type
, mode
))
1744 gcc_assert (nregs
== 1);
1745 pcum
->aapcs_reg
= gen_rtx_REG (mode
, V0_REGNUM
+ nvrn
);
1751 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nregs
));
1752 for (i
= 0; i
< nregs
; i
++)
1754 rtx tmp
= gen_rtx_REG (pcum
->aapcs_vfp_rmode
,
1755 V0_REGNUM
+ nvrn
+ i
);
1756 tmp
= gen_rtx_EXPR_LIST
1758 GEN_INT (i
* GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
)));
1759 XVECEXP (par
, 0, i
) = tmp
;
1761 pcum
->aapcs_reg
= par
;
1767 /* C.3 NSRN is set to 8. */
1768 pcum
->aapcs_nextnvrn
= NUM_FP_ARG_REGS
;
1773 ncrn
= pcum
->aapcs_ncrn
;
1774 nregs
= size
/ UNITS_PER_WORD
;
  /* C6 - C9, though the sign and zero extension semantics are
     handled elsewhere.  This is the case where the argument fits
     entirely in general registers.  */
1779 if (allocate_ncrn
&& (ncrn
+ nregs
<= NUM_ARG_REGS
))
1781 unsigned int alignment
= aarch64_function_arg_alignment (mode
, type
);
1783 gcc_assert (nregs
== 0 || nregs
== 1 || nregs
== 2);
1785 /* C.8 if the argument has an alignment of 16 then the NGRN is
1786 rounded up to the next even number. */
1787 if (nregs
== 2 && alignment
== 16 * BITS_PER_UNIT
&& ncrn
% 2)
1790 gcc_assert (ncrn
+ nregs
<= NUM_ARG_REGS
);
1792 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1793 A reg is still generated for it, but the caller should be smart
1794 enough not to use it. */
1795 if (nregs
== 0 || nregs
== 1 || GET_MODE_CLASS (mode
) == MODE_INT
)
1797 pcum
->aapcs_reg
= gen_rtx_REG (mode
, R0_REGNUM
+ ncrn
);
1804 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nregs
));
1805 for (i
= 0; i
< nregs
; i
++)
1807 rtx tmp
= gen_rtx_REG (word_mode
, R0_REGNUM
+ ncrn
+ i
);
1808 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
1809 GEN_INT (i
* UNITS_PER_WORD
));
1810 XVECEXP (par
, 0, i
) = tmp
;
1812 pcum
->aapcs_reg
= par
;
1815 pcum
->aapcs_nextncrn
= ncrn
+ nregs
;
1820 pcum
->aapcs_nextncrn
= NUM_ARG_REGS
;
1822 /* The argument is passed on stack; record the needed number of words for
1823 this argument and align the total size if necessary. */
1825 pcum
->aapcs_stack_words
= size
/ UNITS_PER_WORD
;
1826 if (aarch64_function_arg_alignment (mode
, type
) == 16 * BITS_PER_UNIT
)
1827 pcum
->aapcs_stack_size
= AARCH64_ROUND_UP (pcum
->aapcs_stack_size
,
1828 16 / UNITS_PER_WORD
);
1832 /* Implement TARGET_FUNCTION_ARG. */
1835 aarch64_function_arg (cumulative_args_t pcum_v
, machine_mode mode
,
1836 const_tree type
, bool named
)
1838 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1839 gcc_assert (pcum
->pcs_variant
== ARM_PCS_AAPCS64
);
1841 if (mode
== VOIDmode
)
1844 aarch64_layout_arg (pcum_v
, mode
, type
, named
);
1845 return pcum
->aapcs_reg
;
1849 aarch64_init_cumulative_args (CUMULATIVE_ARGS
*pcum
,
1850 const_tree fntype ATTRIBUTE_UNUSED
,
1851 rtx libname ATTRIBUTE_UNUSED
,
1852 const_tree fndecl ATTRIBUTE_UNUSED
,
1853 unsigned n_named ATTRIBUTE_UNUSED
)
1855 pcum
->aapcs_ncrn
= 0;
1856 pcum
->aapcs_nvrn
= 0;
1857 pcum
->aapcs_nextncrn
= 0;
1858 pcum
->aapcs_nextnvrn
= 0;
1859 pcum
->pcs_variant
= ARM_PCS_AAPCS64
;
1860 pcum
->aapcs_reg
= NULL_RTX
;
1861 pcum
->aapcs_arg_processed
= false;
1862 pcum
->aapcs_stack_words
= 0;
1863 pcum
->aapcs_stack_size
= 0;
1869 aarch64_function_arg_advance (cumulative_args_t pcum_v
,
1874 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1875 if (pcum
->pcs_variant
== ARM_PCS_AAPCS64
)
1877 aarch64_layout_arg (pcum_v
, mode
, type
, named
);
1878 gcc_assert ((pcum
->aapcs_reg
!= NULL_RTX
)
1879 != (pcum
->aapcs_stack_words
!= 0));
1880 pcum
->aapcs_arg_processed
= false;
1881 pcum
->aapcs_ncrn
= pcum
->aapcs_nextncrn
;
1882 pcum
->aapcs_nvrn
= pcum
->aapcs_nextnvrn
;
1883 pcum
->aapcs_stack_size
+= pcum
->aapcs_stack_words
;
1884 pcum
->aapcs_stack_words
= 0;
1885 pcum
->aapcs_reg
= NULL_RTX
;
1890 aarch64_function_arg_regno_p (unsigned regno
)
1892 return ((GP_REGNUM_P (regno
) && regno
< R0_REGNUM
+ NUM_ARG_REGS
)
1893 || (FP_REGNUM_P (regno
) && regno
< V0_REGNUM
+ NUM_FP_ARG_REGS
));
1896 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1897 PARM_BOUNDARY bits of alignment, but will be given anything up
1898 to STACK_BOUNDARY bits if the type requires it. This makes sure
1899 that both before and after the layout of each argument, the Next
1900 Stacked Argument Address (NSAA) will have a minimum alignment of
1904 aarch64_function_arg_boundary (machine_mode mode
, const_tree type
)
1906 unsigned int alignment
= aarch64_function_arg_alignment (mode
, type
);
1908 if (alignment
< PARM_BOUNDARY
)
1909 alignment
= PARM_BOUNDARY
;
1910 if (alignment
> STACK_BOUNDARY
)
1911 alignment
= STACK_BOUNDARY
;
1915 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1917 Return true if an argument passed on the stack should be padded upwards,
1918 i.e. if the least-significant byte of the stack slot has useful data.
1920 Small aggregate types are placed in the lowest memory address.
1922 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1925 aarch64_pad_arg_upward (machine_mode mode
, const_tree type
)
1927 /* On little-endian targets, the least significant byte of every stack
1928 argument is passed at the lowest byte address of the stack slot. */
1929 if (!BYTES_BIG_ENDIAN
)
1932 /* Otherwise, integral, floating-point and pointer types are padded downward:
1933 the least significant byte of a stack argument is passed at the highest
1934 byte address of the stack slot. */
1936 ? (INTEGRAL_TYPE_P (type
) || SCALAR_FLOAT_TYPE_P (type
)
1937 || POINTER_TYPE_P (type
))
1938 : (SCALAR_INT_MODE_P (mode
) || SCALAR_FLOAT_MODE_P (mode
)))
1941 /* Everything else padded upward, i.e. data in first byte of stack slot. */
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).

   It specifies padding for the last (may also be the only)
   element of a block move between registers and memory.  If
   assuming the block is in the memory, padding upward means that
   the last element is padded after its highest significant byte,
   while in downward padding, the last element is padded at its
   least significant byte side.

   Small aggregates and small complex types are always padded
   upwards.

   We don't need to worry about homogeneous floating-point or
   short-vector aggregates; their move is not affected by the
   padding direction determined here.  Regardless of endianness,
   each element of such an aggregate is put in the least
   significant bits of a fp/simd register.

   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */
1968 aarch64_pad_reg_upward (machine_mode mode
, const_tree type
,
1969 bool first ATTRIBUTE_UNUSED
)
1972 /* Small composite types are always padded upward. */
1973 if (BYTES_BIG_ENDIAN
&& aarch64_composite_type_p (type
, mode
))
1975 HOST_WIDE_INT size
= (type
? int_size_in_bytes (type
)
1976 : GET_MODE_SIZE (mode
));
1977 if (size
< 2 * UNITS_PER_WORD
)
1981 /* Otherwise, use the default padding. */
1982 return !BYTES_BIG_ENDIAN
;
1986 aarch64_libgcc_cmp_return_mode (void)
1992 aarch64_frame_pointer_required (void)
  /* In aarch64_override_options_after_change
     flag_omit_leaf_frame_pointer turns off the frame pointer by
     default.  Turn it back on now if we've not got a leaf
     function.  */
1998 if (flag_omit_leaf_frame_pointer
1999 && (!crtl
->is_leaf
|| df_regs_ever_live_p (LR_REGNUM
)))
2005 /* Mark the registers that need to be saved by the callee and calculate
2006 the size of the callee-saved registers area and frame record (both FP
2007 and LR may be omitted). */
2009 aarch64_layout_frame (void)
2011 HOST_WIDE_INT offset
= 0;
2014 if (reload_completed
&& cfun
->machine
->frame
.laid_out
)
#define SLOT_NOT_REQUIRED (-2)
#define SLOT_REQUIRED     (-1)
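/* reg_offset[] initially holds one of these two sentinels for each register:
   SLOT_NOT_REQUIRED when the register does not need saving, and SLOT_REQUIRED
   when it needs a save slot whose offset has not yet been assigned.  The
   slot-assignment loops below replace SLOT_REQUIRED with the real
   (non-negative) frame offsets.  */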
2020 cfun
->machine
->frame
.wb_candidate1
= FIRST_PSEUDO_REGISTER
;
2021 cfun
->machine
->frame
.wb_candidate2
= FIRST_PSEUDO_REGISTER
;
2023 /* First mark all the registers that really need to be saved... */
2024 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
2025 cfun
->machine
->frame
.reg_offset
[regno
] = SLOT_NOT_REQUIRED
;
2027 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
2028 cfun
->machine
->frame
.reg_offset
[regno
] = SLOT_NOT_REQUIRED
;
2030 /* ... that includes the eh data registers (if needed)... */
2031 if (crtl
->calls_eh_return
)
2032 for (regno
= 0; EH_RETURN_DATA_REGNO (regno
) != INVALID_REGNUM
; regno
++)
2033 cfun
->machine
->frame
.reg_offset
[EH_RETURN_DATA_REGNO (regno
)]
2036 /* ... and any callee saved register that dataflow says is live. */
2037 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
2038 if (df_regs_ever_live_p (regno
)
2039 && (regno
== R30_REGNUM
2040 || !call_used_regs
[regno
]))
2041 cfun
->machine
->frame
.reg_offset
[regno
] = SLOT_REQUIRED
;
2043 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
2044 if (df_regs_ever_live_p (regno
)
2045 && !call_used_regs
[regno
])
2046 cfun
->machine
->frame
.reg_offset
[regno
] = SLOT_REQUIRED
;
2048 if (frame_pointer_needed
)
2050 /* FP and LR are placed in the linkage record. */
2051 cfun
->machine
->frame
.reg_offset
[R29_REGNUM
] = 0;
2052 cfun
->machine
->frame
.wb_candidate1
= R29_REGNUM
;
2053 cfun
->machine
->frame
.reg_offset
[R30_REGNUM
] = UNITS_PER_WORD
;
2054 cfun
->machine
->frame
.wb_candidate2
= R30_REGNUM
;
2055 cfun
->machine
->frame
.hardfp_offset
= 2 * UNITS_PER_WORD
;
2056 offset
+= 2 * UNITS_PER_WORD
;
2059 /* Now assign stack slots for them. */
2060 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
2061 if (cfun
->machine
->frame
.reg_offset
[regno
] == SLOT_REQUIRED
)
2063 cfun
->machine
->frame
.reg_offset
[regno
] = offset
;
2064 if (cfun
->machine
->frame
.wb_candidate1
== FIRST_PSEUDO_REGISTER
)
2065 cfun
->machine
->frame
.wb_candidate1
= regno
;
2066 else if (cfun
->machine
->frame
.wb_candidate2
== FIRST_PSEUDO_REGISTER
)
2067 cfun
->machine
->frame
.wb_candidate2
= regno
;
2068 offset
+= UNITS_PER_WORD
;
2071 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
2072 if (cfun
->machine
->frame
.reg_offset
[regno
] == SLOT_REQUIRED
)
2074 cfun
->machine
->frame
.reg_offset
[regno
] = offset
;
2075 if (cfun
->machine
->frame
.wb_candidate1
== FIRST_PSEUDO_REGISTER
)
2076 cfun
->machine
->frame
.wb_candidate1
= regno
;
2077 else if (cfun
->machine
->frame
.wb_candidate2
== FIRST_PSEUDO_REGISTER
2078 && cfun
->machine
->frame
.wb_candidate1
>= V0_REGNUM
)
2079 cfun
->machine
->frame
.wb_candidate2
= regno
;
2080 offset
+= UNITS_PER_WORD
;
2083 cfun
->machine
->frame
.padding0
=
2084 (AARCH64_ROUND_UP (offset
, STACK_BOUNDARY
/ BITS_PER_UNIT
) - offset
);
2085 offset
= AARCH64_ROUND_UP (offset
, STACK_BOUNDARY
/ BITS_PER_UNIT
);
2087 cfun
->machine
->frame
.saved_regs_size
= offset
;
2089 cfun
->machine
->frame
.hard_fp_offset
2090 = AARCH64_ROUND_UP (cfun
->machine
->frame
.saved_varargs_size
2092 + cfun
->machine
->frame
.saved_regs_size
,
2093 STACK_BOUNDARY
/ BITS_PER_UNIT
);
2095 cfun
->machine
->frame
.frame_size
2096 = AARCH64_ROUND_UP (cfun
->machine
->frame
.hard_fp_offset
2097 + crtl
->outgoing_args_size
,
2098 STACK_BOUNDARY
/ BITS_PER_UNIT
);
2100 cfun
->machine
->frame
.laid_out
= true;
2104 aarch64_register_saved_on_entry (int regno
)
2106 return cfun
->machine
->frame
.reg_offset
[regno
] >= 0;
2110 aarch64_next_callee_save (unsigned regno
, unsigned limit
)
2112 while (regno
<= limit
&& !aarch64_register_saved_on_entry (regno
))
2118 aarch64_pushwb_single_reg (machine_mode mode
, unsigned regno
,
2119 HOST_WIDE_INT adjustment
)
2121 rtx base_rtx
= stack_pointer_rtx
;
2124 reg
= gen_rtx_REG (mode
, regno
);
2125 mem
= gen_rtx_PRE_MODIFY (Pmode
, base_rtx
,
2126 plus_constant (Pmode
, base_rtx
, -adjustment
));
2127 mem
= gen_rtx_MEM (mode
, mem
);
2129 insn
= emit_move_insn (mem
, reg
);
2130 RTX_FRAME_RELATED_P (insn
) = 1;
2134 aarch64_gen_storewb_pair (machine_mode mode
, rtx base
, rtx reg
, rtx reg2
,
2135 HOST_WIDE_INT adjustment
)
2140 return gen_storewb_pairdi_di (base
, base
, reg
, reg2
,
2141 GEN_INT (-adjustment
),
2142 GEN_INT (UNITS_PER_WORD
- adjustment
));
2144 return gen_storewb_pairdf_di (base
, base
, reg
, reg2
,
2145 GEN_INT (-adjustment
),
2146 GEN_INT (UNITS_PER_WORD
- adjustment
));
2153 aarch64_pushwb_pair_reg (machine_mode mode
, unsigned regno1
,
2154 unsigned regno2
, HOST_WIDE_INT adjustment
)
2157 rtx reg1
= gen_rtx_REG (mode
, regno1
);
2158 rtx reg2
= gen_rtx_REG (mode
, regno2
);
2160 insn
= emit_insn (aarch64_gen_storewb_pair (mode
, stack_pointer_rtx
, reg1
,
2162 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 2)) = 1;
2163 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
2164 RTX_FRAME_RELATED_P (insn
) = 1;
2168 aarch64_gen_loadwb_pair (machine_mode mode
, rtx base
, rtx reg
, rtx reg2
,
2169 HOST_WIDE_INT adjustment
)
2174 return gen_loadwb_pairdi_di (base
, base
, reg
, reg2
, GEN_INT (adjustment
),
2175 GEN_INT (UNITS_PER_WORD
));
2177 return gen_loadwb_pairdf_di (base
, base
, reg
, reg2
, GEN_INT (adjustment
),
2178 GEN_INT (UNITS_PER_WORD
));
2185 aarch64_gen_store_pair (machine_mode mode
, rtx mem1
, rtx reg1
, rtx mem2
,
2191 return gen_store_pairdi (mem1
, reg1
, mem2
, reg2
);
2194 return gen_store_pairdf (mem1
, reg1
, mem2
, reg2
);
2202 aarch64_gen_load_pair (machine_mode mode
, rtx reg1
, rtx mem1
, rtx reg2
,
2208 return gen_load_pairdi (reg1
, mem1
, reg2
, mem2
);
2211 return gen_load_pairdf (reg1
, mem1
, reg2
, mem2
);
2220 aarch64_save_callee_saves (machine_mode mode
, HOST_WIDE_INT start_offset
,
2221 unsigned start
, unsigned limit
, bool skip_wb
)
2224 rtx (*gen_mem_ref
) (machine_mode
, rtx
) = (frame_pointer_needed
2225 ? gen_frame_mem
: gen_rtx_MEM
);
2229 for (regno
= aarch64_next_callee_save (start
, limit
);
2231 regno
= aarch64_next_callee_save (regno
+ 1, limit
))
2234 HOST_WIDE_INT offset
;
2237 && (regno
== cfun
->machine
->frame
.wb_candidate1
2238 || regno
== cfun
->machine
->frame
.wb_candidate2
))
2241 reg
= gen_rtx_REG (mode
, regno
);
2242 offset
= start_offset
+ cfun
->machine
->frame
.reg_offset
[regno
];
2243 mem
= gen_mem_ref (mode
, plus_constant (Pmode
, stack_pointer_rtx
,
2246 regno2
= aarch64_next_callee_save (regno
+ 1, limit
);
2249 && ((cfun
->machine
->frame
.reg_offset
[regno
] + UNITS_PER_WORD
)
2250 == cfun
->machine
->frame
.reg_offset
[regno2
]))
2253 rtx reg2
= gen_rtx_REG (mode
, regno2
);
2256 offset
= start_offset
+ cfun
->machine
->frame
.reg_offset
[regno2
];
2257 mem2
= gen_mem_ref (mode
, plus_constant (Pmode
, stack_pointer_rtx
,
2259 insn
= emit_insn (aarch64_gen_store_pair (mode
, mem
, reg
, mem2
,
	  /* The first part of a frame-related parallel insn is
	     always assumed to be relevant to the frame
	     calculations; subsequent parts are only
	     frame-related if explicitly marked.  */
2266 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
2270 insn
= emit_move_insn (mem
, reg
);
2272 RTX_FRAME_RELATED_P (insn
) = 1;
2277 aarch64_restore_callee_saves (machine_mode mode
,
2278 HOST_WIDE_INT start_offset
, unsigned start
,
2279 unsigned limit
, bool skip_wb
, rtx
*cfi_ops
)
2281 rtx base_rtx
= stack_pointer_rtx
;
2282 rtx (*gen_mem_ref
) (machine_mode
, rtx
) = (frame_pointer_needed
2283 ? gen_frame_mem
: gen_rtx_MEM
);
2286 HOST_WIDE_INT offset
;
2288 for (regno
= aarch64_next_callee_save (start
, limit
);
2290 regno
= aarch64_next_callee_save (regno
+ 1, limit
))
2295 && (regno
== cfun
->machine
->frame
.wb_candidate1
2296 || regno
== cfun
->machine
->frame
.wb_candidate2
))
2299 reg
= gen_rtx_REG (mode
, regno
);
2300 offset
= start_offset
+ cfun
->machine
->frame
.reg_offset
[regno
];
2301 mem
= gen_mem_ref (mode
, plus_constant (Pmode
, base_rtx
, offset
));
2303 regno2
= aarch64_next_callee_save (regno
+ 1, limit
);
2306 && ((cfun
->machine
->frame
.reg_offset
[regno
] + UNITS_PER_WORD
)
2307 == cfun
->machine
->frame
.reg_offset
[regno2
]))
2309 rtx reg2
= gen_rtx_REG (mode
, regno2
);
2312 offset
= start_offset
+ cfun
->machine
->frame
.reg_offset
[regno2
];
2313 mem2
= gen_mem_ref (mode
, plus_constant (Pmode
, base_rtx
, offset
));
2314 emit_insn (aarch64_gen_load_pair (mode
, reg
, mem
, reg2
, mem2
));
2316 *cfi_ops
= alloc_reg_note (REG_CFA_RESTORE
, reg2
, *cfi_ops
);
2320 emit_move_insn (reg
, mem
);
2321 *cfi_ops
= alloc_reg_note (REG_CFA_RESTORE
, reg
, *cfi_ops
);
/* AArch64 stack frames generated by this compiler look like:

	+-------------------------------+
	|                               |
	|  incoming stack arguments     |
	|                               |
	+-------------------------------+
	|                               | <-- incoming stack pointer (aligned)
	|  callee-allocated save area   |
	|  for register varargs         |
	|                               |
	+-------------------------------+
	|  local variables              | <-- frame_pointer_rtx
	|                               |
	+-------------------------------+
	|  padding                      | \
	+-------------------------------+  |
	|  callee-saved registers       |  | frame.saved_regs_size
	+-------------------------------+  |
	|  LR'                          |  |
	+-------------------------------+  |
	|  FP'                          | / <- hard_frame_pointer_rtx (aligned)
	+-------------------------------+
	|  dynamic allocation           |
	+-------------------------------+
	|  padding                      |
	+-------------------------------+
	|  outgoing stack arguments     | <-- arg_pointer
	|                               |
	+-------------------------------+
	|                               | <-- stack_pointer_rtx (aligned)

   Dynamic stack allocations via alloca() decrease stack_pointer_rtx
   but leave frame_pointer_rtx and hard_frame_pointer_rtx
   unchanged.  */
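/* For illustration only (numbers not taken from real output): a small
   function with 16 bytes of local variables that only needs to save
   x29/x30 and has no outgoing arguments is typically set up with
   something like "stp x29, x30, [sp, #-32]!; mov x29, sp", so the
   callee-save area sits at the bottom of the frame with the locals above
   it at [sp, #16]; the exact split is computed by aarch64_layout_frame.  */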
/* Generate the prologue instructions for entry into a function.
   Establish the stack frame by decreasing the stack pointer with a
   properly calculated size and, if necessary, create a frame record
   filled with the values of LR and previous frame pointer.  The
   current FP is also set up if it is in use.  */

aarch64_expand_prologue (void)
  /* sub sp, sp, #<frame_size>
     stp {fp, lr}, [sp, #<frame_size> - 16]
     add fp, sp, #<frame_size> - hardfp_offset
     stp {cs_reg}, [fp, #-16] etc.

     sub sp, sp, <final_adjustment_if_any>  */
  HOST_WIDE_INT frame_size, offset;
  HOST_WIDE_INT fp_offset;		/* Offset from hard FP to SP.  */
  HOST_WIDE_INT hard_fp_offset;

  aarch64_layout_frame ();

  offset = frame_size = cfun->machine->frame.frame_size;
  hard_fp_offset = cfun->machine->frame.hard_fp_offset;
  fp_offset = frame_size - hard_fp_offset;

  if (flag_stack_usage_info)
    current_function_static_stack_size = frame_size;
  /* Store pairs and load pairs have a range only -512 to 504.  */
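  /* (Those limits follow from the 7-bit signed immediate in LDP/STP,
     scaled by the access size: for the 8-byte saves used here the
     reachable offsets are -64 * 8 = -512 up to 63 * 8 = 504.)  */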
  /* When the frame has a large size, an initial decrease is done on
     the stack pointer to jump over the callee-allocated save area for
     register varargs, the local variable area and/or the callee-saved
     register area.  This will allow the pre-index write-back
     store pair instructions to be used for setting up the stack frame
     efficiently.  */
      offset = hard_fp_offset;
      offset = cfun->machine->frame.saved_regs_size;

  frame_size -= (offset + crtl->outgoing_args_size);
  if (frame_size >= 0x1000000)
      rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
      emit_move_insn (op0, GEN_INT (-frame_size));
      insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
      add_reg_note (insn, REG_CFA_ADJUST_CFA,
		    gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				 plus_constant (Pmode, stack_pointer_rtx,
      RTX_FRAME_RELATED_P (insn) = 1;
  else if (frame_size > 0)
      int hi_ofs = frame_size & 0xfff000;
      int lo_ofs = frame_size & 0x000fff;
	  insn = emit_insn (gen_add2_insn
			    (stack_pointer_rtx, GEN_INT (-hi_ofs)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  insn = emit_insn (gen_add2_insn
			    (stack_pointer_rtx, GEN_INT (-lo_ofs)));
	  RTX_FRAME_RELATED_P (insn) = 1;
  bool skip_wb = false;

  if (frame_pointer_needed)
	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					   GEN_INT (-offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
	aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);

      /* Set up frame pointer to point to the location of the
	 previous frame pointer on the stack.  */
      insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
				       GEN_INT (fp_offset)));
      RTX_FRAME_RELATED_P (insn) = 1;
      emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));

      unsigned reg1 = cfun->machine->frame.wb_candidate1;
      unsigned reg2 = cfun->machine->frame.wb_candidate2;
	  || reg1 == FIRST_PSEUDO_REGISTER
	  || (reg2 == FIRST_PSEUDO_REGISTER
	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					   GEN_INT (-offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
	  if (reg2 == FIRST_PSEUDO_REGISTER)
	    aarch64_pushwb_single_reg (mode1, reg1, offset);
	    aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);

  aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
  aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,

  /* when offset >= 512,
     sub sp, sp, #<outgoing_args_size> */
  if (frame_size > -1)
      if (crtl->outgoing_args_size > 0)
	  insn = emit_insn (gen_add2_insn
			    GEN_INT (- crtl->outgoing_args_size)));
	  RTX_FRAME_RELATED_P (insn) = 1;
/* Return TRUE if we can use a simple_return insn.

   This function checks whether the callee saved stack is empty, which
   means no restore actions are needed.  The pro_and_epilogue will use
   this to check whether shrink-wrapping opt is feasible.  */

aarch64_use_return_insn_p (void)
  if (!reload_completed)

  aarch64_layout_frame ();

  return cfun->machine->frame.frame_size == 0;
/* Generate the epilogue instructions for returning from a function.  */

aarch64_expand_epilogue (bool for_sibcall)
  HOST_WIDE_INT frame_size, offset;
  HOST_WIDE_INT fp_offset;
  HOST_WIDE_INT hard_fp_offset;

  /* We need to add a memory barrier to prevent reads from the deallocated
     stack.  */
  bool need_barrier_p = (get_frame_size () != 0
			 || cfun->machine->frame.saved_varargs_size);

  aarch64_layout_frame ();

  offset = frame_size = cfun->machine->frame.frame_size;
  hard_fp_offset = cfun->machine->frame.hard_fp_offset;
  fp_offset = frame_size - hard_fp_offset;
  /* Store pairs and load pairs have a range only -512 to 504.  */
      offset = hard_fp_offset;
      offset = cfun->machine->frame.saved_regs_size;

  frame_size -= (offset + crtl->outgoing_args_size);

  if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
      insn = emit_insn (gen_add2_insn
			GEN_INT (crtl->outgoing_args_size)));
      RTX_FRAME_RELATED_P (insn) = 1;
  /* If there were outgoing arguments or we've done dynamic stack
     allocation, then restore the stack pointer from the frame
     pointer.  This is at most one insn and more efficient than using
     GCC's internal mechanism.  */
  if (frame_pointer_needed
      && (crtl->outgoing_args_size || cfun->calls_alloca))
      if (cfun->calls_alloca)
	emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));

      insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
				       hard_frame_pointer_rtx,
      offset = offset - fp_offset;
  unsigned reg1 = cfun->machine->frame.wb_candidate1;
  unsigned reg2 = cfun->machine->frame.wb_candidate2;
  bool skip_wb = true;

  if (frame_pointer_needed)
      || reg1 == FIRST_PSEUDO_REGISTER
      || (reg2 == FIRST_PSEUDO_REGISTER

  aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
  aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,

    emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));

      machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
      rtx rreg1 = gen_rtx_REG (mode1, reg1);

      cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
      if (reg2 == FIRST_PSEUDO_REGISTER)
	  rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
	  mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
	  mem = gen_rtx_MEM (mode1, mem);
	  insn = emit_move_insn (rreg1, mem);
	  rtx rreg2 = gen_rtx_REG (mode1, reg2);

	  cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
	  insn = emit_insn (aarch64_gen_loadwb_pair
			    (mode1, stack_pointer_rtx, rreg1,

      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,

      /* Reset the CFA to be SP + FRAME_SIZE.  */
      rtx new_cfa = stack_pointer_rtx;
	new_cfa = plus_constant (Pmode, new_cfa, frame_size);
      cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
      REG_NOTES (insn) = cfi_ops;
      RTX_FRAME_RELATED_P (insn) = 1;
    emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));

  if (frame_size >= 0x1000000)
      rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
      emit_move_insn (op0, GEN_INT (frame_size));
      insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
      int hi_ofs = frame_size & 0xfff000;
      int lo_ofs = frame_size & 0x000fff;

      if (hi_ofs && lo_ofs)
	  insn = emit_insn (gen_add2_insn
			    (stack_pointer_rtx, GEN_INT (hi_ofs)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  frame_size = lo_ofs;
      insn = emit_insn (gen_add2_insn
			(stack_pointer_rtx, GEN_INT (frame_size)));

  /* Reset the CFA to be SP + 0.  */
  add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
  RTX_FRAME_RELATED_P (insn) = 1;

  /* Stack adjustment for exception handler.  */
  if (crtl->calls_eh_return)
      /* We need to unwind the stack by the offset computed by
	 EH_RETURN_STACKADJ_RTX.  We have already reset the CFA
	 to be SP; letting the CFA move during this adjustment
	 is just as correct as retaining the CFA from the body
	 of the function.  Therefore, do nothing special.  */
      emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));

  emit_use (gen_rtx_REG (DImode, LR_REGNUM));
  emit_jump_insn (ret_rtx);
/* Return the place to copy the exception unwinding return address to.
   This will probably be a stack slot, but could (in theory) be the
   return register.  */

aarch64_final_eh_return_addr (void)
  HOST_WIDE_INT fp_offset;

  aarch64_layout_frame ();

  fp_offset = cfun->machine->frame.frame_size
	      - cfun->machine->frame.hard_fp_offset;

  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
    return gen_rtx_REG (DImode, LR_REGNUM);

  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
     result in a store to save LR introduced by builtin_eh_return () being
     incorrectly deleted because the alias is not detected.
     So in the calculation of the address to copy the exception unwinding
     return address to, we note 2 cases.
     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
     we return a SP-relative location since all the addresses are SP-relative
     in this case.  This prevents the store from being optimized away.
     If the fp_offset is not 0, then the addresses will be FP-relative and
     therefore we return a FP-relative location.  */

  if (frame_pointer_needed)
	return gen_frame_mem (DImode,
			      plus_constant (Pmode, hard_frame_pointer_rtx,
					     UNITS_PER_WORD));
      return gen_frame_mem (DImode,
			    plus_constant (Pmode, stack_pointer_rtx,
					   UNITS_PER_WORD));

  /* If FP is not needed, we calculate the location of LR, which would be
     at the top of the saved registers block.  */

  return gen_frame_mem (DImode,
			plus_constant (Pmode,
				       + cfun->machine->frame.saved_regs_size
				       - 2 * UNITS_PER_WORD));
/* Possibly output code to build up a constant in a register.  For
   the benefit of the costs infrastructure, returns the number of
   instructions which would be emitted.  GENERATE inhibits or
   enables code generation.  */

aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
  if (aarch64_bitmask_imm (val, DImode))
	emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));

      HOST_WIDE_INT valp = val >> 16;

      for (i = 16; i < 64; i += 16)
	  valm = (valp & 0xffff);

      /* zcount contains the number of additional MOVK instructions
	 required if the constant is built up with an initial MOVZ instruction,
	 while ncount is the number of MOVK instructions required if starting
	 with a MOVN instruction.  Choose the sequence that yields the fewest
	 number of instructions, preferring MOVZ instructions when both need
	 the same number.  */
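      /* Worked example (for illustration only): for
	 val = 0xffffffffffff1234 the 16-bit halves are
	 { 0x1234, 0xffff, 0xffff, 0xffff }, so a MOVZ-based sequence would
	 need three trailing MOVKs, while a single "movn xN, #0xedcb"
	 already materialises the value; ncount < zcount and the MOVN form
	 below is chosen.  */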
      if (ncount < zcount)
	    emit_move_insn (gen_rtx_REG (Pmode, regnum),
			    GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
	    emit_move_insn (gen_rtx_REG (Pmode, regnum),
			    GEN_INT (val & 0xffff));

      for (i = 16; i < 64; i += 16)
	  if ((val & 0xffff) != tval)
		emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
					   GEN_INT (val & 0xffff)));
aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
  HOST_WIDE_INT mdelta = delta;
  rtx this_rtx = gen_rtx_REG (Pmode, regnum);
  rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);

  if (mdelta >= 4096 * 4096)
      (void) aarch64_build_constant (scratchreg, delta, true);
      emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
  else if (mdelta > 0)
      emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
      rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
	emit_insn (gen_rtx_SET (Pmode, this_rtx,
				gen_rtx_MINUS (Pmode, this_rtx, shift)));
	emit_insn (gen_rtx_SET (Pmode, this_rtx,
				gen_rtx_PLUS (Pmode, this_rtx, shift)));
      if (mdelta % 4096 != 0)
	  scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
	  emit_insn (gen_rtx_SET (Pmode, this_rtx,
				  gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
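/* For illustration only: with delta = 5000, aarch64_add_constant above sets
   the scratch register to 5000 / 4096 = 1, adds it shifted left by 12
   (i.e. 4096) since delta > 0, and then adds the remaining
   5000 % 4096 = 904 as a plain immediate.  */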
2863 /* Output code to add DELTA to the first argument, and then jump
2864 to FUNCTION. Used for C++ multiple inheritance. */
2866 aarch64_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
2867 HOST_WIDE_INT delta
,
2868 HOST_WIDE_INT vcall_offset
,
2871 /* The this pointer is always in x0. Note that this differs from
2872 Arm where the this pointer maybe bumped to r1 if r0 is required
2873 to return a pointer to an aggregate. On AArch64 a result value
2874 pointer will be in x8. */
2875 int this_regno
= R0_REGNUM
;
2876 rtx this_rtx
, temp0
, temp1
, addr
, funexp
;
2879 reload_completed
= 1;
2880 emit_note (NOTE_INSN_PROLOGUE_END
);
2882 if (vcall_offset
== 0)
2883 aarch64_add_constant (this_regno
, IP1_REGNUM
, delta
);
2886 gcc_assert ((vcall_offset
& (POINTER_BYTES
- 1)) == 0);
2888 this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
2889 temp0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
2890 temp1
= gen_rtx_REG (Pmode
, IP1_REGNUM
);
2895 if (delta
>= -256 && delta
< 256)
2896 addr
= gen_rtx_PRE_MODIFY (Pmode
, this_rtx
,
2897 plus_constant (Pmode
, this_rtx
, delta
));
2899 aarch64_add_constant (this_regno
, IP1_REGNUM
, delta
);
2902 if (Pmode
== ptr_mode
)
2903 aarch64_emit_move (temp0
, gen_rtx_MEM (ptr_mode
, addr
));
2905 aarch64_emit_move (temp0
,
2906 gen_rtx_ZERO_EXTEND (Pmode
,
2907 gen_rtx_MEM (ptr_mode
, addr
)));
2909 if (vcall_offset
>= -256 && vcall_offset
< 4096 * POINTER_BYTES
)
2910 addr
= plus_constant (Pmode
, temp0
, vcall_offset
);
2913 (void) aarch64_build_constant (IP1_REGNUM
, vcall_offset
, true);
2914 addr
= gen_rtx_PLUS (Pmode
, temp0
, temp1
);
2917 if (Pmode
== ptr_mode
)
2918 aarch64_emit_move (temp1
, gen_rtx_MEM (ptr_mode
,addr
));
2920 aarch64_emit_move (temp1
,
2921 gen_rtx_SIGN_EXTEND (Pmode
,
2922 gen_rtx_MEM (ptr_mode
, addr
)));
2924 emit_insn (gen_add2_insn (this_rtx
, temp1
));
2927 /* Generate a tail call to the target function. */
2928 if (!TREE_USED (function
))
2930 assemble_external (function
);
2931 TREE_USED (function
) = 1;
2933 funexp
= XEXP (DECL_RTL (function
), 0);
2934 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
2935 insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
2936 SIBLING_CALL_P (insn
) = 1;
2938 insn
= get_insns ();
2939 shorten_branches (insn
);
2940 final_start_function (insn
, file
, 1);
2941 final (insn
, file
, 1);
2942 final_end_function ();
2944 /* Stop pretending to be a post-reload pass. */
2945 reload_completed
= 0;
2949 aarch64_tls_referenced_p (rtx x
)
2951 if (!TARGET_HAVE_TLS
)
2953 subrtx_iterator::array_type array
;
2954 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
2956 const_rtx x
= *iter
;
2957 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x
) != 0)
2959 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2960 TLS offsets, not real symbol references. */
2961 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
2962 iter
.skip_subrtxes ();
aarch64_bitmasks_cmp (const void *i1, const void *i2)
  const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
  const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;

aarch64_build_bitmask_table (void)
  unsigned HOST_WIDE_INT mask, imm;
  unsigned int log_e, e, s, r;
  unsigned int nimms = 0;

  for (log_e = 1; log_e <= 6; log_e++)
	mask = ~(HOST_WIDE_INT) 0;
	mask = ((HOST_WIDE_INT) 1 << e) - 1;
      for (s = 1; s < e; s++)
	  for (r = 0; r < e; r++)
	      /* set s consecutive bits to 1 (s < 64) */
	      imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
	      /* rotate right by r */
	      imm = ((imm >> r) | (imm << (e - r))) & mask;
	      /* replicate the constant depending on SIMD size */
		case 1: imm |= (imm <<  2);
		case 2: imm |= (imm <<  4);
		case 3: imm |= (imm <<  8);
		case 4: imm |= (imm << 16);
		case 5: imm |= (imm << 32);

	      gcc_assert (nimms < AARCH64_NUM_BITMASKS);
	      aarch64_bitmasks[nimms++] = imm;

  gcc_assert (nimms == AARCH64_NUM_BITMASKS);
  qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
	 aarch64_bitmasks_cmp);
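/* Example of one table entry (for illustration): log_e = 3 gives e = 8;
   with s = 3 and r = 1 the run 0b111 is rotated within 8 bits to 0x83 and
   then widened by the fall-through replication above to
   0x8383838383838383 before being recorded in aarch64_bitmasks[].  */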
/* Return true if val can be encoded as a 12-bit unsigned immediate with
   a left shift of 0 or 12 bits.  */

aarch64_uimm12_shift (HOST_WIDE_INT val)
  return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
	  || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
	  );
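/* For example, 0x2a and 0x2a000 are both accepted (a 12-bit value with
   LSL #0 or LSL #12 respectively), whereas 0x2a001 would need more than
   one ADD/SUB immediate.  */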
/* Return true if val is an immediate that can be loaded into a
   register by a MOVZ instruction.  */

aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
  if (GET_MODE_SIZE (mode) > 4)
      if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
	  || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)

  /* Ignore sign extension.  */
  val &= (HOST_WIDE_INT) 0xffffffff;

  return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
	  || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
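/* E.g. 0x12340000 (16 bits at position 16) and, for DImode,
   0xffff00000000 (16 bits at position 32) are MOVZ-encodable, while
   0x12345 is not because its set bits straddle two 16-bit halves.  */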
/* Return true if val is a valid bitmask immediate.  */

aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode mode)
  if (GET_MODE_SIZE (mode) < 8)
      /* Replicate bit pattern.  */
      val &= (HOST_WIDE_INT) 0xffffffff;

  return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
		  sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
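/* For instance, 0x00ff00ff00ff00ff (a run of eight set bits replicated
   every 16 bits) is a valid bitmask immediate and will be found by the
   bsearch above, whereas 0x12345 is not representable this way.  */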
/* Return true if val is an immediate that can be loaded into a
   register in a single instruction.  */

aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
  if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
  return aarch64_bitmask_imm (val, mode);
3087 aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
3091 if (GET_CODE (x
) == HIGH
)
3094 split_const (x
, &base
, &offset
);
3095 if (GET_CODE (base
) == SYMBOL_REF
|| GET_CODE (base
) == LABEL_REF
)
3097 if (aarch64_classify_symbol (base
, offset
, SYMBOL_CONTEXT_ADR
)
3098 != SYMBOL_FORCE_TO_MEM
)
3101 /* Avoid generating a 64-bit relocation in ILP32; leave
3102 to aarch64_expand_mov_immediate to handle it properly. */
3103 return mode
!= ptr_mode
;
3106 return aarch64_tls_referenced_p (x
);
3109 /* Return true if register REGNO is a valid index register.
3110 STRICT_P is true if REG_OK_STRICT is in effect. */
3113 aarch64_regno_ok_for_index_p (int regno
, bool strict_p
)
3115 if (!HARD_REGISTER_NUM_P (regno
))
3123 regno
= reg_renumber
[regno
];
3125 return GP_REGNUM_P (regno
);
3128 /* Return true if register REGNO is a valid base register for mode MODE.
3129 STRICT_P is true if REG_OK_STRICT is in effect. */
3132 aarch64_regno_ok_for_base_p (int regno
, bool strict_p
)
3134 if (!HARD_REGISTER_NUM_P (regno
))
3142 regno
= reg_renumber
[regno
];
3145 /* The fake registers will be eliminated to either the stack or
3146 hard frame pointer, both of which are usually valid base registers.
3147 Reload deals with the cases where the eliminated form isn't valid. */
3148 return (GP_REGNUM_P (regno
)
3149 || regno
== SP_REGNUM
3150 || regno
== FRAME_POINTER_REGNUM
3151 || regno
== ARG_POINTER_REGNUM
);
3154 /* Return true if X is a valid base register for mode MODE.
3155 STRICT_P is true if REG_OK_STRICT is in effect. */
3158 aarch64_base_register_rtx_p (rtx x
, bool strict_p
)
3160 if (!strict_p
&& GET_CODE (x
) == SUBREG
)
3163 return (REG_P (x
) && aarch64_regno_ok_for_base_p (REGNO (x
), strict_p
));
3166 /* Return true if address offset is a valid index. If it is, fill in INFO
3167 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
3170 aarch64_classify_index (struct aarch64_address_info
*info
, rtx x
,
3171 machine_mode mode
, bool strict_p
)
3173 enum aarch64_address_type type
;
3178 if ((REG_P (x
) || GET_CODE (x
) == SUBREG
)
3179 && GET_MODE (x
) == Pmode
)
3181 type
= ADDRESS_REG_REG
;
3185 /* (sign_extend:DI (reg:SI)) */
3186 else if ((GET_CODE (x
) == SIGN_EXTEND
3187 || GET_CODE (x
) == ZERO_EXTEND
)
3188 && GET_MODE (x
) == DImode
3189 && GET_MODE (XEXP (x
, 0)) == SImode
)
3191 type
= (GET_CODE (x
) == SIGN_EXTEND
)
3192 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3193 index
= XEXP (x
, 0);
3196 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3197 else if (GET_CODE (x
) == MULT
3198 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
3199 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
3200 && GET_MODE (XEXP (x
, 0)) == DImode
3201 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
3202 && CONST_INT_P (XEXP (x
, 1)))
3204 type
= (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
3205 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3206 index
= XEXP (XEXP (x
, 0), 0);
3207 shift
= exact_log2 (INTVAL (XEXP (x
, 1)));
3209 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3210 else if (GET_CODE (x
) == ASHIFT
3211 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
3212 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
3213 && GET_MODE (XEXP (x
, 0)) == DImode
3214 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
3215 && CONST_INT_P (XEXP (x
, 1)))
3217 type
= (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
3218 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3219 index
= XEXP (XEXP (x
, 0), 0);
3220 shift
= INTVAL (XEXP (x
, 1));
3222 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3223 else if ((GET_CODE (x
) == SIGN_EXTRACT
3224 || GET_CODE (x
) == ZERO_EXTRACT
)
3225 && GET_MODE (x
) == DImode
3226 && GET_CODE (XEXP (x
, 0)) == MULT
3227 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
3228 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
3230 type
= (GET_CODE (x
) == SIGN_EXTRACT
)
3231 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3232 index
= XEXP (XEXP (x
, 0), 0);
3233 shift
= exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)));
3234 if (INTVAL (XEXP (x
, 1)) != 32 + shift
3235 || INTVAL (XEXP (x
, 2)) != 0)
3238 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3239 (const_int 0xffffffff<<shift)) */
3240 else if (GET_CODE (x
) == AND
3241 && GET_MODE (x
) == DImode
3242 && GET_CODE (XEXP (x
, 0)) == MULT
3243 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
3244 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
3245 && CONST_INT_P (XEXP (x
, 1)))
3247 type
= ADDRESS_REG_UXTW
;
3248 index
= XEXP (XEXP (x
, 0), 0);
3249 shift
= exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)));
3250 if (INTVAL (XEXP (x
, 1)) != (HOST_WIDE_INT
)0xffffffff << shift
)
3253 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3254 else if ((GET_CODE (x
) == SIGN_EXTRACT
3255 || GET_CODE (x
) == ZERO_EXTRACT
)
3256 && GET_MODE (x
) == DImode
3257 && GET_CODE (XEXP (x
, 0)) == ASHIFT
3258 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
3259 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
3261 type
= (GET_CODE (x
) == SIGN_EXTRACT
)
3262 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3263 index
= XEXP (XEXP (x
, 0), 0);
3264 shift
= INTVAL (XEXP (XEXP (x
, 0), 1));
3265 if (INTVAL (XEXP (x
, 1)) != 32 + shift
3266 || INTVAL (XEXP (x
, 2)) != 0)
3269 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3270 (const_int 0xffffffff<<shift)) */
3271 else if (GET_CODE (x
) == AND
3272 && GET_MODE (x
) == DImode
3273 && GET_CODE (XEXP (x
, 0)) == ASHIFT
3274 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
3275 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
3276 && CONST_INT_P (XEXP (x
, 1)))
3278 type
= ADDRESS_REG_UXTW
;
3279 index
= XEXP (XEXP (x
, 0), 0);
3280 shift
= INTVAL (XEXP (XEXP (x
, 0), 1));
3281 if (INTVAL (XEXP (x
, 1)) != (HOST_WIDE_INT
)0xffffffff << shift
)
3284 /* (mult:P (reg:P) (const_int scale)) */
3285 else if (GET_CODE (x
) == MULT
3286 && GET_MODE (x
) == Pmode
3287 && GET_MODE (XEXP (x
, 0)) == Pmode
3288 && CONST_INT_P (XEXP (x
, 1)))
3290 type
= ADDRESS_REG_REG
;
3291 index
= XEXP (x
, 0);
3292 shift
= exact_log2 (INTVAL (XEXP (x
, 1)));
3294 /* (ashift:P (reg:P) (const_int shift)) */
3295 else if (GET_CODE (x
) == ASHIFT
3296 && GET_MODE (x
) == Pmode
3297 && GET_MODE (XEXP (x
, 0)) == Pmode
3298 && CONST_INT_P (XEXP (x
, 1)))
3300 type
= ADDRESS_REG_REG
;
3301 index
= XEXP (x
, 0);
3302 shift
= INTVAL (XEXP (x
, 1));
  if (GET_CODE (index) == SUBREG)
    index = SUBREG_REG (index);

      (shift > 0 && shift <= 3
       && (1 << shift) == GET_MODE_SIZE (mode)))
      && aarch64_regno_ok_for_index_p (REGNO (index), strict_p)
      info->offset = index;
      info->shift = shift;
aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
  return (offset >= -64 * GET_MODE_SIZE (mode)
	  && offset < 64 * GET_MODE_SIZE (mode)
	  && offset % GET_MODE_SIZE (mode) == 0);

offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT offset)
  return offset >= -256 && offset < 256;

offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
	  && offset < 4096 * GET_MODE_SIZE (mode)
	  && offset % GET_MODE_SIZE (mode) == 0);
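/* Concretely, for an 8-byte (DImode) access these predicates accept:
   7-bit signed scaled offsets of -512 .. 504 in steps of 8, 9-bit signed
   unscaled offsets of -256 .. 255, and 12-bit unsigned scaled offsets of
   0 .. 32760 in steps of 8.  */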
3348 /* Return true if X is a valid address for machine mode MODE. If it is,
3349 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3350 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3353 aarch64_classify_address (struct aarch64_address_info
*info
,
3354 rtx x
, machine_mode mode
,
3355 RTX_CODE outer_code
, bool strict_p
)
3357 enum rtx_code code
= GET_CODE (x
);
3359 bool allow_reg_index_p
=
3360 outer_code
!= PARALLEL
&& (GET_MODE_SIZE (mode
) != 16
3361 || aarch64_vector_mode_supported_p (mode
));
3362 /* Don't support anything other than POST_INC or REG addressing for
3364 if (aarch64_vect_struct_mode_p (mode
)
3365 && (code
!= POST_INC
&& code
!= REG
))
3372 info
->type
= ADDRESS_REG_IMM
;
3374 info
->offset
= const0_rtx
;
3375 return aarch64_base_register_rtx_p (x
, strict_p
);
3383 && (op0
== virtual_stack_vars_rtx
3384 || op0
== frame_pointer_rtx
3385 || op0
== arg_pointer_rtx
)
3386 && CONST_INT_P (op1
))
3388 info
->type
= ADDRESS_REG_IMM
;
3395 if (GET_MODE_SIZE (mode
) != 0
3396 && CONST_INT_P (op1
)
3397 && aarch64_base_register_rtx_p (op0
, strict_p
))
3399 HOST_WIDE_INT offset
= INTVAL (op1
);
3401 info
->type
= ADDRESS_REG_IMM
;
3405 /* TImode and TFmode values are allowed in both pairs of X
3406 registers and individual Q registers. The available
3408 X,X: 7-bit signed scaled offset
3409 Q: 9-bit signed offset
3410 We conservatively require an offset representable in either mode.
3412 if (mode
== TImode
|| mode
== TFmode
)
3413 return (aarch64_offset_7bit_signed_scaled_p (mode
, offset
)
3414 && offset_9bit_signed_unscaled_p (mode
, offset
));
3416 if (outer_code
== PARALLEL
)
3417 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
3418 && aarch64_offset_7bit_signed_scaled_p (mode
, offset
));
3420 return (offset_9bit_signed_unscaled_p (mode
, offset
)
3421 || offset_12bit_unsigned_scaled_p (mode
, offset
));
3424 if (allow_reg_index_p
)
3426 /* Look for base + (scaled/extended) index register. */
3427 if (aarch64_base_register_rtx_p (op0
, strict_p
)
3428 && aarch64_classify_index (info
, op1
, mode
, strict_p
))
3433 if (aarch64_base_register_rtx_p (op1
, strict_p
)
3434 && aarch64_classify_index (info
, op0
, mode
, strict_p
))
3447 info
->type
= ADDRESS_REG_WB
;
3448 info
->base
= XEXP (x
, 0);
3449 info
->offset
= NULL_RTX
;
3450 return aarch64_base_register_rtx_p (info
->base
, strict_p
);
3454 info
->type
= ADDRESS_REG_WB
;
3455 info
->base
= XEXP (x
, 0);
3456 if (GET_CODE (XEXP (x
, 1)) == PLUS
3457 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
3458 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), info
->base
)
3459 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
3461 HOST_WIDE_INT offset
;
3462 info
->offset
= XEXP (XEXP (x
, 1), 1);
3463 offset
= INTVAL (info
->offset
);
3465 /* TImode and TFmode values are allowed in both pairs of X
3466 registers and individual Q registers. The available
3468 X,X: 7-bit signed scaled offset
3469 Q: 9-bit signed offset
3470 We conservatively require an offset representable in either mode.
3472 if (mode
== TImode
|| mode
== TFmode
)
3473 return (aarch64_offset_7bit_signed_scaled_p (mode
, offset
)
3474 && offset_9bit_signed_unscaled_p (mode
, offset
));
3476 if (outer_code
== PARALLEL
)
3477 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
3478 && aarch64_offset_7bit_signed_scaled_p (mode
, offset
));
3480 return offset_9bit_signed_unscaled_p (mode
, offset
);
3487 /* load literal: pc-relative constant pool entry. Only supported
3488 for SI mode or larger. */
3489 info
->type
= ADDRESS_SYMBOLIC
;
3490 if (outer_code
!= PARALLEL
&& GET_MODE_SIZE (mode
) >= 4)
3494 split_const (x
, &sym
, &addend
);
3495 return (GET_CODE (sym
) == LABEL_REF
3496 || (GET_CODE (sym
) == SYMBOL_REF
3497 && CONSTANT_POOL_ADDRESS_P (sym
)));
3502 info
->type
= ADDRESS_LO_SUM
;
3503 info
->base
= XEXP (x
, 0);
3504 info
->offset
= XEXP (x
, 1);
3505 if (allow_reg_index_p
3506 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
3509 split_const (info
->offset
, &sym
, &offs
);
3510 if (GET_CODE (sym
) == SYMBOL_REF
3511 && (aarch64_classify_symbol (sym
, offs
, SYMBOL_CONTEXT_MEM
)
3512 == SYMBOL_SMALL_ABSOLUTE
))
3514 /* The symbol and offset must be aligned to the access size. */
3516 unsigned int ref_size
;
3518 if (CONSTANT_POOL_ADDRESS_P (sym
))
3519 align
= GET_MODE_ALIGNMENT (get_pool_mode (sym
));
3520 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym
))
3522 tree exp
= SYMBOL_REF_DECL (sym
);
3523 align
= TYPE_ALIGN (TREE_TYPE (exp
));
3524 align
= CONSTANT_ALIGNMENT (exp
, align
);
3526 else if (SYMBOL_REF_DECL (sym
))
3527 align
= DECL_ALIGN (SYMBOL_REF_DECL (sym
));
3528 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym
)
3529 && SYMBOL_REF_BLOCK (sym
) != NULL
)
3530 align
= SYMBOL_REF_BLOCK (sym
)->alignment
;
3532 align
= BITS_PER_UNIT
;
3534 ref_size
= GET_MODE_SIZE (mode
);
3536 ref_size
= GET_MODE_SIZE (DImode
);
3538 return ((INTVAL (offs
) & (ref_size
- 1)) == 0
3539 && ((align
/ BITS_PER_UNIT
) & (ref_size
- 1)) == 0);
3550 aarch64_symbolic_address_p (rtx x
)
3554 split_const (x
, &x
, &offset
);
3555 return GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
;
3558 /* Classify the base of symbolic expression X, given that X appears in
3561 enum aarch64_symbol_type
3562 aarch64_classify_symbolic_expression (rtx x
,
3563 enum aarch64_symbol_context context
)
3567 split_const (x
, &x
, &offset
);
3568 return aarch64_classify_symbol (x
, offset
, context
);
3572 /* Return TRUE if X is a legitimate address for accessing memory in
3575 aarch64_legitimate_address_hook_p (machine_mode mode
, rtx x
, bool strict_p
)
3577 struct aarch64_address_info addr
;
3579 return aarch64_classify_address (&addr
, x
, mode
, MEM
, strict_p
);
3582 /* Return TRUE if X is a legitimate address for accessing memory in
3583 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3586 aarch64_legitimate_address_p (machine_mode mode
, rtx x
,
3587 RTX_CODE outer_code
, bool strict_p
)
3589 struct aarch64_address_info addr
;
3591 return aarch64_classify_address (&addr
, x
, mode
, outer_code
, strict_p
);
3594 /* Return TRUE if rtx X is immediate constant 0.0 */
3596 aarch64_float_const_zero_rtx_p (rtx x
)
3600 if (GET_MODE (x
) == VOIDmode
)
3603 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
3604 if (REAL_VALUE_MINUS_ZERO (r
))
3605 return !HONOR_SIGNED_ZEROS (GET_MODE (x
));
3606 return REAL_VALUES_EQUAL (r
, dconst0
);
3609 /* Return the fixed registers used for condition codes. */
3612 aarch64_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
3615 *p2
= INVALID_REGNUM
;
3619 /* Emit call insn with PAT and do aarch64-specific handling. */
3622 aarch64_emit_call_insn (rtx pat
)
3624 rtx insn
= emit_call_insn (pat
);
3626 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
3627 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP0_REGNUM
));
3628 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP1_REGNUM
));
3632 aarch64_select_cc_mode (RTX_CODE code
, rtx x
, rtx y
)
3634 /* All floating point compares return CCFP if it is an equality
3635 comparison, and CCFPE otherwise. */
3636 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
3663 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3665 && (code
== EQ
|| code
== NE
|| code
== LT
|| code
== GE
)
3666 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
|| GET_CODE (x
) == AND
3667 || GET_CODE (x
) == NEG
))
3670 /* A compare with a shifted operand. Because of canonicalization,
3671 the comparison will have to be swapped when we emit the assembly
3673 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3674 && (REG_P (y
) || GET_CODE (y
) == SUBREG
)
3675 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
3676 || GET_CODE (x
) == LSHIFTRT
3677 || GET_CODE (x
) == ZERO_EXTEND
|| GET_CODE (x
) == SIGN_EXTEND
))
3680 /* Similarly for a negated operand, but we can only do this for
3682 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3683 && (REG_P (y
) || GET_CODE (y
) == SUBREG
)
3684 && (code
== EQ
|| code
== NE
)
3685 && GET_CODE (x
) == NEG
)
3688 /* A compare of a mode narrower than SI mode against zero can be done
3689 by extending the value in the comparison. */
3690 if ((GET_MODE (x
) == QImode
|| GET_MODE (x
) == HImode
)
3692 /* Only use sign-extension if we really need it. */
3693 return ((code
== GT
|| code
== GE
|| code
== LE
|| code
== LT
)
3694 ? CC_SESWPmode
: CC_ZESWPmode
);
3696 /* For everything else, return CCmode. */
3701 aarch64_get_condition_code_1 (enum machine_mode
, enum rtx_code
);
3704 aarch64_get_condition_code (rtx x
)
3706 machine_mode mode
= GET_MODE (XEXP (x
, 0));
3707 enum rtx_code comp_code
= GET_CODE (x
);
3709 if (GET_MODE_CLASS (mode
) != MODE_CC
)
3710 mode
= SELECT_CC_MODE (comp_code
, XEXP (x
, 0), XEXP (x
, 1));
3711 return aarch64_get_condition_code_1 (mode
, comp_code
);
3715 aarch64_get_condition_code_1 (enum machine_mode mode
, enum rtx_code comp_code
)
3717 int ne
= -1, eq
= -1;
3724 case GE
: return AARCH64_GE
;
3725 case GT
: return AARCH64_GT
;
3726 case LE
: return AARCH64_LS
;
3727 case LT
: return AARCH64_MI
;
3728 case NE
: return AARCH64_NE
;
3729 case EQ
: return AARCH64_EQ
;
3730 case ORDERED
: return AARCH64_VC
;
3731 case UNORDERED
: return AARCH64_VS
;
3732 case UNLT
: return AARCH64_LT
;
3733 case UNLE
: return AARCH64_LE
;
3734 case UNGT
: return AARCH64_HI
;
3735 case UNGE
: return AARCH64_PL
;
3793 case NE
: return AARCH64_NE
;
3794 case EQ
: return AARCH64_EQ
;
3795 case GE
: return AARCH64_GE
;
3796 case GT
: return AARCH64_GT
;
3797 case LE
: return AARCH64_LE
;
3798 case LT
: return AARCH64_LT
;
3799 case GEU
: return AARCH64_CS
;
3800 case GTU
: return AARCH64_HI
;
3801 case LEU
: return AARCH64_LS
;
3802 case LTU
: return AARCH64_CC
;
3812 case NE
: return AARCH64_NE
;
3813 case EQ
: return AARCH64_EQ
;
3814 case GE
: return AARCH64_LE
;
3815 case GT
: return AARCH64_LT
;
3816 case LE
: return AARCH64_GE
;
3817 case LT
: return AARCH64_GT
;
3818 case GEU
: return AARCH64_LS
;
3819 case GTU
: return AARCH64_CC
;
3820 case LEU
: return AARCH64_CS
;
3821 case LTU
: return AARCH64_HI
;
3829 case NE
: return AARCH64_NE
;
3830 case EQ
: return AARCH64_EQ
;
3831 case GE
: return AARCH64_PL
;
3832 case LT
: return AARCH64_MI
;
3840 case NE
: return AARCH64_NE
;
3841 case EQ
: return AARCH64_EQ
;
3851 if (comp_code
== NE
)
3854 if (comp_code
== EQ
)
3861 aarch64_const_vec_all_same_in_range_p (rtx x
,
3862 HOST_WIDE_INT minval
,
3863 HOST_WIDE_INT maxval
)
3865 HOST_WIDE_INT firstval
;
3868 if (GET_CODE (x
) != CONST_VECTOR
3869 || GET_MODE_CLASS (GET_MODE (x
)) != MODE_VECTOR_INT
)
3872 firstval
= INTVAL (CONST_VECTOR_ELT (x
, 0));
3873 if (firstval
< minval
|| firstval
> maxval
)
3876 count
= CONST_VECTOR_NUNITS (x
);
3877 for (i
= 1; i
< count
; i
++)
3878 if (INTVAL (CONST_VECTOR_ELT (x
, i
)) != firstval
)
3885 aarch64_const_vec_all_same_int_p (rtx x
, HOST_WIDE_INT val
)
3887 return aarch64_const_vec_all_same_in_range_p (x
, val
, val
);
3891 bit_count (unsigned HOST_WIDE_INT value
)
#define AARCH64_CC_V 1
#define AARCH64_CC_C (1 << 1)
#define AARCH64_CC_Z (1 << 2)
#define AARCH64_CC_N (1 << 3)

/* N Z C V flags for ccmp.  The first code is for AND op and the other
   is for IOR op.  Indexed by AARCH64_COND_CODE.  */
static const int aarch64_nzcv_codes[][2] =
  {AARCH64_CC_Z, 0},	/* EQ, Z == 1.  */
  {0, AARCH64_CC_Z},	/* NE, Z == 0.  */
  {AARCH64_CC_C, 0},	/* CS, C == 1.  */
  {0, AARCH64_CC_C},	/* CC, C == 0.  */
  {AARCH64_CC_N, 0},	/* MI, N == 1.  */
  {0, AARCH64_CC_N},	/* PL, N == 0.  */
  {AARCH64_CC_V, 0},	/* VS, V == 1.  */
  {0, AARCH64_CC_V},	/* VC, V == 0.  */
  {AARCH64_CC_C, 0},	/* HI, C == 1 && Z == 0.  */
  {0, AARCH64_CC_C},	/* LS, !(C == 1 && Z == 0).  */
  {0, AARCH64_CC_V},	/* GE, N == V.  */
  {AARCH64_CC_V, 0},	/* LT, N != V.  */
  {0, AARCH64_CC_Z},	/* GT, Z == 0 && N == V.  */
  {AARCH64_CC_Z, 0},	/* LE, !(Z == 0 && N == V).  */
  {0, 0},		/* AL, Any.  */
  {0, 0},		/* NV, Any.  */
3933 aarch64_ccmp_mode_to_code (enum machine_mode mode
)
3974 aarch64_print_operand (FILE *f
, rtx x
, char code
)
3978 /* An integer or symbol address without a preceding # sign. */
3980 switch (GET_CODE (x
))
3983 fprintf (f
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
3987 output_addr_const (f
, x
);
3991 if (GET_CODE (XEXP (x
, 0)) == PLUS
3992 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
3994 output_addr_const (f
, x
);
4000 output_operand_lossage ("Unsupported operand for code '%c'", code
);
4005 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
4009 if (!CONST_INT_P (x
)
4010 || (n
= exact_log2 (INTVAL (x
) & ~7)) <= 0)
4012 output_operand_lossage ("invalid operand for '%%%c'", code
);
4028 output_operand_lossage ("invalid operand for '%%%c'", code
);
4038 /* Print N such that 2^N == X. */
4039 if (!CONST_INT_P (x
) || (n
= exact_log2 (INTVAL (x
))) < 0)
4041 output_operand_lossage ("invalid operand for '%%%c'", code
);
4045 asm_fprintf (f
, "%d", n
);
4050 /* Print the number of non-zero bits in X (a const_int). */
4051 if (!CONST_INT_P (x
))
4053 output_operand_lossage ("invalid operand for '%%%c'", code
);
4057 asm_fprintf (f
, "%u", bit_count (INTVAL (x
)));
4061 /* Print the higher numbered register of a pair (TImode) of regs. */
4062 if (!REG_P (x
) || !GP_REGNUM_P (REGNO (x
) + 1))
4064 output_operand_lossage ("invalid operand for '%%%c'", code
);
4068 asm_fprintf (f
, "%s", reg_names
[REGNO (x
) + 1]);
4074 /* Print a condition (eq, ne, etc). */
4076 /* CONST_TRUE_RTX means always -- that's the default. */
4077 if (x
== const_true_rtx
)
4080 if (!COMPARISON_P (x
))
4082 output_operand_lossage ("invalid operand for '%%%c'", code
);
4086 cond_code
= aarch64_get_condition_code (x
);
4087 gcc_assert (cond_code
>= 0);
4088 fputs (aarch64_condition_codes
[cond_code
], f
);
4095 /* Print the inverse of a condition (eq <-> ne, etc). */
4097 /* CONST_TRUE_RTX means never -- that's the default. */
4098 if (x
== const_true_rtx
)
4104 if (!COMPARISON_P (x
))
4106 output_operand_lossage ("invalid operand for '%%%c'", code
);
4109 cond_code
= aarch64_get_condition_code (x
);
4110 gcc_assert (cond_code
>= 0);
4111 fputs (aarch64_condition_codes
[AARCH64_INVERSE_CONDITION_CODE
4121 /* Print a scalar FP/SIMD register name. */
4122 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
4124 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
4127 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - V0_REGNUM
);
4134 /* Print the first FP/SIMD register name in a list. */
4135 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
4137 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
4140 asm_fprintf (f
, "v%d", REGNO (x
) - V0_REGNUM
+ (code
- 'S'));
4144 /* Print bottom 16 bits of integer constant in hex. */
4145 if (!CONST_INT_P (x
))
4147 output_operand_lossage ("invalid operand for '%%%c'", code
);
4150 asm_fprintf (f
, "0x%wx", UINTVAL (x
) & 0xffff);
4155 /* Print a general register name or the zero register (32-bit or
4158 || (CONST_DOUBLE_P (x
) && aarch64_float_const_zero_rtx_p (x
)))
4160 asm_fprintf (f
, "%czr", code
);
4164 if (REG_P (x
) && GP_REGNUM_P (REGNO (x
)))
4166 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - R0_REGNUM
);
4170 if (REG_P (x
) && REGNO (x
) == SP_REGNUM
)
4172 asm_fprintf (f
, "%ssp", code
== 'w' ? "w" : "");
4179 /* Print a normal operand, if it's a general register, then we
4183 output_operand_lossage ("missing operand");
4187 switch (GET_CODE (x
))
4190 asm_fprintf (f
, "%s", reg_names
[REGNO (x
)]);
4194 aarch64_memory_reference_mode
= GET_MODE (x
);
4195 output_address (XEXP (x
, 0));
4200 output_addr_const (asm_out_file
, x
);
4204 asm_fprintf (f
, "%wd", INTVAL (x
));
4208 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_VECTOR_INT
)
4211 aarch64_const_vec_all_same_in_range_p (x
,
4213 HOST_WIDE_INT_MAX
));
4214 asm_fprintf (f
, "%wd", INTVAL (CONST_VECTOR_ELT (x
, 0)));
4216 else if (aarch64_simd_imm_zero_p (x
, GET_MODE (x
)))
4225 /* CONST_DOUBLE can represent a double-width integer.
4226 In this case, the mode of x is VOIDmode. */
4227 if (GET_MODE (x
) == VOIDmode
)
4229 else if (aarch64_float_const_zero_rtx_p (x
))
4234 else if (aarch64_float_const_representable_p (x
))
4237 char float_buf
[buf_size
] = {'\0'};
4239 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
4240 real_to_decimal_for_mode (float_buf
, &r
,
4243 asm_fprintf (asm_out_file
, "%s", float_buf
);
4247 output_operand_lossage ("invalid constant");
4250 output_operand_lossage ("invalid operand");
4256 if (GET_CODE (x
) == HIGH
)
4259 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
4261 case SYMBOL_SMALL_GOT
:
4262 asm_fprintf (asm_out_file
, ":got:");
4265 case SYMBOL_SMALL_TLSGD
:
4266 asm_fprintf (asm_out_file
, ":tlsgd:");
4269 case SYMBOL_SMALL_TLSDESC
:
4270 asm_fprintf (asm_out_file
, ":tlsdesc:");
4273 case SYMBOL_SMALL_GOTTPREL
:
4274 asm_fprintf (asm_out_file
, ":gottprel:");
4277 case SYMBOL_SMALL_TPREL
:
4278 asm_fprintf (asm_out_file
, ":tprel:");
4281 case SYMBOL_TINY_GOT
:
4288 output_addr_const (asm_out_file
, x
);
4292 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
4294 case SYMBOL_SMALL_GOT
:
4295 asm_fprintf (asm_out_file
, ":lo12:");
4298 case SYMBOL_SMALL_TLSGD
:
4299 asm_fprintf (asm_out_file
, ":tlsgd_lo12:");
4302 case SYMBOL_SMALL_TLSDESC
:
4303 asm_fprintf (asm_out_file
, ":tlsdesc_lo12:");
4306 case SYMBOL_SMALL_GOTTPREL
:
4307 asm_fprintf (asm_out_file
, ":gottprel_lo12:");
4310 case SYMBOL_SMALL_TPREL
:
4311 asm_fprintf (asm_out_file
, ":tprel_lo12_nc:");
4314 case SYMBOL_TINY_GOT
:
4315 asm_fprintf (asm_out_file
, ":got:");
4321 output_addr_const (asm_out_file
, x
);
4326 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
4328 case SYMBOL_SMALL_TPREL
:
4329 asm_fprintf (asm_out_file
, ":tprel_hi12:");
4334 output_addr_const (asm_out_file
, x
);
4342 if (!COMPARISON_P (x
))
4344 output_operand_lossage ("invalid operand for '%%%c'", code
);
4348 cond_code
= aarch64_get_condition_code_1 (CCmode
, GET_CODE (x
));
4349 gcc_assert (cond_code
>= 0);
4350 asm_fprintf (f
, "%d", aarch64_nzcv_codes
[cond_code
][0]);
4359 if (!COMPARISON_P (x
))
4361 output_operand_lossage ("invalid operand for '%%%c'", code
);
4365 cond_code
= aarch64_get_condition_code_1 (CCmode
, GET_CODE (x
));
4366 gcc_assert (cond_code
>= 0);
4367 asm_fprintf (f
, "%d", aarch64_nzcv_codes
[cond_code
][1]);
4372 output_operand_lossage ("invalid operand prefix '%%%c'", code
);
4378 aarch64_print_operand_address (FILE *f
, rtx x
)
4380 struct aarch64_address_info addr
;
4382 if (aarch64_classify_address (&addr
, x
, aarch64_memory_reference_mode
,
4386 case ADDRESS_REG_IMM
:
4387 if (addr
.offset
== const0_rtx
)
4388 asm_fprintf (f
, "[%s]", reg_names
[REGNO (addr
.base
)]);
4390 asm_fprintf (f
, "[%s, %wd]", reg_names
[REGNO (addr
.base
)],
4391 INTVAL (addr
.offset
));
4394 case ADDRESS_REG_REG
:
4395 if (addr
.shift
== 0)
4396 asm_fprintf (f
, "[%s, %s]", reg_names
[REGNO (addr
.base
)],
4397 reg_names
[REGNO (addr
.offset
)]);
4399 asm_fprintf (f
, "[%s, %s, lsl %u]", reg_names
[REGNO (addr
.base
)],
4400 reg_names
[REGNO (addr
.offset
)], addr
.shift
);
4403 case ADDRESS_REG_UXTW
:
4404 if (addr
.shift
== 0)
4405 asm_fprintf (f
, "[%s, w%d, uxtw]", reg_names
[REGNO (addr
.base
)],
4406 REGNO (addr
.offset
) - R0_REGNUM
);
4408 asm_fprintf (f
, "[%s, w%d, uxtw %u]", reg_names
[REGNO (addr
.base
)],
4409 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
4412 case ADDRESS_REG_SXTW
:
4413 if (addr
.shift
== 0)
4414 asm_fprintf (f
, "[%s, w%d, sxtw]", reg_names
[REGNO (addr
.base
)],
4415 REGNO (addr
.offset
) - R0_REGNUM
);
4417 asm_fprintf (f
, "[%s, w%d, sxtw %u]", reg_names
[REGNO (addr
.base
)],
4418 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
4421 case ADDRESS_REG_WB
:
4422 switch (GET_CODE (x
))
4425 asm_fprintf (f
, "[%s, %d]!", reg_names
[REGNO (addr
.base
)],
4426 GET_MODE_SIZE (aarch64_memory_reference_mode
));
4429 asm_fprintf (f
, "[%s], %d", reg_names
[REGNO (addr
.base
)],
4430 GET_MODE_SIZE (aarch64_memory_reference_mode
));
4433 asm_fprintf (f
, "[%s, -%d]!", reg_names
[REGNO (addr
.base
)],
4434 GET_MODE_SIZE (aarch64_memory_reference_mode
));
4437 asm_fprintf (f
, "[%s], -%d", reg_names
[REGNO (addr
.base
)],
4438 GET_MODE_SIZE (aarch64_memory_reference_mode
));
4441 asm_fprintf (f
, "[%s, %wd]!", reg_names
[REGNO (addr
.base
)],
4442 INTVAL (addr
.offset
));
4445 asm_fprintf (f
, "[%s], %wd", reg_names
[REGNO (addr
.base
)],
4446 INTVAL (addr
.offset
));
4453 case ADDRESS_LO_SUM
:
4454 asm_fprintf (f
, "[%s, #:lo12:", reg_names
[REGNO (addr
.base
)]);
4455 output_addr_const (f
, addr
.offset
);
4456 asm_fprintf (f
, "]");
4459 case ADDRESS_SYMBOLIC
:
4463 output_addr_const (f
, x
);
4467 aarch64_label_mentioned_p (rtx x
)
4472 if (GET_CODE (x
) == LABEL_REF
)
4475 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4476 referencing instruction, but they are constant offsets, not
4478 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
4481 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
4482 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
4488 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
4489 if (aarch64_label_mentioned_p (XVECEXP (x
, i
, j
)))
4492 else if (fmt
[i
] == 'e' && aarch64_label_mentioned_p (XEXP (x
, i
)))
4499 /* Implement REGNO_REG_CLASS. */
4502 aarch64_regno_regclass (unsigned regno
)
4504 if (GP_REGNUM_P (regno
))
4505 return GENERAL_REGS
;
4507 if (regno
== SP_REGNUM
)
4510 if (regno
== FRAME_POINTER_REGNUM
4511 || regno
== ARG_POINTER_REGNUM
)
4512 return POINTER_REGS
;
4514 if (FP_REGNUM_P (regno
))
4515 return FP_LO_REGNUM_P (regno
) ? FP_LO_REGS
: FP_REGS
;
aarch64_legitimize_address (rtx x, rtx /* orig_x  */, machine_mode mode)
  /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
     where mask is selected by alignment and size of the offset.
     We try to pick as large a range for the offset as possible to
     maximize the chance of a CSE.  However, for aligned addresses
     we limit the range to 4k so that structures with different sized
     elements are likely to use the same base.  */

  if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
      HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT base_offset;

      /* Does it look like we'll need a load/store-pair operation?  */
      if (GET_MODE_SIZE (mode) > 16
	base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
		       & ~((128 * GET_MODE_SIZE (mode)) - 1));
      /* For offsets that aren't a multiple of the access size, the limit is
      else if (offset & (GET_MODE_SIZE (mode) - 1))
	base_offset = (offset + 0x100) & ~0x1ff;
	base_offset = offset & ~0xfff;

      if (base_offset == 0)

      offset -= base_offset;
      rtx base_reg = gen_reg_rtx (Pmode);
      rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0), base_offset),
      emit_move_insn (base_reg, val);
      x = plus_constant (Pmode, base_reg, offset);
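/* Example of the split (for illustration): a DImode access at X + 0x2008
   is aligned, so base_offset becomes 0x2000 and is forced into a fresh
   base register, while the residual offset 8 stays in the memory
   reference; nearby accesses can then CSE the same base.  */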
4561 /* Try a machine-dependent way of reloading an illegitimate address
4562 operand. If we find one, push the reload and return the new rtx. */
4565 aarch64_legitimize_reload_address (rtx
*x_p
,
4567 int opnum
, int type
,
4568 int ind_levels ATTRIBUTE_UNUSED
)
4572 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
4573 if (aarch64_vect_struct_mode_p (mode
)
4574 && GET_CODE (x
) == PLUS
4575 && REG_P (XEXP (x
, 0))
4576 && CONST_INT_P (XEXP (x
, 1)))
4580 push_reload (orig_rtx
, NULL_RTX
, x_p
, NULL
,
4581 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
4582 opnum
, (enum reload_type
) type
);
4586 /* We must recognize output that we have already generated ourselves. */
4587 if (GET_CODE (x
) == PLUS
4588 && GET_CODE (XEXP (x
, 0)) == PLUS
4589 && REG_P (XEXP (XEXP (x
, 0), 0))
4590 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
4591 && CONST_INT_P (XEXP (x
, 1)))
4593 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
4594 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
4595 opnum
, (enum reload_type
) type
);
4599 /* We wish to handle large displacements off a base register by splitting
4600 the addend across an add and the mem insn. This can cut the number of
4601 extra insns needed from 3 to 1. It is only useful for load/store of a
4602 single register with 12 bit offset field. */
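  /* For instance (not taken from real RTL), reloading a DImode access at
     "x1 + 0x13008" splits the addend into high = 0x13000, a shifted uimm12
     that is added to the base register by the reload, and low = 8, which
     stays in the load/store's 12-bit offset field.  */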
4603 if (GET_CODE (x
) == PLUS
4604 && REG_P (XEXP (x
, 0))
4605 && CONST_INT_P (XEXP (x
, 1))
4606 && HARD_REGISTER_P (XEXP (x
, 0))
4609 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x
, 0)), true))
4611 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
4612 HOST_WIDE_INT low
= val
& 0xfff;
4613 HOST_WIDE_INT high
= val
- low
;
4616 machine_mode xmode
= GET_MODE (x
);
4618 /* In ILP32, xmode can be either DImode or SImode. */
4619 gcc_assert (xmode
== DImode
|| xmode
== SImode
);
4621 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4622 BLKmode alignment. */
4623 if (GET_MODE_SIZE (mode
) == 0)
4626 offs
= low
% GET_MODE_SIZE (mode
);
4628 /* Align misaligned offset by adjusting high part to compensate. */
4631 if (aarch64_uimm12_shift (high
+ offs
))
4640 offs
= GET_MODE_SIZE (mode
) - offs
;
4642 high
= high
+ (low
& 0x1000) - offs
;
4647 /* Check for overflow. */
4648 if (high
+ low
!= val
)
4651 cst
= GEN_INT (high
);
4652 if (!aarch64_uimm12_shift (high
))
4653 cst
= force_const_mem (xmode
, cst
);
4655 /* Reload high part into base reg, leaving the low part
4656 in the mem instruction.
4657 Note that replacing this gen_rtx_PLUS with plus_constant is
4658 wrong in this case because we rely on the
4659 (plus (plus reg c1) c2) structure being preserved so that
4660 XEXP (*p, 0) in push_reload below uses the correct term. */
4661 x
= gen_rtx_PLUS (xmode
,
4662 gen_rtx_PLUS (xmode
, XEXP (x
, 0), cst
),
4665 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
4666 BASE_REG_CLASS
, xmode
, VOIDmode
, 0, 0,
4667 opnum
, (enum reload_type
) type
);
4676 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED
, rtx x
,
4679 secondary_reload_info
*sri
)
4681 /* Without the TARGET_SIMD instructions we cannot move a Q register
4682 to a Q register directly. We need a scratch. */
4683 if (REG_P (x
) && (mode
== TFmode
|| mode
== TImode
) && mode
== GET_MODE (x
)
4684 && FP_REGNUM_P (REGNO (x
)) && !TARGET_SIMD
4685 && reg_class_subset_p (rclass
, FP_REGS
))
4688 sri
->icode
= CODE_FOR_aarch64_reload_movtf
;
4689 else if (mode
== TImode
)
4690 sri
->icode
= CODE_FOR_aarch64_reload_movti
;
4694 /* A TFmode or TImode memory access should be handled via an FP_REGS
4695 because AArch64 has richer addressing modes for LDR/STR instructions
4696 than LDP/STP instructions. */
4697 if (!TARGET_GENERAL_REGS_ONLY
&& rclass
== GENERAL_REGS
4698 && GET_MODE_SIZE (mode
) == 16 && MEM_P (x
))
4701 if (rclass
== FP_REGS
&& (mode
== TImode
|| mode
== TFmode
) && CONSTANT_P(x
))
4702 return GENERAL_REGS
;
4708 aarch64_can_eliminate (const int from
, const int to
)
4710 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4711 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4713 if (frame_pointer_needed
)
4715 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4717 if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
4719 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
4720 && !cfun
->calls_alloca
)
4722 if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4729 /* If we decided that we didn't need a leaf frame pointer but then used
4730 LR in the function, then we'll want a frame pointer after all, so
4731 prevent this elimination to ensure a frame pointer is used. */
4732 if (to
== STACK_POINTER_REGNUM
4733 && flag_omit_leaf_frame_pointer
4734 && df_regs_ever_live_p (LR_REGNUM
))
4742 aarch64_initial_elimination_offset (unsigned from
, unsigned to
)
4744 aarch64_layout_frame ();
4746 if (to
== HARD_FRAME_POINTER_REGNUM
)
4748 if (from
== ARG_POINTER_REGNUM
)
4749 return cfun
->machine
->frame
.frame_size
- crtl
->outgoing_args_size
;
4751 if (from
== FRAME_POINTER_REGNUM
)
4752 return (cfun
->machine
->frame
.hard_fp_offset
4753 - cfun
->machine
->frame
.saved_varargs_size
);
4756 if (to
== STACK_POINTER_REGNUM
)
4758 if (from
== FRAME_POINTER_REGNUM
)
4759 return (cfun
->machine
->frame
.frame_size
4760 - cfun
->machine
->frame
.saved_varargs_size
);
4763 return cfun
->machine
->frame
.frame_size
;
4766 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4770 aarch64_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
4774 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
4779 aarch64_asm_trampoline_template (FILE *f
)
4783 asm_fprintf (f
, "\tldr\tw%d, .+16\n", IP1_REGNUM
- R0_REGNUM
);
4784 asm_fprintf (f
, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM
- R0_REGNUM
);
4788 asm_fprintf (f
, "\tldr\t%s, .+16\n", reg_names
[IP1_REGNUM
]);
4789 asm_fprintf (f
, "\tldr\t%s, .+20\n", reg_names
[STATIC_CHAIN_REGNUM
]);
4791 asm_fprintf (f
, "\tbr\t%s\n", reg_names
[IP1_REGNUM
]);
4792 assemble_aligned_integer (4, const0_rtx
);
4793 assemble_aligned_integer (POINTER_BYTES
, const0_rtx
);
4794 assemble_aligned_integer (POINTER_BYTES
, const0_rtx
);
4798 aarch64_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
4800 rtx fnaddr
, mem
, a_tramp
;
4801 const int tramp_code_sz
= 16;
4803 /* Don't need to copy the trailing D-words, we fill those in below. */
4804 emit_block_move (m_tramp
, assemble_trampoline_template (),
4805 GEN_INT (tramp_code_sz
), BLOCK_OP_NORMAL
);
4806 mem
= adjust_address (m_tramp
, ptr_mode
, tramp_code_sz
);
4807 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
4808 if (GET_MODE (fnaddr
) != ptr_mode
)
4809 fnaddr
= convert_memory_address (ptr_mode
, fnaddr
);
4810 emit_move_insn (mem
, fnaddr
);
4812 mem
= adjust_address (m_tramp
, ptr_mode
, tramp_code_sz
+ POINTER_BYTES
);
4813 emit_move_insn (mem
, chain_value
);
4815 /* XXX We should really define a "clear_cache" pattern and use
4816 gen_clear_cache(). */
4817 a_tramp
= XEXP (m_tramp
, 0);
4818 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
4819 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, ptr_mode
,
4820 plus_constant (ptr_mode
, a_tramp
, TRAMPOLINE_SIZE
),
4824 static unsigned char
4825 aarch64_class_max_nregs (reg_class_t regclass
, machine_mode mode
)
4829 case CALLER_SAVE_REGS
:
4836 aarch64_vector_mode_p (mode
) ? (GET_MODE_SIZE (mode
) + 15) / 16 :
4837 (GET_MODE_SIZE (mode
) + 7) / 8;
4851 aarch64_preferred_reload_class (rtx x
, reg_class_t regclass
)
4853 if (regclass
== POINTER_REGS
)
4854 return GENERAL_REGS
;
4856 if (regclass
== STACK_REG
)
4859 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x
)), POINTER_REGS
))
4865 /* If it's an integer immediate that MOVI can't handle, then
4866 FP_REGS is not an option, so we return NO_REGS instead. */
4867 if (CONST_INT_P (x
) && reg_class_subset_p (regclass
, FP_REGS
)
4868 && !aarch64_simd_imm_scalar_p (x
, GET_MODE (x
)))
  /* Register elimination can result in a request for
4872 SP+constant->FP_REGS. We cannot support such operations which
4873 use SP as source and an FP_REG as destination, so reject out
4875 if (! reg_class_subset_p (regclass
, GENERAL_REGS
) && GET_CODE (x
) == PLUS
)
4877 rtx lhs
= XEXP (x
, 0);
4879 /* Look through a possible SUBREG introduced by ILP32. */
4880 if (GET_CODE (lhs
) == SUBREG
)
4881 lhs
= SUBREG_REG (lhs
);
4883 gcc_assert (REG_P (lhs
));
4884 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs
)),
static void
aarch64_asm_output_labelref (FILE* f, const char *name)
{
  asm_fprintf (f, "%U%s", name);
}

static void
aarch64_elf_asm_constructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_ctor_section_asm_out_constructor (symbol, priority);
  else
    {
      snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      assemble_aligned_integer (POINTER_BYTES, symbol);
    }
}

static void
aarch64_elf_asm_destructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_dtor_section_asm_out_destructor (symbol, priority);
  else
    {
      snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      assemble_aligned_integer (POINTER_BYTES, symbol);
    }
}
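/* Illustrative note (not part of the original sources): for a constructor
   with, say, priority 101, the code above switches to a section named
   ".init_array.00101" and emits the pointer-sized address of SYMBOL into it;
   destructors go to ".fini_array.NNNNN" in the same way.  The priority value
   is only an example.  */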
4933 aarch64_output_casesi (rtx
*operands
)
4937 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
4939 static const char *const patterns
[4][2] =
4942 "ldrb\t%w3, [%0,%w1,uxtw]",
4943 "add\t%3, %4, %w3, sxtb #2"
4946 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4947 "add\t%3, %4, %w3, sxth #2"
4950 "ldr\t%w3, [%0,%w1,uxtw #2]",
4951 "add\t%3, %4, %w3, sxtw #2"
4953 /* We assume that DImode is only generated when not optimizing and
4954 that we don't really need 64-bit address offsets. That would
4955 imply an object file with 8GB of code in a single function! */
4957 "ldr\t%w3, [%0,%w1,uxtw #2]",
4958 "add\t%3, %4, %w3, sxtw #2"
4962 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
4964 index
= exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec
)));
4966 gcc_assert (index
>= 0 && index
<= 3);
  /* Need to implement table size reduction, by changing the code below.  */
4969 output_asm_insn (patterns
[index
][0], operands
);
4970 ASM_GENERATE_INTERNAL_LABEL (label
, "Lrtx", CODE_LABEL_NUMBER (operands
[2]));
4971 snprintf (buf
, sizeof (buf
),
4972 "adr\t%%4, %s", targetm
.strip_name_encoding (label
));
4973 output_asm_insn (buf
, operands
);
4974 output_asm_insn (patterns
[index
][1], operands
);
4975 output_asm_insn ("br\t%3", operands
);
4976 assemble_label (asm_out_file
, label
);
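/* Illustrative note (not part of the original sources): for a byte-wide
   dispatch table (index 0 in the PATTERNS array above) the sequence printed
   for a casesi looks roughly like

       ldrb    w3, [x0, w1, uxtw]
       adr     x4, .Lrtx<N>
       add     x3, x4, w3, sxtb #2
       br      x3
   .Lrtx<N>:

   where <N> stands for the label number of the dispatch table and the
   register numbers simply spell out the operand placeholders %0..%4.  */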
/* Return size in bits of an arithmetic operand which is shifted/scaled and
   masked such that it is suitable for a UXTB, UXTH, or UXTW extend
   operator.  */

int
aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
{
  if (shift >= 0 && shift <= 3)
    {
      int size;
      for (size = 8; size <= 32; size *= 2)
        {
          HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
          if (mask == bits << shift)
            return size;
        }
    }
  return 0;
}
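/* Illustrative note (not part of the original sources): with shift == 1 and
   mask == 0x1fe (0xff << 1), aarch64_uxt_size returns 8, i.e. the operand can
   be expressed as a UXTB in an extended-register ADD/SUB; a mask that is not
   a contiguous 8/16/32-bit field shifted left by 0..3 yields 0.  */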
static bool
aarch64_use_blocks_for_constant_p (machine_mode mode ATTRIBUTE_UNUSED,
                                   const_rtx x ATTRIBUTE_UNUSED)
{
  /* We can't use blocks for constants when we're using a per-function
     constant pool.  */
  return false;
}

static section *
aarch64_select_rtx_section (machine_mode mode ATTRIBUTE_UNUSED,
                            rtx x ATTRIBUTE_UNUSED,
                            unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  /* Force all constant pool entries into the current function section.  */
  return function_section (current_function_decl);
}
/* Helper function for rtx cost calculation.  Strip a shift expression
   from X.  Returns the inner operand if successful, or the original
   expression on failure.  */

static rtx
aarch64_strip_shift (rtx x)
{
  rtx op = x;

  /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
     we can convert both to ROR during final output.  */
  if ((GET_CODE (op) == ASHIFT
       || GET_CODE (op) == ASHIFTRT
       || GET_CODE (op) == LSHIFTRT
       || GET_CODE (op) == ROTATERT
       || GET_CODE (op) == ROTATE)
      && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);

  if (GET_CODE (op) == MULT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
    return XEXP (op, 0);

  return x;
}
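/* Illustrative note (not part of the original sources): given the RTL
   (ashift (reg:DI x1) (const_int 3)), aarch64_strip_shift returns the inner
   (reg:DI x1); the same happens for a MULT by a power of two, since that is
   how a scaled operand is canonicalized.  Anything else comes back
   unchanged.  */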
/* Helper function for rtx cost calculation.  Strip an extend
   expression from X.  Returns the inner operand if successful, or the
   original expression on failure.  We deal with a number of possible
   canonicalization variations here.  */

static rtx
aarch64_strip_extend (rtx x)
{
  rtx op = x;

  /* Zero and sign extraction of a widened value.  */
  if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
      && XEXP (op, 2) == const0_rtx
      && GET_CODE (XEXP (op, 0)) == MULT
      && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
                                         XEXP (op, 1)))
    return XEXP (XEXP (op, 0), 0);

  /* It can also be represented (for zero-extend) as an AND with an
     immediate.  */
  if (GET_CODE (op) == AND
      && GET_CODE (XEXP (op, 0)) == MULT
      && CONST_INT_P (XEXP (XEXP (op, 0), 1))
      && CONST_INT_P (XEXP (op, 1))
      && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
                           INTVAL (XEXP (op, 1))) != 0)
    return XEXP (XEXP (op, 0), 0);

  /* Now handle extended register, as this may also have an optional
     left shift by 1..4.  */
  if (GET_CODE (op) == ASHIFT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
    op = XEXP (op, 0);

  if (GET_CODE (op) == ZERO_EXTEND
      || GET_CODE (op) == SIGN_EXTEND)
    op = XEXP (op, 0);
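/* Illustrative note (not part of the original sources): the canonical RTL
   behind an instruction such as "add x0, x1, w2, sxtw #2" wraps the operand
   in a sign_extract of a MULT by 4; aarch64_strip_extend peels that wrapper
   (and the AND-immediate or shift-by-1..4 forms of an extended register) so
   the cost routines below can charge for the arithmetic itself rather than
   for the extend, which the hardware provides for free.  */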
5092 /* Helper function for rtx cost calculation. Calculate the cost of
5093 a MULT, which may be part of a multiply-accumulate rtx. Return
5094 the calculated cost of the expression, recursing manually in to
5095 operands where needed. */
5098 aarch64_rtx_mult_cost (rtx x
, int code
, int outer
, bool speed
)
5101 const struct cpu_cost_table
*extra_cost
5102 = aarch64_tune_params
->insn_extra_cost
;
5104 bool maybe_fma
= (outer
== PLUS
|| outer
== MINUS
);
5105 machine_mode mode
= GET_MODE (x
);
5107 gcc_checking_assert (code
== MULT
);
5112 if (VECTOR_MODE_P (mode
))
5113 mode
= GET_MODE_INNER (mode
);
5115 /* Integer multiply/fma. */
5116 if (GET_MODE_CLASS (mode
) == MODE_INT
)
5118 /* The multiply will be canonicalized as a shift, cost it as such. */
5119 if (CONST_INT_P (op1
)
5120 && exact_log2 (INTVAL (op1
)) > 0)
5125 /* ADD (shifted register). */
5126 cost
+= extra_cost
->alu
.arith_shift
;
5128 /* LSL (immediate). */
5129 cost
+= extra_cost
->alu
.shift
;
5132 cost
+= rtx_cost (op0
, GET_CODE (op0
), 0, speed
);
5137 /* Integer multiplies or FMAs have zero/sign extending variants. */
5138 if ((GET_CODE (op0
) == ZERO_EXTEND
5139 && GET_CODE (op1
) == ZERO_EXTEND
)
5140 || (GET_CODE (op0
) == SIGN_EXTEND
5141 && GET_CODE (op1
) == SIGN_EXTEND
))
5143 cost
+= rtx_cost (XEXP (op0
, 0), MULT
, 0, speed
)
5144 + rtx_cost (XEXP (op1
, 0), MULT
, 1, speed
);
5149 /* MADD/SMADDL/UMADDL. */
5150 cost
+= extra_cost
->mult
[0].extend_add
;
5152 /* MUL/SMULL/UMULL. */
5153 cost
+= extra_cost
->mult
[0].extend
;
5159 /* This is either an integer multiply or an FMA. In both cases
5160 we want to recurse and cost the operands. */
5161 cost
+= rtx_cost (op0
, MULT
, 0, speed
)
5162 + rtx_cost (op1
, MULT
, 1, speed
);
5168 cost
+= extra_cost
->mult
[mode
== DImode
].add
;
5171 cost
+= extra_cost
->mult
[mode
== DImode
].simple
;
5180 /* Floating-point FMA/FMUL can also support negations of the
5182 if (GET_CODE (op0
) == NEG
)
5183 op0
= XEXP (op0
, 0);
5184 if (GET_CODE (op1
) == NEG
)
5185 op1
= XEXP (op1
, 0);
5188 /* FMADD/FNMADD/FNMSUB/FMSUB. */
5189 cost
+= extra_cost
->fp
[mode
== DFmode
].fma
;
5192 cost
+= extra_cost
->fp
[mode
== DFmode
].mult
;
5195 cost
+= rtx_cost (op0
, MULT
, 0, speed
)
5196 + rtx_cost (op1
, MULT
, 1, speed
);
5202 aarch64_address_cost (rtx x
,
5204 addr_space_t as ATTRIBUTE_UNUSED
,
5207 enum rtx_code c
= GET_CODE (x
);
5208 const struct cpu_addrcost_table
*addr_cost
= aarch64_tune_params
->addr_cost
;
5209 struct aarch64_address_info info
;
5213 if (!aarch64_classify_address (&info
, x
, mode
, c
, false))
5215 if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
)
5217 /* This is a CONST or SYMBOL ref which will be split
5218 in a different way depending on the code model in use.
5219 Cost it through the generic infrastructure. */
5220 int cost_symbol_ref
= rtx_cost (x
, MEM
, 1, speed
);
5221 /* Divide through by the cost of one instruction to
5222 bring it to the same units as the address costs. */
5223 cost_symbol_ref
/= COSTS_N_INSNS (1);
5224 /* The cost is then the cost of preparing the address,
5225 followed by an immediate (possibly 0) offset. */
5226 return cost_symbol_ref
+ addr_cost
->imm_offset
;
5230 /* This is most likely a jump table from a case
5232 return addr_cost
->register_offset
;
5238 case ADDRESS_LO_SUM
:
5239 case ADDRESS_SYMBOLIC
:
5240 case ADDRESS_REG_IMM
:
5241 cost
+= addr_cost
->imm_offset
;
5244 case ADDRESS_REG_WB
:
5245 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== PRE_MODIFY
)
5246 cost
+= addr_cost
->pre_modify
;
5247 else if (c
== POST_INC
|| c
== POST_DEC
|| c
== POST_MODIFY
)
5248 cost
+= addr_cost
->post_modify
;
5254 case ADDRESS_REG_REG
:
5255 cost
+= addr_cost
->register_offset
;
5258 case ADDRESS_REG_UXTW
:
5259 case ADDRESS_REG_SXTW
:
5260 cost
+= addr_cost
->register_extend
;
5270 /* For the sake of calculating the cost of the shifted register
5271 component, we can treat same sized modes in the same way. */
5272 switch (GET_MODE_BITSIZE (mode
))
5275 cost
+= addr_cost
->addr_scale_costs
.hi
;
5279 cost
+= addr_cost
->addr_scale_costs
.si
;
5283 cost
+= addr_cost
->addr_scale_costs
.di
;
5286 /* We can't tell, or this is a 128-bit vector. */
5288 cost
+= addr_cost
->addr_scale_costs
.ti
;
5296 /* Return true if the RTX X in mode MODE is a zero or sign extract
5297 usable in an ADD or SUB (extended register) instruction. */
5299 aarch64_rtx_arith_op_extract_p (rtx x
, machine_mode mode
)
5301 /* Catch add with a sign extract.
5302 This is add_<optab><mode>_multp2. */
5303 if (GET_CODE (x
) == SIGN_EXTRACT
5304 || GET_CODE (x
) == ZERO_EXTRACT
)
5306 rtx op0
= XEXP (x
, 0);
5307 rtx op1
= XEXP (x
, 1);
5308 rtx op2
= XEXP (x
, 2);
5310 if (GET_CODE (op0
) == MULT
5311 && CONST_INT_P (op1
)
5312 && op2
== const0_rtx
5313 && CONST_INT_P (XEXP (op0
, 1))
5314 && aarch64_is_extend_from_extract (mode
,
5326 aarch64_frint_unspec_p (unsigned int u
)
5344 /* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
5345 storing it in *COST. Result is true if the total cost of the operation
5346 has now been calculated. */
5348 aarch64_if_then_else_costs (rtx op0
, rtx op1
, rtx op2
, int *cost
, bool speed
)
5352 enum rtx_code cmpcode
;
5354 if (COMPARISON_P (op0
))
5356 inner
= XEXP (op0
, 0);
5357 comparator
= XEXP (op0
, 1);
5358 cmpcode
= GET_CODE (op0
);
5363 comparator
= const0_rtx
;
5367 if (GET_CODE (op1
) == PC
|| GET_CODE (op2
) == PC
)
5369 /* Conditional branch. */
5370 if (GET_MODE_CLASS (GET_MODE (inner
)) == MODE_CC
)
5374 if (cmpcode
== NE
|| cmpcode
== EQ
)
5376 if (comparator
== const0_rtx
)
5378 /* TBZ/TBNZ/CBZ/CBNZ. */
5379 if (GET_CODE (inner
) == ZERO_EXTRACT
)
5381 *cost
+= rtx_cost (XEXP (inner
, 0), ZERO_EXTRACT
,
5385 *cost
+= rtx_cost (inner
, cmpcode
, 0, speed
);
5390 else if (cmpcode
== LT
|| cmpcode
== GE
)
5393 if (comparator
== const0_rtx
)
5398 else if (GET_MODE_CLASS (GET_MODE (inner
)) == MODE_CC
)
5400 /* It's a conditional operation based on the status flags,
5401 so it must be some flavor of CSEL. */
5403 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
5404 if (GET_CODE (op1
) == NEG
5405 || GET_CODE (op1
) == NOT
5406 || (GET_CODE (op1
) == PLUS
&& XEXP (op1
, 1) == const1_rtx
))
5407 op1
= XEXP (op1
, 0);
5409 *cost
+= rtx_cost (op1
, IF_THEN_ELSE
, 1, speed
);
5410 *cost
+= rtx_cost (op2
, IF_THEN_ELSE
, 2, speed
);
5414 /* We don't know what this is, cost all operands. */
5418 /* Calculate the cost of calculating X, storing it in *COST. Result
5419 is true if the total cost of the operation has now been calculated. */
5421 aarch64_rtx_costs (rtx x
, int code
, int outer ATTRIBUTE_UNUSED
,
5422 int param ATTRIBUTE_UNUSED
, int *cost
, bool speed
)
5425 const struct cpu_cost_table
*extra_cost
5426 = aarch64_tune_params
->insn_extra_cost
;
5427 machine_mode mode
= GET_MODE (x
);
5429 /* By default, assume that everything has equivalent cost to the
5430 cheapest instruction. Any additional costs are applied as a delta
5431 above this default. */
5432 *cost
= COSTS_N_INSNS (1);
5434 /* TODO: The cost infrastructure currently does not handle
5435 vector operations. Assume that all vector operations
5436 are equally expensive. */
5437 if (VECTOR_MODE_P (mode
))
5440 *cost
+= extra_cost
->vect
.alu
;
5447 /* The cost depends entirely on the operands to SET. */
5452 switch (GET_CODE (op0
))
5457 rtx address
= XEXP (op0
, 0);
5458 if (GET_MODE_CLASS (mode
) == MODE_INT
)
5459 *cost
+= extra_cost
->ldst
.store
;
5460 else if (mode
== SFmode
)
5461 *cost
+= extra_cost
->ldst
.storef
;
5462 else if (mode
== DFmode
)
5463 *cost
+= extra_cost
->ldst
.stored
;
5466 COSTS_N_INSNS (aarch64_address_cost (address
, mode
,
5470 *cost
+= rtx_cost (op1
, SET
, 1, speed
);
5474 if (! REG_P (SUBREG_REG (op0
)))
5475 *cost
+= rtx_cost (SUBREG_REG (op0
), SET
, 0, speed
);
5479 /* const0_rtx is in general free, but we will use an
5480 instruction to set a register to 0. */
5481 if (REG_P (op1
) || op1
== const0_rtx
)
5483 /* The cost is 1 per register copied. */
5484 int n_minus_1
= (GET_MODE_SIZE (GET_MODE (op0
)) - 1)
5486 *cost
= COSTS_N_INSNS (n_minus_1
+ 1);
5489 /* Cost is just the cost of the RHS of the set. */
5490 *cost
+= rtx_cost (op1
, SET
, 1, speed
);
5495 /* Bit-field insertion. Strip any redundant widening of
5496 the RHS to meet the width of the target. */
5497 if (GET_CODE (op1
) == SUBREG
)
5498 op1
= SUBREG_REG (op1
);
5499 if ((GET_CODE (op1
) == ZERO_EXTEND
5500 || GET_CODE (op1
) == SIGN_EXTEND
)
5501 && CONST_INT_P (XEXP (op0
, 1))
5502 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1
, 0)))
5503 >= INTVAL (XEXP (op0
, 1))))
5504 op1
= XEXP (op1
, 0);
5506 if (CONST_INT_P (op1
))
5508 /* MOV immediate is assumed to always be cheap. */
5509 *cost
= COSTS_N_INSNS (1);
5515 *cost
+= extra_cost
->alu
.bfi
;
5516 *cost
+= rtx_cost (op1
, (enum rtx_code
) code
, 1, speed
);
5522 /* We can't make sense of this, assume default cost. */
5523 *cost
= COSTS_N_INSNS (1);
5529 /* If an instruction can incorporate a constant within the
5530 instruction, the instruction's expression avoids calling
5531 rtx_cost() on the constant. If rtx_cost() is called on a
5532 constant, then it is usually because the constant must be
5533 moved into a register by one or more instructions.
5535 The exception is constant 0, which can be expressed
5536 as XZR/WZR and is therefore free. The exception to this is
5537 if we have (set (reg) (const0_rtx)) in which case we must cost
5538 the move. However, we can catch that when we cost the SET, so
5539 we don't need to consider that here. */
5540 if (x
== const0_rtx
)
5544 /* To an approximation, building any other constant is
5545 proportionally expensive to the number of instructions
5546 required to build that constant. This is true whether we
5547 are compiling for SPEED or otherwise. */
5548 *cost
= COSTS_N_INSNS (aarch64_internal_mov_immediate
5549 (NULL_RTX
, x
, false, mode
));
5556 /* mov[df,sf]_aarch64. */
5557 if (aarch64_float_const_representable_p (x
))
5558 /* FMOV (scalar immediate). */
5559 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
5560 else if (!aarch64_float_const_zero_rtx_p (x
))
5562 /* This will be a load from memory. */
5564 *cost
+= extra_cost
->ldst
.loadd
;
5566 *cost
+= extra_cost
->ldst
.loadf
;
5569 /* Otherwise this is +0.0. We get this using MOVI d0, #0
5570 or MOV v0.s[0], wzr - neither of which are modeled by the
5571 cost tables. Just use the default cost. */
5581 /* For loads we want the base cost of a load, plus an
5582 approximation for the additional cost of the addressing
5584 rtx address
= XEXP (x
, 0);
5585 if (GET_MODE_CLASS (mode
) == MODE_INT
)
5586 *cost
+= extra_cost
->ldst
.load
;
5587 else if (mode
== SFmode
)
5588 *cost
+= extra_cost
->ldst
.loadf
;
5589 else if (mode
== DFmode
)
5590 *cost
+= extra_cost
->ldst
.loadd
;
5593 COSTS_N_INSNS (aarch64_address_cost (address
, mode
,
5602 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
5604 if (GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMPARE
5605 || GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMM_COMPARE
)
5608 *cost
+= rtx_cost (XEXP (op0
, 0), NEG
, 0, speed
);
5612 /* Cost this as SUB wzr, X. */
5613 op0
= CONST0_RTX (GET_MODE (x
));
5618 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
5620 /* Support (neg(fma...)) as a single instruction only if
5621 sign of zeros is unimportant. This matches the decision
5622 making in aarch64.md. */
5623 if (GET_CODE (op0
) == FMA
&& !HONOR_SIGNED_ZEROS (GET_MODE (op0
)))
5626 *cost
= rtx_cost (op0
, NEG
, 0, speed
);
5631 *cost
+= extra_cost
->fp
[mode
== DFmode
].neg
;
5640 *cost
+= extra_cost
->alu
.clz
;
5648 if (op1
== const0_rtx
5649 && GET_CODE (op0
) == AND
)
5655 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
)
5657 /* TODO: A write to the CC flags possibly costs extra, this
5658 needs encoding in the cost tables. */
5660 /* CC_ZESWPmode supports zero extend for free. */
5661 if (GET_MODE (x
) == CC_ZESWPmode
&& GET_CODE (op0
) == ZERO_EXTEND
)
5662 op0
= XEXP (op0
, 0);
5665 if (GET_CODE (op0
) == AND
)
5671 if (GET_CODE (op0
) == PLUS
)
5673 /* ADDS (and CMN alias). */
5678 if (GET_CODE (op0
) == MINUS
)
5685 if (GET_CODE (op1
) == NEG
)
5689 *cost
+= extra_cost
->alu
.arith
;
5691 *cost
+= rtx_cost (op0
, COMPARE
, 0, speed
);
5692 *cost
+= rtx_cost (XEXP (op1
, 0), NEG
, 1, speed
);
5698 Compare can freely swap the order of operands, and
5699 canonicalization puts the more complex operation first.
5700 But the integer MINUS logic expects the shift/extend
5701 operation in op1. */
5703 || (GET_CODE (op0
) == SUBREG
&& REG_P (SUBREG_REG (op0
)))))
5711 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
5715 *cost
+= extra_cost
->fp
[mode
== DFmode
].compare
;
5717 if (CONST_DOUBLE_P (op1
) && aarch64_float_const_zero_rtx_p (op1
))
5719 /* FCMP supports constant 0.0 for no extra cost. */
5733 /* Detect valid immediates. */
5734 if ((GET_MODE_CLASS (mode
) == MODE_INT
5735 || (GET_MODE_CLASS (mode
) == MODE_CC
5736 && GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
))
5737 && CONST_INT_P (op1
)
5738 && aarch64_uimm12_shift (INTVAL (op1
)))
5740 *cost
+= rtx_cost (op0
, MINUS
, 0, speed
);
5743 /* SUB(S) (immediate). */
5744 *cost
+= extra_cost
->alu
.arith
;
5749 /* Look for SUB (extended register). */
5750 if (aarch64_rtx_arith_op_extract_p (op1
, mode
))
5753 *cost
+= extra_cost
->alu
.arith_shift
;
5755 *cost
+= rtx_cost (XEXP (XEXP (op1
, 0), 0),
5756 (enum rtx_code
) GET_CODE (op1
),
5761 rtx new_op1
= aarch64_strip_extend (op1
);
5763 /* Cost this as an FMA-alike operation. */
5764 if ((GET_CODE (new_op1
) == MULT
5765 || GET_CODE (new_op1
) == ASHIFT
)
5768 *cost
+= aarch64_rtx_mult_cost (new_op1
, MULT
,
5769 (enum rtx_code
) code
,
5771 *cost
+= rtx_cost (op0
, MINUS
, 0, speed
);
5775 *cost
+= rtx_cost (new_op1
, MINUS
, 1, speed
);
5779 if (GET_MODE_CLASS (mode
) == MODE_INT
)
5781 *cost
+= extra_cost
->alu
.arith
;
5782 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5784 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
5797 if (GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMPARE
5798 || GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMM_COMPARE
)
5801 *cost
+= rtx_cost (XEXP (op0
, 0), PLUS
, 0, speed
);
5802 *cost
+= rtx_cost (op1
, PLUS
, 1, speed
);
5806 if (GET_MODE_CLASS (mode
) == MODE_INT
5807 && CONST_INT_P (op1
)
5808 && aarch64_uimm12_shift (INTVAL (op1
)))
5810 *cost
+= rtx_cost (op0
, PLUS
, 0, speed
);
5813 /* ADD (immediate). */
5814 *cost
+= extra_cost
->alu
.arith
;
5818 /* Look for ADD (extended register). */
5819 if (aarch64_rtx_arith_op_extract_p (op0
, mode
))
5822 *cost
+= extra_cost
->alu
.arith_shift
;
5824 *cost
+= rtx_cost (XEXP (XEXP (op0
, 0), 0),
5825 (enum rtx_code
) GET_CODE (op0
),
5830 /* Strip any extend, leave shifts behind as we will
5831 cost them through mult_cost. */
5832 new_op0
= aarch64_strip_extend (op0
);
5834 if (GET_CODE (new_op0
) == MULT
5835 || GET_CODE (new_op0
) == ASHIFT
)
5837 *cost
+= aarch64_rtx_mult_cost (new_op0
, MULT
, PLUS
,
5839 *cost
+= rtx_cost (op1
, PLUS
, 1, speed
);
5843 *cost
+= (rtx_cost (new_op0
, PLUS
, 0, speed
)
5844 + rtx_cost (op1
, PLUS
, 1, speed
));
5848 if (GET_MODE_CLASS (mode
) == MODE_INT
)
5850 *cost
+= extra_cost
->alu
.arith
;
5851 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5853 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
5859 *cost
= COSTS_N_INSNS (1);
5862 *cost
+= extra_cost
->alu
.rev
;
5867 if (aarch_rev16_p (x
))
5869 *cost
= COSTS_N_INSNS (1);
5872 *cost
+= extra_cost
->alu
.rev
;
5884 && GET_CODE (op0
) == MULT
5885 && CONST_INT_P (XEXP (op0
, 1))
5886 && CONST_INT_P (op1
)
5887 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0
, 1))),
5890 /* This is a UBFM/SBFM. */
5891 *cost
+= rtx_cost (XEXP (op0
, 0), ZERO_EXTRACT
, 0, speed
);
5893 *cost
+= extra_cost
->alu
.bfx
;
5897 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
5899 /* We possibly get the immediate for free, this is not
5901 if (CONST_INT_P (op1
)
5902 && aarch64_bitmask_imm (INTVAL (op1
), GET_MODE (x
)))
5904 *cost
+= rtx_cost (op0
, (enum rtx_code
) code
, 0, speed
);
5907 *cost
+= extra_cost
->alu
.logical
;
5915 /* Handle ORN, EON, or BIC. */
5916 if (GET_CODE (op0
) == NOT
)
5917 op0
= XEXP (op0
, 0);
5919 new_op0
= aarch64_strip_shift (op0
);
5921 /* If we had a shift on op0 then this is a logical-shift-
5922 by-register/immediate operation. Otherwise, this is just
5923 a logical operation. */
5928 /* Shift by immediate. */
5929 if (CONST_INT_P (XEXP (op0
, 1)))
5930 *cost
+= extra_cost
->alu
.log_shift
;
5932 *cost
+= extra_cost
->alu
.log_shift_reg
;
5935 *cost
+= extra_cost
->alu
.logical
;
5938 /* In both cases we want to cost both operands. */
5939 *cost
+= rtx_cost (new_op0
, (enum rtx_code
) code
, 0, speed
)
5940 + rtx_cost (op1
, (enum rtx_code
) code
, 1, speed
);
5950 *cost
+= extra_cost
->alu
.logical
;
5952 /* The logical instruction could have the shifted register form,
5953 but the cost is the same if the shift is processed as a separate
5954 instruction, so we don't bother with it here. */
5960 /* If a value is written in SI mode, then zero extended to DI
5961 mode, the operation will in general be free as a write to
5962 a 'w' register implicitly zeroes the upper bits of an 'x'
5963 register. However, if this is
5965 (set (reg) (zero_extend (reg)))
5967 we must cost the explicit register move. */
5969 && GET_MODE (op0
) == SImode
5972 int op_cost
= rtx_cost (XEXP (x
, 0), ZERO_EXTEND
, 0, speed
);
5974 if (!op_cost
&& speed
)
5976 *cost
+= extra_cost
->alu
.extend
;
5978 /* Free, the cost is that of the SI mode operation. */
5983 else if (MEM_P (XEXP (x
, 0)))
5985 /* All loads can zero extend to any size for free. */
5986 *cost
= rtx_cost (XEXP (x
, 0), ZERO_EXTEND
, param
, speed
);
5992 *cost
+= extra_cost
->alu
.extend
;
5997 if (MEM_P (XEXP (x
, 0)))
6002 rtx address
= XEXP (XEXP (x
, 0), 0);
6003 *cost
+= extra_cost
->ldst
.load_sign_extend
;
6006 COSTS_N_INSNS (aarch64_address_cost (address
, mode
,
6013 *cost
+= extra_cost
->alu
.extend
;
6020 if (CONST_INT_P (op1
))
6022 /* LSL (immediate), UBMF, UBFIZ and friends. These are all
6025 *cost
+= extra_cost
->alu
.shift
;
6027 /* We can incorporate zero/sign extend for free. */
6028 if (GET_CODE (op0
) == ZERO_EXTEND
6029 || GET_CODE (op0
) == SIGN_EXTEND
)
6030 op0
= XEXP (op0
, 0);
6032 *cost
+= rtx_cost (op0
, ASHIFT
, 0, speed
);
6039 *cost
+= extra_cost
->alu
.shift_reg
;
6041 return false; /* All arguments need to be in registers. */
6051 if (CONST_INT_P (op1
))
6053 /* ASR (immediate) and friends. */
6055 *cost
+= extra_cost
->alu
.shift
;
6057 *cost
+= rtx_cost (op0
, (enum rtx_code
) code
, 0, speed
);
6063 /* ASR (register) and friends. */
6065 *cost
+= extra_cost
->alu
.shift_reg
;
6067 return false; /* All arguments need to be in registers. */
6072 if (aarch64_cmodel
== AARCH64_CMODEL_LARGE
)
6076 *cost
+= extra_cost
->ldst
.load
;
6078 else if (aarch64_cmodel
== AARCH64_CMODEL_SMALL
6079 || aarch64_cmodel
== AARCH64_CMODEL_SMALL_PIC
)
6081 /* ADRP, followed by ADD. */
6082 *cost
+= COSTS_N_INSNS (1);
6084 *cost
+= 2 * extra_cost
->alu
.arith
;
6086 else if (aarch64_cmodel
== AARCH64_CMODEL_TINY
6087 || aarch64_cmodel
== AARCH64_CMODEL_TINY_PIC
)
6091 *cost
+= extra_cost
->alu
.arith
;
6096 /* One extra load instruction, after accessing the GOT. */
6097 *cost
+= COSTS_N_INSNS (1);
6099 *cost
+= extra_cost
->ldst
.load
;
6105 /* ADRP/ADD (immediate). */
6107 *cost
+= extra_cost
->alu
.arith
;
6114 *cost
+= extra_cost
->alu
.bfx
;
6116 /* We can trust that the immediates used will be correct (there
6117 are no by-register forms), so we need only cost op0. */
6118 *cost
+= rtx_cost (XEXP (x
, 0), (enum rtx_code
) code
, 0, speed
);
6122 *cost
+= aarch64_rtx_mult_cost (x
, MULT
, 0, speed
);
6123 /* aarch64_rtx_mult_cost always handles recursion to its
6131 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
6132 *cost
+= (extra_cost
->mult
[GET_MODE (x
) == DImode
].add
6133 + extra_cost
->mult
[GET_MODE (x
) == DImode
].idiv
);
6134 else if (GET_MODE (x
) == DFmode
)
6135 *cost
+= (extra_cost
->fp
[1].mult
6136 + extra_cost
->fp
[1].div
);
6137 else if (GET_MODE (x
) == SFmode
)
6138 *cost
+= (extra_cost
->fp
[0].mult
6139 + extra_cost
->fp
[0].div
);
6141 return false; /* All arguments need to be in registers. */
6148 if (GET_MODE_CLASS (mode
) == MODE_INT
)
6149 /* There is no integer SQRT, so only DIV and UDIV can get
6151 *cost
+= extra_cost
->mult
[mode
== DImode
].idiv
;
6153 *cost
+= extra_cost
->fp
[mode
== DFmode
].div
;
6155 return false; /* All arguments need to be in registers. */
6158 return aarch64_if_then_else_costs (XEXP (x
, 0), XEXP (x
, 1),
6159 XEXP (x
, 2), cost
, speed
);
6172 return false; /* All arguments must be in registers. */
6180 *cost
+= extra_cost
->fp
[mode
== DFmode
].fma
;
6182 /* FMSUB, FNMADD, and FNMSUB are free. */
6183 if (GET_CODE (op0
) == NEG
)
6184 op0
= XEXP (op0
, 0);
6186 if (GET_CODE (op2
) == NEG
)
6187 op2
= XEXP (op2
, 0);
6189 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
6190 and the by-element operand as operand 0. */
6191 if (GET_CODE (op1
) == NEG
)
6192 op1
= XEXP (op1
, 0);
6194 /* Catch vector-by-element operations. The by-element operand can
6195 either be (vec_duplicate (vec_select (x))) or just
6196 (vec_select (x)), depending on whether we are multiplying by
6197 a vector or a scalar.
6199 Canonicalization is not very good in these cases, FMA4 will put the
6200 by-element operand as operand 0, FNMA4 will have it as operand 1. */
6201 if (GET_CODE (op0
) == VEC_DUPLICATE
)
6202 op0
= XEXP (op0
, 0);
6203 else if (GET_CODE (op1
) == VEC_DUPLICATE
)
6204 op1
= XEXP (op1
, 0);
6206 if (GET_CODE (op0
) == VEC_SELECT
)
6207 op0
= XEXP (op0
, 0);
6208 else if (GET_CODE (op1
) == VEC_SELECT
)
6209 op1
= XEXP (op1
, 0);
6211 /* If the remaining parameters are not registers,
6212 get the cost to put them into registers. */
6213 *cost
+= rtx_cost (op0
, FMA
, 0, speed
);
6214 *cost
+= rtx_cost (op1
, FMA
, 1, speed
);
6215 *cost
+= rtx_cost (op2
, FMA
, 2, speed
);
6220 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
6223 case FLOAT_TRUNCATE
:
6225 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
6231 /* Strip the rounding part. They will all be implemented
6232 by the fcvt* family of instructions anyway. */
6233 if (GET_CODE (x
) == UNSPEC
)
6235 unsigned int uns_code
= XINT (x
, 1);
6237 if (uns_code
== UNSPEC_FRINTA
6238 || uns_code
== UNSPEC_FRINTM
6239 || uns_code
== UNSPEC_FRINTN
6240 || uns_code
== UNSPEC_FRINTP
6241 || uns_code
== UNSPEC_FRINTZ
)
6242 x
= XVECEXP (x
, 0, 0);
6246 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].toint
;
6248 *cost
+= rtx_cost (x
, (enum rtx_code
) code
, 0, speed
);
6252 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6254 /* FABS and FNEG are analogous. */
6256 *cost
+= extra_cost
->fp
[mode
== DFmode
].neg
;
6260 /* Integer ABS will either be split to
6261 two arithmetic instructions, or will be an ABS
6262 (scalar), which we don't model. */
6263 *cost
= COSTS_N_INSNS (2);
6265 *cost
+= 2 * extra_cost
->alu
.arith
;
6273 /* FMAXNM/FMINNM/FMAX/FMIN.
6274 TODO: This may not be accurate for all implementations, but
6275 we do not model this in the cost tables. */
6276 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
6281 /* The floating point round to integer frint* instructions. */
6282 if (aarch64_frint_unspec_p (XINT (x
, 1)))
6285 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
6290 if (XINT (x
, 1) == UNSPEC_RBIT
)
6293 *cost
+= extra_cost
->alu
.rev
;
6301 /* Decompose <su>muldi3_highpart. */
6302 if (/* (truncate:DI */
6305 && GET_MODE (XEXP (x
, 0)) == TImode
6306 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
6308 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
6309 /* (ANY_EXTEND:TI (reg:DI))
6310 (ANY_EXTEND:TI (reg:DI))) */
6311 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
6312 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == ZERO_EXTEND
)
6313 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
6314 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
))
6315 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 0), 0)) == DImode
6316 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 1), 0)) == DImode
6317 /* (const_int 64) */
6318 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
6319 && UINTVAL (XEXP (XEXP (x
, 0), 1)) == 64)
6323 *cost
+= extra_cost
->mult
[mode
== DImode
].extend
;
6324 *cost
+= rtx_cost (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 0), 0),
6326 *cost
+= rtx_cost (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 1), 0),
6336 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6338 "\nFailed to cost RTX. Assuming default cost.\n");
/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
   calculated for X.  This cost is stored in *COST.  Returns true
   if the total cost of X was calculated.  */

static bool
aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
                           int param, int *cost, bool speed)
{
  bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n",
               speed ? "Hot" : "Cold",
               *cost, result ? "final" : "partial");
    }

  return result;
}
static int
aarch64_register_move_cost (machine_mode mode,
                            reg_class_t from_i, reg_class_t to_i)
{
  enum reg_class from = (enum reg_class) from_i;
  enum reg_class to = (enum reg_class) to_i;
  const struct cpu_regmove_cost *regmove_cost
    = aarch64_tune_params->regmove_cost;

  /* Caller save and pointer regs are equivalent to GENERAL_REGS.  */
  if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
    to = GENERAL_REGS;

  if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
    from = GENERAL_REGS;

  /* Moving between GPR and stack cost is the same as GP2GP.  */
  if ((from == GENERAL_REGS && to == STACK_REG)
      || (to == GENERAL_REGS && from == STACK_REG))
    return regmove_cost->GP2GP;

  /* To/From the stack register, we move via the gprs.  */
  if (to == STACK_REG || from == STACK_REG)
    return aarch64_register_move_cost (mode, from, GENERAL_REGS)
           + aarch64_register_move_cost (mode, GENERAL_REGS, to);

  if (GET_MODE_SIZE (mode) == 16)
    {
      /* 128-bit operations on general registers require 2 instructions.  */
      if (from == GENERAL_REGS && to == GENERAL_REGS)
        return regmove_cost->GP2GP * 2;
      else if (from == GENERAL_REGS)
        return regmove_cost->GP2FP * 2;
      else if (to == GENERAL_REGS)
        return regmove_cost->FP2GP * 2;

      /* When AdvSIMD instructions are disabled it is not possible to move
         a 128-bit value directly between Q registers.  This is handled in
         secondary reload.  A general register is used as a scratch to move
         the upper DI value and the lower DI value is moved directly,
         hence the cost is the sum of three moves.  */
      if (! TARGET_SIMD)
        return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;

      return regmove_cost->FP2FP;
    }

  if (from == GENERAL_REGS && to == GENERAL_REGS)
    return regmove_cost->GP2GP;
  else if (from == GENERAL_REGS)
    return regmove_cost->GP2FP;
  else if (to == GENERAL_REGS)
    return regmove_cost->FP2GP;

  return regmove_cost->FP2FP;
}
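/* Illustrative note (not part of the original sources): with a tuning whose
   cost table reads, purely for illustration, GP2GP = 1, GP2FP = FP2GP = 5 and
   FP2FP = 2, moving a TImode value from GENERAL_REGS to FP_REGS is charged
   2 * GP2FP = 10 by the code above, reflecting the two 64-bit transfers
   needed for a 128-bit quantity.  */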
static int
aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
                          reg_class_t rclass ATTRIBUTE_UNUSED,
                          bool in ATTRIBUTE_UNUSED)
{
  return aarch64_tune_params->memmov_cost;
}

/* Return the number of instructions that can be issued per cycle.  */

static int
aarch64_sched_issue_rate (void)
{
  return aarch64_tune_params->issue_rate;
}
6435 /* Vectorizer cost model target hooks. */
6437 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6439 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
6441 int misalign ATTRIBUTE_UNUSED
)
6445 switch (type_of_cost
)
6448 return aarch64_tune_params
->vec_costs
->scalar_stmt_cost
;
6451 return aarch64_tune_params
->vec_costs
->scalar_load_cost
;
6454 return aarch64_tune_params
->vec_costs
->scalar_store_cost
;
6457 return aarch64_tune_params
->vec_costs
->vec_stmt_cost
;
6460 return aarch64_tune_params
->vec_costs
->vec_align_load_cost
;
6463 return aarch64_tune_params
->vec_costs
->vec_store_cost
;
6466 return aarch64_tune_params
->vec_costs
->vec_to_scalar_cost
;
6469 return aarch64_tune_params
->vec_costs
->scalar_to_vec_cost
;
6471 case unaligned_load
:
6472 return aarch64_tune_params
->vec_costs
->vec_unalign_load_cost
;
6474 case unaligned_store
:
6475 return aarch64_tune_params
->vec_costs
->vec_unalign_store_cost
;
6477 case cond_branch_taken
:
6478 return aarch64_tune_params
->vec_costs
->cond_taken_branch_cost
;
6480 case cond_branch_not_taken
:
6481 return aarch64_tune_params
->vec_costs
->cond_not_taken_branch_cost
;
6484 case vec_promote_demote
:
6485 return aarch64_tune_params
->vec_costs
->vec_stmt_cost
;
6488 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
6489 return elements
/ 2 + 1;
6496 /* Implement targetm.vectorize.add_stmt_cost. */
6498 aarch64_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
6499 struct _stmt_vec_info
*stmt_info
, int misalign
,
6500 enum vect_cost_model_location where
)
6502 unsigned *cost
= (unsigned *) data
;
6503 unsigned retval
= 0;
6505 if (flag_vect_cost_model
)
6507 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
6509 aarch64_builtin_vectorization_cost (kind
, vectype
, misalign
);
6511 /* Statements in an inner loop relative to the loop being
6512 vectorized are weighted more heavily. The value here is
6513 a function (linear for now) of the loop nest level. */
6514 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
6516 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6517 struct loop
*loop
= LOOP_VINFO_LOOP (loop_info
);
6518 unsigned nest_level
= loop_depth (loop
);
6520 count
*= nest_level
;
6523 retval
= (unsigned) (count
* stmt_cost
);
6524 cost
[where
] += retval
;
6530 static void initialize_aarch64_code_model (void);
6532 /* Parse the architecture extension string. */
6535 aarch64_parse_extension (char *str
)
6537 /* The extension string is parsed left to right. */
6538 const struct aarch64_option_extension
*opt
= NULL
;
6540 /* Flag to say whether we are adding or removing an extension. */
6541 int adding_ext
= -1;
6543 while (str
!= NULL
&& *str
!= 0)
6549 ext
= strchr (str
, '+');
6556 if (len
>= 2 && strncmp (str
, "no", 2) == 0)
6567 error ("missing feature modifier after %qs", adding_ext
? "+"
6572 /* Scan over the extensions table trying to find an exact match. */
6573 for (opt
= all_extensions
; opt
->name
!= NULL
; opt
++)
6575 if (strlen (opt
->name
) == len
&& strncmp (opt
->name
, str
, len
) == 0)
6577 /* Add or remove the extension. */
6579 aarch64_isa_flags
|= opt
->flags_on
;
6581 aarch64_isa_flags
&= ~(opt
->flags_off
);
6586 if (opt
->name
== NULL
)
6588 /* Extension not found in list. */
6589 error ("unknown feature modifier %qs", str
);
6599 /* Parse the ARCH string. */
6602 aarch64_parse_arch (void)
6605 const struct processor
*arch
;
6606 char *str
= (char *) alloca (strlen (aarch64_arch_string
) + 1);
6609 strcpy (str
, aarch64_arch_string
);
6611 ext
= strchr (str
, '+');
6620 error ("missing arch name in -march=%qs", str
);
6624 /* Loop through the list of supported ARCHs to find a match. */
6625 for (arch
= all_architectures
; arch
->name
!= NULL
; arch
++)
6627 if (strlen (arch
->name
) == len
&& strncmp (arch
->name
, str
, len
) == 0)
6629 selected_arch
= arch
;
6630 aarch64_isa_flags
= selected_arch
->flags
;
6633 selected_cpu
= &all_cores
[selected_arch
->core
];
6637 /* ARCH string contains at least one extension. */
6638 aarch64_parse_extension (ext
);
6641 if (strcmp (selected_arch
->arch
, selected_cpu
->arch
))
6643 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6644 selected_cpu
->name
, selected_arch
->name
);
6651 /* ARCH name not found in list. */
6652 error ("unknown value %qs for -march", str
);
6656 /* Parse the CPU string. */
6659 aarch64_parse_cpu (void)
6662 const struct processor
*cpu
;
6663 char *str
= (char *) alloca (strlen (aarch64_cpu_string
) + 1);
6666 strcpy (str
, aarch64_cpu_string
);
6668 ext
= strchr (str
, '+');
6677 error ("missing cpu name in -mcpu=%qs", str
);
6681 /* Loop through the list of supported CPUs to find a match. */
6682 for (cpu
= all_cores
; cpu
->name
!= NULL
; cpu
++)
6684 if (strlen (cpu
->name
) == len
&& strncmp (cpu
->name
, str
, len
) == 0)
6687 aarch64_isa_flags
= selected_cpu
->flags
;
6691 /* CPU string contains at least one extension. */
6692 aarch64_parse_extension (ext
);
6699 /* CPU name not found in list. */
6700 error ("unknown value %qs for -mcpu", str
);
/* Parse the TUNE string.  */

static void
aarch64_parse_tune (void)
{
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
  strcpy (str, aarch64_tune_string);

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strcmp (cpu->name, str) == 0)
        {
          selected_tune = cpu;
          return;
        }
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mtune", str);
  return;
}
6729 /* Implement TARGET_OPTION_OVERRIDE. */
6732 aarch64_override_options (void)
6734 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
6735 If either of -march or -mtune is given, they override their
6736 respective component of -mcpu.
6738 So, first parse AARCH64_CPU_STRING, then the others, be careful
6739 with -march as, if -mcpu is not present on the command line, march
6740 must set a sensible default CPU. */
6741 if (aarch64_cpu_string
)
6743 aarch64_parse_cpu ();
6746 if (aarch64_arch_string
)
6748 aarch64_parse_arch ();
6751 if (aarch64_tune_string
)
6753 aarch64_parse_tune ();
6756 #ifndef HAVE_AS_MABI_OPTION
6757 /* The compiler may have been configured with 2.23.* binutils, which does
6758 not have support for ILP32. */
6760 error ("Assembler does not support -mabi=ilp32");
6763 initialize_aarch64_code_model ();
6765 aarch64_build_bitmask_table ();
6767 /* This target defaults to strict volatile bitfields. */
6768 if (flag_strict_volatile_bitfields
< 0 && abi_version_at_least (2))
6769 flag_strict_volatile_bitfields
= 1;
6771 /* If the user did not specify a processor, choose the default
6772 one for them. This will be the CPU set during configuration using
6773 --with-cpu, otherwise it is "generic". */
6776 selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
& 0x3f];
6777 aarch64_isa_flags
= TARGET_CPU_DEFAULT
>> 6;
6780 gcc_assert (selected_cpu
);
6783 selected_tune
= selected_cpu
;
6785 aarch64_tune_flags
= selected_tune
->flags
;
6786 aarch64_tune
= selected_tune
->core
;
6787 aarch64_tune_params
= selected_tune
->tune
;
6788 aarch64_architecture_version
= selected_cpu
->architecture_version
;
6790 if (aarch64_fix_a53_err835769
== 2)
6792 #ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
6793 aarch64_fix_a53_err835769
= 1;
6795 aarch64_fix_a53_err835769
= 0;
  /* If not optimizing for size, set the default
     alignment to what the target wants.  */
6803 if (align_loops
<= 0)
6804 align_loops
= aarch64_tune_params
->loop_align
;
6805 if (align_jumps
<= 0)
6806 align_jumps
= aarch64_tune_params
->jump_align
;
6807 if (align_functions
<= 0)
6808 align_functions
= aarch64_tune_params
->function_align
;
6811 aarch64_override_options_after_change ();
/* Implement targetm.override_options_after_change.  */

static void
aarch64_override_options_after_change (void)
{
  if (flag_omit_frame_pointer)
    flag_omit_leaf_frame_pointer = false;
  else if (flag_omit_leaf_frame_pointer)
    flag_omit_frame_pointer = true;
}

static struct machine_function *
aarch64_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();
  return machine;
}

void
aarch64_init_expanders (void)
{
  init_machine_status = aarch64_init_machine_status;
}
/* A checking mechanism for the implementation of the various code models.  */

static void
initialize_aarch64_code_model (void)
{
  if (flag_pic)
    {
      switch (aarch64_cmodel_var)
        {
        case AARCH64_CMODEL_TINY:
          aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
          break;
        case AARCH64_CMODEL_SMALL:
          aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
          break;
        case AARCH64_CMODEL_LARGE:
          sorry ("code model %qs with -f%s", "large",
                 flag_pic > 1 ? "PIC" : "pic");
        default:
          gcc_unreachable ();
        }
    }
  else
    aarch64_cmodel = aarch64_cmodel_var;
}
/* Return true if SYMBOL_REF X binds locally.  */

static bool
aarch64_symbol_binds_local_p (const_rtx x)
{
  return (SYMBOL_REF_DECL (x)
          ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
          : SYMBOL_REF_LOCAL_P (x));
}

/* Return true if SYMBOL_REF X is thread local.  */

static bool
aarch64_tls_symbol_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  if (GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_TLS_MODEL (x) != 0;
}

/* Classify a TLS symbol into one of the TLS kinds.  */

enum aarch64_symbol_type
aarch64_classify_tls_symbol (rtx x)
{
  enum tls_model tls_kind = tls_symbolic_operand_type (x);

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
    case TLS_MODEL_LOCAL_DYNAMIC:
      return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;

    case TLS_MODEL_INITIAL_EXEC:
      return SYMBOL_SMALL_GOTTPREL;

    case TLS_MODEL_LOCAL_EXEC:
      return SYMBOL_SMALL_TPREL;

    case TLS_MODEL_EMULATED:
    case TLS_MODEL_NONE:
      return SYMBOL_FORCE_TO_MEM;

    default:
      gcc_unreachable ();
    }
}
6914 /* Return the method that should be used to access SYMBOL_REF or
6915 LABEL_REF X in context CONTEXT. */
6917 enum aarch64_symbol_type
6918 aarch64_classify_symbol (rtx x
, rtx offset
,
6919 enum aarch64_symbol_context context ATTRIBUTE_UNUSED
)
6921 if (GET_CODE (x
) == LABEL_REF
)
6923 switch (aarch64_cmodel
)
6925 case AARCH64_CMODEL_LARGE
:
6926 return SYMBOL_FORCE_TO_MEM
;
6928 case AARCH64_CMODEL_TINY_PIC
:
6929 case AARCH64_CMODEL_TINY
:
6930 return SYMBOL_TINY_ABSOLUTE
;
6932 case AARCH64_CMODEL_SMALL_PIC
:
6933 case AARCH64_CMODEL_SMALL
:
6934 return SYMBOL_SMALL_ABSOLUTE
;
6941 if (GET_CODE (x
) == SYMBOL_REF
)
6943 if (aarch64_cmodel
== AARCH64_CMODEL_LARGE
)
6944 return SYMBOL_FORCE_TO_MEM
;
6946 if (aarch64_tls_symbol_p (x
))
6947 return aarch64_classify_tls_symbol (x
);
6949 switch (aarch64_cmodel
)
6951 case AARCH64_CMODEL_TINY
:
      /* When we retrieve symbol + offset address, we have to make sure
         the offset does not cause overflow of the final address.  But
         we have no way of knowing the address of symbol at compile time
         so we can't accurately say if the distance between the PC and
         symbol + offset is outside the addressable range of +/-1M in the
         TINY code model.  So we rely on images not being greater than
         1M and cap the offset at 1M; anything beyond 1M will have to
         be loaded using an alternative mechanism.  */
6960 if (SYMBOL_REF_WEAK (x
)
6961 || INTVAL (offset
) < -1048575 || INTVAL (offset
) > 1048575)
6962 return SYMBOL_FORCE_TO_MEM
;
6963 return SYMBOL_TINY_ABSOLUTE
;
6965 case AARCH64_CMODEL_SMALL
:
6966 /* Same reasoning as the tiny code model, but the offset cap here is
6968 if (SYMBOL_REF_WEAK (x
)
6969 || INTVAL (offset
) < (HOST_WIDE_INT
) -4294967263
6970 || INTVAL (offset
) > (HOST_WIDE_INT
) 4294967264)
6971 return SYMBOL_FORCE_TO_MEM
;
6972 return SYMBOL_SMALL_ABSOLUTE
;
6974 case AARCH64_CMODEL_TINY_PIC
:
6975 if (!aarch64_symbol_binds_local_p (x
))
6976 return SYMBOL_TINY_GOT
;
6977 return SYMBOL_TINY_ABSOLUTE
;
6979 case AARCH64_CMODEL_SMALL_PIC
:
6980 if (!aarch64_symbol_binds_local_p (x
))
6981 return SYMBOL_SMALL_GOT
;
6982 return SYMBOL_SMALL_ABSOLUTE
;
6989 /* By default push everything into the constant pool. */
6990 return SYMBOL_FORCE_TO_MEM
;
bool
aarch64_constant_address_p (rtx x)
{
  return (CONSTANT_P (x) && memory_address_p (DImode, x));
}

bool
aarch64_legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return false;

  return true;
}

/* Return true if X holds either a quarter-precision or
   floating-point +0.0 constant.  */

static bool
aarch64_valid_floating_const (machine_mode mode, rtx x)
{
  if (!CONST_DOUBLE_P (x))
    return false;

  /* TODO: We could handle moving 0.0 to a TFmode register,
     but first we would like to refactor the movtf_aarch64
     to be more amenable to splitting moves properly and
     correctly gate on TARGET_SIMD.  For now - reject all
     constants which are not to SFmode or DFmode registers.  */
  if (!(mode == SFmode || mode == DFmode))
    return false;

  if (aarch64_float_const_zero_rtx_p (x))
    return true;

  return aarch64_float_const_representable_p (x);
}
7033 aarch64_legitimate_constant_p (machine_mode mode
, rtx x
)
7035 /* Do not allow vector struct mode constants. We could support
7036 0 and -1 easily, but they need support in aarch64-simd.md. */
7037 if (TARGET_SIMD
&& aarch64_vect_struct_mode_p (mode
))
7040 /* This could probably go away because
7041 we now decompose CONST_INTs according to expand_mov_immediate. */
7042 if ((GET_CODE (x
) == CONST_VECTOR
7043 && aarch64_simd_valid_immediate (x
, mode
, false, NULL
))
7044 || CONST_INT_P (x
) || aarch64_valid_floating_const (mode
, x
))
7045 return !targetm
.cannot_force_const_mem (mode
, x
);
7047 if (GET_CODE (x
) == HIGH
7048 && aarch64_valid_symref (XEXP (x
, 0), GET_MODE (XEXP (x
, 0))))
7051 return aarch64_constant_address_p (x
);
7055 aarch64_load_tp (rtx target
)
7058 || GET_MODE (target
) != Pmode
7059 || !register_operand (target
, Pmode
))
7060 target
= gen_reg_rtx (Pmode
);
7062 /* Can return in any reg. */
7063 emit_insn (gen_aarch64_load_tp_hard (target
));
7067 /* On AAPCS systems, this is the "struct __va_list". */
7068 static GTY(()) tree va_list_type
;
7070 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
7071 Return the type to use as __builtin_va_list.
7073 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
7085 aarch64_build_builtin_va_list (void)
7088 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
7090 /* Create the type. */
7091 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
7092 /* Give it the required name. */
7093 va_list_name
= build_decl (BUILTINS_LOCATION
,
7095 get_identifier ("__va_list"),
7097 DECL_ARTIFICIAL (va_list_name
) = 1;
7098 TYPE_NAME (va_list_type
) = va_list_name
;
7099 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
7101 /* Create the fields. */
7102 f_stack
= build_decl (BUILTINS_LOCATION
,
7103 FIELD_DECL
, get_identifier ("__stack"),
7105 f_grtop
= build_decl (BUILTINS_LOCATION
,
7106 FIELD_DECL
, get_identifier ("__gr_top"),
7108 f_vrtop
= build_decl (BUILTINS_LOCATION
,
7109 FIELD_DECL
, get_identifier ("__vr_top"),
7111 f_groff
= build_decl (BUILTINS_LOCATION
,
7112 FIELD_DECL
, get_identifier ("__gr_offs"),
7114 f_vroff
= build_decl (BUILTINS_LOCATION
,
7115 FIELD_DECL
, get_identifier ("__vr_offs"),
7118 DECL_ARTIFICIAL (f_stack
) = 1;
7119 DECL_ARTIFICIAL (f_grtop
) = 1;
7120 DECL_ARTIFICIAL (f_vrtop
) = 1;
7121 DECL_ARTIFICIAL (f_groff
) = 1;
7122 DECL_ARTIFICIAL (f_vroff
) = 1;
7124 DECL_FIELD_CONTEXT (f_stack
) = va_list_type
;
7125 DECL_FIELD_CONTEXT (f_grtop
) = va_list_type
;
7126 DECL_FIELD_CONTEXT (f_vrtop
) = va_list_type
;
7127 DECL_FIELD_CONTEXT (f_groff
) = va_list_type
;
7128 DECL_FIELD_CONTEXT (f_vroff
) = va_list_type
;
7130 TYPE_FIELDS (va_list_type
) = f_stack
;
7131 DECL_CHAIN (f_stack
) = f_grtop
;
7132 DECL_CHAIN (f_grtop
) = f_vrtop
;
7133 DECL_CHAIN (f_vrtop
) = f_groff
;
7134 DECL_CHAIN (f_groff
) = f_vroff
;
7136 /* Compute its layout. */
7137 layout_type (va_list_type
);
7139 return va_list_type
;
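/* Illustrative note (not part of the original sources): the record built
   above corresponds to the AAPCS64 __va_list layout, roughly

       struct __va_list
       {
         void *__stack;
         void *__gr_top;
         void *__vr_top;
         int   __gr_offs;
         int   __vr_offs;
       };

   which is what the va_start/va_arg expanders below fill in and walk.  */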
7142 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
7144 aarch64_expand_builtin_va_start (tree valist
, rtx nextarg ATTRIBUTE_UNUSED
)
7146 const CUMULATIVE_ARGS
*cum
;
7147 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
7148 tree stack
, grtop
, vrtop
, groff
, vroff
;
7150 int gr_save_area_size
;
7151 int vr_save_area_size
;
7154 cum
= &crtl
->args
.info
;
7156 = (NUM_ARG_REGS
- cum
->aapcs_ncrn
) * UNITS_PER_WORD
;
7158 = (NUM_FP_ARG_REGS
- cum
->aapcs_nvrn
) * UNITS_PER_VREG
;
7160 if (TARGET_GENERAL_REGS_ONLY
)
7162 if (cum
->aapcs_nvrn
> 0)
7163 sorry ("%qs and floating point or vector arguments",
7164 "-mgeneral-regs-only");
7165 vr_save_area_size
= 0;
7168 f_stack
= TYPE_FIELDS (va_list_type_node
);
7169 f_grtop
= DECL_CHAIN (f_stack
);
7170 f_vrtop
= DECL_CHAIN (f_grtop
);
7171 f_groff
= DECL_CHAIN (f_vrtop
);
7172 f_vroff
= DECL_CHAIN (f_groff
);
7174 stack
= build3 (COMPONENT_REF
, TREE_TYPE (f_stack
), valist
, f_stack
,
7176 grtop
= build3 (COMPONENT_REF
, TREE_TYPE (f_grtop
), valist
, f_grtop
,
7178 vrtop
= build3 (COMPONENT_REF
, TREE_TYPE (f_vrtop
), valist
, f_vrtop
,
7180 groff
= build3 (COMPONENT_REF
, TREE_TYPE (f_groff
), valist
, f_groff
,
7182 vroff
= build3 (COMPONENT_REF
, TREE_TYPE (f_vroff
), valist
, f_vroff
,
7185 /* Emit code to initialize STACK, which points to the next varargs stack
7186 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
7187 by named arguments. STACK is 8-byte aligned. */
7188 t
= make_tree (TREE_TYPE (stack
), virtual_incoming_args_rtx
);
7189 if (cum
->aapcs_stack_size
> 0)
7190 t
= fold_build_pointer_plus_hwi (t
, cum
->aapcs_stack_size
* UNITS_PER_WORD
);
7191 t
= build2 (MODIFY_EXPR
, TREE_TYPE (stack
), stack
, t
);
7192 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7194 /* Emit code to initialize GRTOP, the top of the GR save area.
7195 virtual_incoming_args_rtx should have been 16 byte aligned. */
7196 t
= make_tree (TREE_TYPE (grtop
), virtual_incoming_args_rtx
);
7197 t
= build2 (MODIFY_EXPR
, TREE_TYPE (grtop
), grtop
, t
);
7198 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7200 /* Emit code to initialize VRTOP, the top of the VR save area.
7201 This address is gr_save_area_bytes below GRTOP, rounded
7202 down to the next 16-byte boundary. */
7203 t
= make_tree (TREE_TYPE (vrtop
), virtual_incoming_args_rtx
);
7204 vr_offset
= AARCH64_ROUND_UP (gr_save_area_size
,
7205 STACK_BOUNDARY
/ BITS_PER_UNIT
);
7208 t
= fold_build_pointer_plus_hwi (t
, -vr_offset
);
7209 t
= build2 (MODIFY_EXPR
, TREE_TYPE (vrtop
), vrtop
, t
);
7210 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7212 /* Emit code to initialize GROFF, the offset from GRTOP of the
7213 next GPR argument. */
7214 t
= build2 (MODIFY_EXPR
, TREE_TYPE (groff
), groff
,
7215 build_int_cst (TREE_TYPE (groff
), -gr_save_area_size
));
7216 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7218 /* Likewise emit code to initialize VROFF, the offset from FTOP
7219 of the next VR argument. */
7220 t
= build2 (MODIFY_EXPR
, TREE_TYPE (vroff
), vroff
,
7221 build_int_cst (TREE_TYPE (vroff
), -vr_save_area_size
));
7222 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7225 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
7228 aarch64_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
7229 gimple_seq
*post_p ATTRIBUTE_UNUSED
)
7233 bool is_ha
; /* is HFA or HVA. */
7234 bool dw_align
; /* double-word align. */
7235 machine_mode ag_mode
= VOIDmode
;
7239 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
7240 tree stack
, f_top
, f_off
, off
, arg
, roundup
, on_stack
;
7241 HOST_WIDE_INT size
, rsize
, adjust
, align
;
7242 tree t
, u
, cond1
, cond2
;
7244 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
7246 type
= build_pointer_type (type
);
7248 mode
= TYPE_MODE (type
);
7250 f_stack
= TYPE_FIELDS (va_list_type_node
);
7251 f_grtop
= DECL_CHAIN (f_stack
);
7252 f_vrtop
= DECL_CHAIN (f_grtop
);
7253 f_groff
= DECL_CHAIN (f_vrtop
);
7254 f_vroff
= DECL_CHAIN (f_groff
);
7256 stack
= build3 (COMPONENT_REF
, TREE_TYPE (f_stack
), unshare_expr (valist
),
7257 f_stack
, NULL_TREE
);
7258 size
= int_size_in_bytes (type
);
7259 align
= aarch64_function_arg_alignment (mode
, type
) / BITS_PER_UNIT
;
7263 if (aarch64_vfp_is_call_or_return_candidate (mode
,
7269 /* TYPE passed in fp/simd registers. */
7270 if (TARGET_GENERAL_REGS_ONLY
)
7271 sorry ("%qs and floating point or vector arguments",
7272 "-mgeneral-regs-only");
7274 f_top
= build3 (COMPONENT_REF
, TREE_TYPE (f_vrtop
),
7275 unshare_expr (valist
), f_vrtop
, NULL_TREE
);
7276 f_off
= build3 (COMPONENT_REF
, TREE_TYPE (f_vroff
),
7277 unshare_expr (valist
), f_vroff
, NULL_TREE
);
7279 rsize
= nregs
* UNITS_PER_VREG
;
7283 if (BYTES_BIG_ENDIAN
&& GET_MODE_SIZE (ag_mode
) < UNITS_PER_VREG
)
7284 adjust
= UNITS_PER_VREG
- GET_MODE_SIZE (ag_mode
);
7286 else if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
7287 && size
< UNITS_PER_VREG
)
7289 adjust
= UNITS_PER_VREG
- size
;
7294 /* TYPE passed in general registers. */
7295 f_top
= build3 (COMPONENT_REF
, TREE_TYPE (f_grtop
),
7296 unshare_expr (valist
), f_grtop
, NULL_TREE
);
7297 f_off
= build3 (COMPONENT_REF
, TREE_TYPE (f_groff
),
7298 unshare_expr (valist
), f_groff
, NULL_TREE
);
7299 rsize
= (size
+ UNITS_PER_WORD
- 1) & -UNITS_PER_WORD
;
7300 nregs
= rsize
/ UNITS_PER_WORD
;
7305 if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
7306 && size
< UNITS_PER_WORD
)
7308 adjust
= UNITS_PER_WORD
- size
;
7312 /* Get a local temporary for the field value. */
7313 off
= get_initialized_tmp_var (f_off
, pre_p
, NULL
);
7315 /* Emit code to branch if off >= 0. */
7316 t
= build2 (GE_EXPR
, boolean_type_node
, off
,
7317 build_int_cst (TREE_TYPE (off
), 0));
7318 cond1
= build3 (COND_EXPR
, ptr_type_node
, t
, NULL_TREE
, NULL_TREE
);
7322 /* Emit: offs = (offs + 15) & -16. */
7323 t
= build2 (PLUS_EXPR
, TREE_TYPE (off
), off
,
7324 build_int_cst (TREE_TYPE (off
), 15));
7325 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (off
), t
,
7326 build_int_cst (TREE_TYPE (off
), -16));
7327 roundup
= build2 (MODIFY_EXPR
, TREE_TYPE (off
), off
, t
);
7332 /* Update ap.__[g|v]r_offs */
7333 t
= build2 (PLUS_EXPR
, TREE_TYPE (off
), off
,
7334 build_int_cst (TREE_TYPE (off
), rsize
));
7335 t
= build2 (MODIFY_EXPR
, TREE_TYPE (f_off
), unshare_expr (f_off
), t
);
7339 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), roundup
, t
);
7341 /* [cond2] if (ap.__[g|v]r_offs > 0) */
7342 u
= build2 (GT_EXPR
, boolean_type_node
, unshare_expr (f_off
),
7343 build_int_cst (TREE_TYPE (f_off
), 0));
7344 cond2
= build3 (COND_EXPR
, ptr_type_node
, u
, NULL_TREE
, NULL_TREE
);
7346 /* String up: make sure the assignment happens before the use. */
7347 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (cond2
), t
, cond2
);
7348 COND_EXPR_ELSE (cond1
) = t
;
7350 /* Prepare the trees handling the argument that is passed on the stack;
7351 the top level node will store in ON_STACK. */
7352 arg
= get_initialized_tmp_var (stack
, pre_p
, NULL
);
7355 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
7356 t
= fold_convert (intDI_type_node
, arg
);
7357 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
7358 build_int_cst (TREE_TYPE (t
), 15));
7359 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
7360 build_int_cst (TREE_TYPE (t
), -16));
7361 t
= fold_convert (TREE_TYPE (arg
), t
);
7362 roundup
= build2 (MODIFY_EXPR
, TREE_TYPE (arg
), arg
, t
);
7366 /* Advance ap.__stack */
7367 t
= fold_convert (intDI_type_node
, arg
);
7368 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
7369 build_int_cst (TREE_TYPE (t
), size
+ 7));
7370 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
7371 build_int_cst (TREE_TYPE (t
), -8));
7372 t
= fold_convert (TREE_TYPE (arg
), t
);
7373 t
= build2 (MODIFY_EXPR
, TREE_TYPE (stack
), unshare_expr (stack
), t
);
7374 /* String up roundup and advance. */
7376 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), roundup
, t
);
7377 /* String up with arg */
7378 on_stack
= build2 (COMPOUND_EXPR
, TREE_TYPE (arg
), t
, arg
);
7379 /* Big-endianness related address adjustment. */
7380 if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
7381 && size
< UNITS_PER_WORD
)
7383 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (arg
), arg
,
7384 size_int (UNITS_PER_WORD
- size
));
7385 on_stack
= build2 (COMPOUND_EXPR
, TREE_TYPE (arg
), on_stack
, t
);
7388 COND_EXPR_THEN (cond1
) = unshare_expr (on_stack
);
7389 COND_EXPR_THEN (cond2
) = unshare_expr (on_stack
);
7391 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
7394 t
= build2 (PREINCREMENT_EXPR
, TREE_TYPE (off
), off
,
7395 build_int_cst (TREE_TYPE (off
), adjust
));
7397 t
= fold_convert (sizetype
, t
);
7398 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (f_top
), f_top
, t
);
7402 /* type ha; // treat as "struct {ftype field[n];}"
7403 ... [computing offs]
7404 for (i = 0; i <nregs; ++i, offs += 16)
7405 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
7408 tree tmp_ha
, field_t
, field_ptr_t
;
7410 /* Declare a local variable. */
7411 tmp_ha
= create_tmp_var_raw (type
, "ha");
7412 gimple_add_tmp_var (tmp_ha
);
7414 /* Establish the base type. */
7418 field_t
= float_type_node
;
7419 field_ptr_t
= float_ptr_type_node
;
7422 field_t
= double_type_node
;
7423 field_ptr_t
= double_ptr_type_node
;
7426 field_t
= long_double_type_node
;
7427 field_ptr_t
= long_double_ptr_type_node
;
7429 /* The half precision and quad precision are not fully supported yet. Enable
7430 the following code after the support is complete. Need to find the correct
7431 type node for __fp16 *. */
7434 field_t
= float_type_node
;
7435 field_ptr_t
= float_ptr_type_node
;
7441 tree innertype
= make_signed_type (GET_MODE_PRECISION (SImode
));
7442 field_t
= build_vector_type_for_mode (innertype
, ag_mode
);
7443 field_ptr_t
= build_pointer_type (field_t
);
7450 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
7451 tmp_ha
= build1 (ADDR_EXPR
, field_ptr_t
, tmp_ha
);
7453 t
= fold_convert (field_ptr_t
, addr
);
7454 t
= build2 (MODIFY_EXPR
, field_t
,
7455 build1 (INDIRECT_REF
, field_t
, tmp_ha
),
7456 build1 (INDIRECT_REF
, field_t
, t
));
7458 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
7459 for (i
= 1; i
< nregs
; ++i
)
7461 addr
= fold_build_pointer_plus_hwi (addr
, UNITS_PER_VREG
);
7462 u
= fold_convert (field_ptr_t
, addr
);
7463 u
= build2 (MODIFY_EXPR
, field_t
,
7464 build2 (MEM_REF
, field_t
, tmp_ha
,
7465 build_int_cst (field_ptr_t
,
7467 int_size_in_bytes (field_t
)))),
7468 build1 (INDIRECT_REF
, field_t
, u
));
7469 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), t
, u
);
7472 u
= fold_convert (TREE_TYPE (f_top
), tmp_ha
);
7473 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (f_top
), t
, u
);
7476 COND_EXPR_ELSE (cond2
) = t
;
7477 addr
= fold_convert (build_pointer_type (type
), cond1
);
7478 addr
= build_va_arg_indirect_ref (addr
);
7481 addr
= build_va_arg_indirect_ref (addr
);
/* Implement TARGET_SETUP_INCOMING_VARARGS.  */
aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
				tree type, int *pretend_size ATTRIBUTE_UNUSED,
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS local_cum;
  int gr_saved, vr_saved;

  /* The caller has advanced CUM up to, but not beyond, the last named
     argument.  Advance a local copy of CUM past the last "real" named
     argument, to find out how many registers are left over.  */
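  /* Illustrative example (assumed, not from the original source): for a
     prototype such as  void f (int a, ...)  the caller leaves the local copy
     of CUM advanced past the single named general register, so gr_saved
     would be NUM_ARG_REGS - 1 and vr_saved would be NUM_FP_ARG_REGS; those
     leftover registers are then dumped into the register save area laid out
     below.  */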
  aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);

  /* Find out how many registers we need to save.  */
  gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
  vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;

  if (TARGET_GENERAL_REGS_ONLY)
      if (local_cum.aapcs_nvrn > 0)
	sorry ("%qs and floating point or vector arguments",
	       "-mgeneral-regs-only");

      /* virtual_incoming_args_rtx should have been 16-byte aligned.  */
      ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
			   - gr_saved * UNITS_PER_WORD);
      mem = gen_frame_mem (BLKmode, ptr);
      set_mem_alias_set (mem, get_varargs_alias_set ());

      move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,

	  /* We can't use move_block_from_reg, because it will use
	     the wrong mode, storing D regs only.  */
	  machine_mode mode = TImode;

	  /* Set OFF to the offset from virtual_incoming_args_rtx of
	     the first vector register.  The VR save area lies below
	     the GR one, and is aligned to 16 bytes.  */
	  off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
				   STACK_BOUNDARY / BITS_PER_UNIT);
	  off -= vr_saved * UNITS_PER_VREG;

	  for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
	      ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
	      mem = gen_frame_mem (mode, ptr);
	      set_mem_alias_set (mem, get_varargs_alias_set ());
	      aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
	      off += UNITS_PER_VREG;

  /* We don't save the size into *PRETEND_SIZE because we want to avoid
     any complication of having crtl->args.pretend_args_size changed.  */
  cfun->machine->frame.saved_varargs_size
    = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
			 STACK_BOUNDARY / BITS_PER_UNIT)
       + vr_saved * UNITS_PER_VREG);
7566 aarch64_conditional_register_usage (void)
7571 for (i
= V0_REGNUM
; i
<= V31_REGNUM
; i
++)
7574 call_used_regs
[i
] = 1;
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */
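/* Illustrative examples (assumed, not part of the original source):

     struct { float x, y, z; }          -> returns 3, *modep == SFmode
     struct { double d; double e[3]; }  -> returns 4, *modep == DFmode
     struct { float f; double d; }      -> returns -1 (mixed base types)  */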
7585 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
)
7590 switch (TREE_CODE (type
))
7593 mode
= TYPE_MODE (type
);
7594 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= TFmode
)
7597 if (*modep
== VOIDmode
)
7606 mode
= TYPE_MODE (TREE_TYPE (type
));
7607 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= TFmode
)
7610 if (*modep
== VOIDmode
)
7619 /* Use V2SImode and V4SImode as representatives of all 64-bit
7620 and 128-bit vector types. */
7621 size
= int_size_in_bytes (type
);
7634 if (*modep
== VOIDmode
)
7637 /* Vector modes are considered to be opaque: two vectors are
7638 equivalent for the purposes of being homogeneous aggregates
7639 if they are the same size. */
7648 tree index
= TYPE_DOMAIN (type
);
7650 /* Can't handle incomplete types nor sizes that are not
7652 if (!COMPLETE_TYPE_P (type
)
7653 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
7656 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
7659 || !TYPE_MAX_VALUE (index
)
7660 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
7661 || !TYPE_MIN_VALUE (index
)
7662 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
7666 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
7667 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
7669 /* There must be no padding. */
7670 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
7682 /* Can't handle incomplete types nor sizes that are not
7684 if (!COMPLETE_TYPE_P (type
)
7685 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
7688 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
7690 if (TREE_CODE (field
) != FIELD_DECL
)
7693 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
7699 /* There must be no padding. */
7700 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
7707 case QUAL_UNION_TYPE
:
7709 /* These aren't very interesting except in a degenerate case. */
7714 /* Can't handle incomplete types nor sizes that are not
7716 if (!COMPLETE_TYPE_P (type
)
7717 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
7720 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
7722 if (TREE_CODE (field
) != FIELD_DECL
)
7725 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
7728 count
= count
> sub_count
? count
: sub_count
;
7731 /* There must be no padding. */
7732 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
/* Return true if we use LRA instead of reload pass.  */
aarch64_lra_p (void)
  return aarch64_lra_flag;

/* Return TRUE if the type, as described by TYPE and MODE, is a composite
   type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
   array types.  The C99 floating-point complex types are also considered
   as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
   types, which are GCC extensions and out of the scope of AAPCS64, are
   treated as composite types here as well.

   Note that MODE itself is not sufficient in determining whether a type
   is such a composite type or not.  This is because
   stor-layout.c:compute_record_mode may have already changed the MODE
   (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
   structure with only one field may have its MODE set to the mode of the
   field.  Also an integer mode whose size matches the size of the
   RECORD_TYPE type may be used to substitute the original mode
   (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot be
   solely relied on.  */
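/* For instance (illustrative only):

     struct S { float f; };

   compute_record_mode may give S the mode SFmode even though it is an
   aggregate, so the check below consults AGGREGATE_TYPE_P (type) rather
   than trusting MODE alone.  */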
aarch64_composite_type_p (const_tree type,
  if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)

/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
   type as described in AAPCS64 \S 4.1.2.

   See the comment above aarch64_composite_type_p for the notes on MODE.  */
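/* Illustrative examples (assumed): a 16-byte int32x4_t and an 8-byte
   float32x2_t are short vectors; a 32-byte GNU vector type is not, and
   neither is a struct wrapping a vector (that is a composite type).  */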
aarch64_short_vector_p (const_tree type,
  HOST_WIDE_INT size = -1;

  if (type && TREE_CODE (type) == VECTOR_TYPE)
    size = int_size_in_bytes (type);
  else if (!aarch64_composite_type_p (type, mode)
	   && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	       || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
    size = GET_MODE_SIZE (mode);

  return (size == 8 || size == 16);

/* Return TRUE if an argument, whose type is described by TYPE and MODE,
   shall be passed or returned in simd/fp register(s) (providing these
   parameter passing registers are available).

   Upon successful return, *COUNT returns the number of needed registers,
   *BASE_MODE returns the mode of the individual register and when IS_HA
   is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
   floating-point aggregate or a homogeneous short-vector aggregate.  */
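/* Worked example (illustrative, not part of the original source):

     struct hfa { double x, y; };

   is a homogeneous floating-point aggregate: the walk over its fields yields
   *base_mode == DFmode, *count == 2 and, when requested, *is_ha == true, so
   the value travels in two consecutive FP/SIMD registers.  */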
aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
					 machine_mode *base_mode,
  machine_mode new_mode = VOIDmode;
  bool composite_p = aarch64_composite_type_p (type, mode);

  if (is_ha != NULL) *is_ha = false;

  if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
      || aarch64_short_vector_p (type, mode))
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
      if (is_ha != NULL) *is_ha = true;
      new_mode = GET_MODE_INNER (mode);
  else if (type && composite_p)
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
	  if (is_ha != NULL) *is_ha = true;

  *base_mode = new_mode;

/* Implement TARGET_STRUCT_VALUE_RTX.  */
aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
			  int incoming ATTRIBUTE_UNUSED)
  return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
7866 /* Implements target hook vector_mode_supported_p. */
7868 aarch64_vector_mode_supported_p (machine_mode mode
)
7871 && (mode
== V4SImode
|| mode
== V8HImode
7872 || mode
== V16QImode
|| mode
== V2DImode
7873 || mode
== V2SImode
|| mode
== V4HImode
7874 || mode
== V8QImode
|| mode
== V2SFmode
7875 || mode
== V4SFmode
|| mode
== V2DFmode
7876 || mode
== V1DFmode
))
7882 /* Return appropriate SIMD container
7883 for MODE within a vector of WIDTH bits. */
7885 aarch64_simd_container_mode (machine_mode mode
, unsigned width
)
7887 gcc_assert (width
== 64 || width
== 128);
7926 /* Return 128-bit container as the preferred SIMD mode for MODE. */
7928 aarch64_preferred_simd_mode (machine_mode mode
)
7930 return aarch64_simd_container_mode (mode
, 128);
/* Return the bitmask of possible vector sizes for the vectorizer
aarch64_autovectorize_vector_sizes (void)

/* Implement TARGET_MANGLE_TYPE.  */
aarch64_mangle_type (const_tree type)
  /* The AArch64 ABI documents say that "__va_list" has to be
     mangled as if it is in the "std" namespace.  */
  if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Mangle AArch64-specific internal types.  TYPE_NAME is non-NULL_TREE for
  if (TYPE_NAME (type) != NULL)
    return aarch64_mangle_builtin_type (type);

  /* Use the default mangling.  */
7961 /* Return true if the rtx_insn contains a MEM RTX somewhere
7965 has_memory_op (rtx_insn
*mem_insn
)
7967 subrtx_iterator::array_type array
;
7968 FOR_EACH_SUBRTX (iter
, array
, PATTERN (mem_insn
), ALL
)
7975 /* Find the first rtx_insn before insn that will generate an assembly
7979 aarch64_prev_real_insn (rtx_insn
*insn
)
7986 insn
= prev_real_insn (insn
);
7988 while (insn
&& recog_memoized (insn
) < 0);
7994 is_madd_op (enum attr_type t1
)
7997 /* A number of these may be AArch32 only. */
7998 enum attr_type mlatypes
[] = {
7999 TYPE_MLA
, TYPE_MLAS
, TYPE_SMLAD
, TYPE_SMLADX
, TYPE_SMLAL
, TYPE_SMLALD
,
8000 TYPE_SMLALS
, TYPE_SMLALXY
, TYPE_SMLAWX
, TYPE_SMLAWY
, TYPE_SMLAXY
,
8001 TYPE_SMMLA
, TYPE_UMLAL
, TYPE_UMLALS
,TYPE_SMLSD
, TYPE_SMLSDX
, TYPE_SMLSLD
8004 for (i
= 0; i
< sizeof (mlatypes
) / sizeof (enum attr_type
); i
++)
8006 if (t1
== mlatypes
[i
])
8013 /* Check if there is a register dependency between a load and the insn
8014 for which we hold recog_data. */
8017 dep_between_memop_and_curr (rtx memop
)
8022 gcc_assert (GET_CODE (memop
) == SET
);
8024 if (!REG_P (SET_DEST (memop
)))
8027 load_reg
= SET_DEST (memop
);
8028 for (opno
= 1; opno
< recog_data
.n_operands
; opno
++)
8030 rtx operand
= recog_data
.operand
[opno
];
8032 && reg_overlap_mentioned_p (load_reg
, operand
))
/* When working around the Cortex-A53 erratum 835769,
   given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
   instruction and has a preceding memory instruction such that a NOP
   should be inserted between them.  */
aarch64_madd_needs_nop (rtx_insn *insn)
  enum attr_type attr_type;

  if (!aarch64_fix_a53_err835769)
  if (recog_memoized (insn) < 0)

  attr_type = get_attr_type (insn);
  if (!is_madd_op (attr_type))

  prev = aarch64_prev_real_insn (insn);
  /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
     Restore recog state to INSN to avoid state corruption.  */
  extract_constrain_insn_cached (insn);

  if (!prev || !has_memory_op (prev))

  body = single_set (prev);

  /* If the previous insn is a memory op and there is no dependency between
     it and the DImode madd, emit a NOP between them.  If body is NULL then we
     have a complex memory operation, probably a load/store pair.
     Be conservative for now and emit a NOP.  */
  if (GET_MODE (recog_data.operand[0]) == DImode
      && (!body || !dep_between_memop_and_curr (body)))

/* Implement FINAL_PRESCAN_INSN.  */
aarch64_final_prescan_insn (rtx_insn *insn)
  if (aarch64_madd_needs_nop (insn))
    fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
8095 /* Return the equivalent letter for size. */
8097 sizetochar (int size
)
8101 case 64: return 'd';
8102 case 32: return 's';
8103 case 16: return 'h';
8104 case 8 : return 'b';
8105 default: gcc_unreachable ();
8109 /* Return true iff x is a uniform vector of floating-point
8110 constants, and the constant can be represented in
8111 quarter-precision form. Note, as aarch64_float_const_representable
8112 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
8114 aarch64_vect_float_const_representable_p (rtx x
)
8117 REAL_VALUE_TYPE r0
, ri
;
8120 if (GET_MODE_CLASS (GET_MODE (x
)) != MODE_VECTOR_FLOAT
)
8123 x0
= CONST_VECTOR_ELT (x
, 0);
8124 if (!CONST_DOUBLE_P (x0
))
8127 REAL_VALUE_FROM_CONST_DOUBLE (r0
, x0
);
8129 for (i
= 1; i
< CONST_VECTOR_NUNITS (x
); i
++)
8131 xi
= CONST_VECTOR_ELT (x
, i
);
8132 if (!CONST_DOUBLE_P (xi
))
8135 REAL_VALUE_FROM_CONST_DOUBLE (ri
, xi
);
8136 if (!REAL_VALUES_EQUAL (r0
, ri
))
8140 return aarch64_float_const_representable_p (x0
);
8143 /* Return true for valid and false for invalid. */
8145 aarch64_simd_valid_immediate (rtx op
, machine_mode mode
, bool inverse
,
8146 struct simd_immediate_info
*info
)
8148 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
8150 for (i = 0; i < idx; i += (STRIDE)) \
8155 immtype = (CLASS); \
8156 elsize = (ELSIZE); \
8162 unsigned int i
, elsize
= 0, idx
= 0, n_elts
= CONST_VECTOR_NUNITS (op
);
8163 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
8164 unsigned char bytes
[16];
8165 int immtype
= -1, matches
;
8166 unsigned int invmask
= inverse
? 0xff : 0;
8169 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
8171 if (! (aarch64_simd_imm_zero_p (op
, mode
)
8172 || aarch64_vect_float_const_representable_p (op
)))
8177 info
->value
= CONST_VECTOR_ELT (op
, 0);
8178 info
->element_width
= GET_MODE_BITSIZE (GET_MODE (info
->value
));
8186 /* Splat vector constant out into a byte vector. */
8187 for (i
= 0; i
< n_elts
; i
++)
8189 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
8190 it must be laid out in the vector register in reverse order. */
8191 rtx el
= CONST_VECTOR_ELT (op
, BYTES_BIG_ENDIAN
? (n_elts
- 1 - i
) : i
);
8192 unsigned HOST_WIDE_INT elpart
;
8193 unsigned int part
, parts
;
8195 if (CONST_INT_P (el
))
8197 elpart
= INTVAL (el
);
8200 else if (GET_CODE (el
) == CONST_DOUBLE
)
8202 elpart
= CONST_DOUBLE_LOW (el
);
8208 for (part
= 0; part
< parts
; part
++)
8211 for (byte
= 0; byte
< innersize
; byte
++)
8213 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
8214 elpart
>>= BITS_PER_UNIT
;
8216 if (GET_CODE (el
) == CONST_DOUBLE
)
8217 elpart
= CONST_DOUBLE_HIGH (el
);
8222 gcc_assert (idx
== GET_MODE_SIZE (mode
));
8226 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
8227 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 0, 0);
8229 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
8230 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 8, 0);
8232 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8233 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0, 16, 0);
8235 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8236 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3], 24, 0);
8238 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0, 0, 0);
8240 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1], 8, 0);
8242 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
8243 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 0, 1);
8245 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
8246 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 8, 1);
8248 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8249 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff, 16, 1);
8251 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8252 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3], 24, 1);
8254 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff, 0, 1);
8256 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1], 8, 1);
8258 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
8259 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 8, 0);
8261 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
8262 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 8, 1);
8264 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8265 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0, 16, 0);
8267 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8268 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff, 16, 1);
8270 CHECK (1, 8, 16, bytes
[i
] == bytes
[0], 0, 0);
8272 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
8273 && bytes
[i
] == bytes
[(i
+ 8) % idx
], 0, 0);
8282 info
->element_width
= elsize
;
8283 info
->mvn
= emvn
!= 0;
8284 info
->shift
= eshift
;
8286 unsigned HOST_WIDE_INT imm
= 0;
8288 if (immtype
>= 12 && immtype
<= 15)
8291 /* Un-invert bytes of recognized vector, if necessary. */
8293 for (i
= 0; i
< idx
; i
++)
8294 bytes
[i
] ^= invmask
;
8298 /* FIXME: Broken on 32-bit H_W_I hosts. */
8299 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
8301 for (i
= 0; i
< 8; i
++)
8302 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
8303 << (i
* BITS_PER_UNIT
);
8306 info
->value
= GEN_INT (imm
);
8310 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
8311 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
8313 /* Construct 'abcdefgh' because the assembler cannot handle
8314 generic constants. */
8317 imm
= (imm
>> info
->shift
) & 0xff;
8318 info
->value
= GEN_INT (imm
);
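/* Worked example (illustrative, not from the original source): the V4SImode
   constant { 0x00ff0000, 0x00ff0000, 0x00ff0000, 0x00ff0000 } splats to the
   byte pattern 00 00 ff 00 repeated four times, which matches the
   CHECK (4, 32, 2, ...) case above.  The immediate is therefore accepted
   with element_width == 32, shift == 16 and mvn == false, and is later
   printed along the lines of "movi v0.4s, 0xff, lsl 16".  */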
/* Check that immediate shift constants are within range.  */
aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
  int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
    return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
    return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
8337 /* Return true if X is a uniform vector where all elements
8338 are either the floating-point constant 0.0 or the
8339 integer constant 0. */
8341 aarch64_simd_imm_zero_p (rtx x
, machine_mode mode
)
8343 return x
== CONST0_RTX (mode
);
8347 aarch64_simd_imm_scalar_p (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
)
8349 HOST_WIDE_INT imm
= INTVAL (x
);
8352 for (i
= 0; i
< 8; i
++)
8354 unsigned int byte
= imm
& 0xff;
8355 if (byte
!= 0xff && byte
!= 0)
8364 aarch64_mov_operand_p (rtx x
,
8365 enum aarch64_symbol_context context
,
8368 if (GET_CODE (x
) == HIGH
8369 && aarch64_valid_symref (XEXP (x
, 0), GET_MODE (XEXP (x
, 0))))
8372 if (CONST_INT_P (x
))
8375 if (GET_CODE (x
) == SYMBOL_REF
&& mode
== DImode
&& CONSTANT_ADDRESS_P (x
))
8378 return aarch64_classify_symbolic_expression (x
, context
)
8379 == SYMBOL_TINY_ABSOLUTE
;
8382 /* Return a const_int vector of VAL. */
8384 aarch64_simd_gen_const_vector_dup (machine_mode mode
, int val
)
8386 int nunits
= GET_MODE_NUNITS (mode
);
8387 rtvec v
= rtvec_alloc (nunits
);
8390 for (i
=0; i
< nunits
; i
++)
8391 RTVEC_ELT (v
, i
) = GEN_INT (val
);
8393 return gen_rtx_CONST_VECTOR (mode
, v
);
8396 /* Check OP is a legal scalar immediate for the MOVI instruction. */
8399 aarch64_simd_scalar_immediate_valid_for_move (rtx op
, machine_mode mode
)
8403 gcc_assert (!VECTOR_MODE_P (mode
));
8404 vmode
= aarch64_preferred_simd_mode (mode
);
8405 rtx op_v
= aarch64_simd_gen_const_vector_dup (vmode
, INTVAL (op
));
8406 return aarch64_simd_valid_immediate (op_v
, vmode
, false, NULL
);
8409 /* Construct and return a PARALLEL RTX vector with elements numbering the
8410 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
8411 the vector - from the perspective of the architecture. This does not
8412 line up with GCC's perspective on lane numbers, so we end up with
8413 different masks depending on our target endian-ness. The diagram
8414 below may help. We must draw the distinction when building masks
8415 which select one half of the vector. An instruction selecting
8416 architectural low-lanes for a big-endian target, must be described using
8417 a mask selecting GCC high-lanes.
8419 Big-Endian Little-Endian
8422 | x | x | x | x | | x | x | x | x |
8423 Architecture 3 2 1 0 3 2 1 0
8425 Low Mask: { 2, 3 } { 0, 1 }
8426 High Mask: { 0, 1 } { 2, 3 }
8430 aarch64_simd_vect_par_cnst_half (machine_mode mode
, bool high
)
8432 int nunits
= GET_MODE_NUNITS (mode
);
8433 rtvec v
= rtvec_alloc (nunits
/ 2);
8434 int high_base
= nunits
/ 2;
8440 if (BYTES_BIG_ENDIAN
)
8441 base
= high
? low_base
: high_base
;
8443 base
= high
? high_base
: low_base
;
8445 for (i
= 0; i
< nunits
/ 2; i
++)
8446 RTVEC_ELT (v
, i
) = GEN_INT (base
+ i
);
8448 t1
= gen_rtx_PARALLEL (mode
, v
);
8452 /* Check OP for validity as a PARALLEL RTX vector with elements
8453 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
8454 from the perspective of the architecture. See the diagram above
8455 aarch64_simd_vect_par_cnst_half for more details. */
8458 aarch64_simd_check_vect_par_cnst_half (rtx op
, machine_mode mode
,
8461 rtx ideal
= aarch64_simd_vect_par_cnst_half (mode
, high
);
8462 HOST_WIDE_INT count_op
= XVECLEN (op
, 0);
8463 HOST_WIDE_INT count_ideal
= XVECLEN (ideal
, 0);
8466 if (!VECTOR_MODE_P (mode
))
8469 if (count_op
!= count_ideal
)
8472 for (i
= 0; i
< count_ideal
; i
++)
8474 rtx elt_op
= XVECEXP (op
, 0, i
);
8475 rtx elt_ideal
= XVECEXP (ideal
, 0, i
);
8477 if (!CONST_INT_P (elt_op
)
8478 || INTVAL (elt_ideal
) != INTVAL (elt_op
))
8484 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
8485 HIGH (exclusive). */
8487 aarch64_simd_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
8491 gcc_assert (CONST_INT_P (operand
));
8492 lane
= INTVAL (operand
);
8494 if (lane
< low
|| lane
>= high
)
8497 error ("%Klane %ld out of range %ld - %ld", exp
, lane
, low
, high
- 1);
8499 error ("lane %ld out of range %ld - %ld", lane
, low
, high
- 1);
/* Emit code to place an AdvSIMD pair result in memory locations (with equal
8506 aarch64_simd_emit_pair_result_insn (machine_mode mode
,
8507 rtx (*intfn
) (rtx
, rtx
, rtx
), rtx destaddr
,
8510 rtx mem
= gen_rtx_MEM (mode
, destaddr
);
8511 rtx tmp1
= gen_reg_rtx (mode
);
8512 rtx tmp2
= gen_reg_rtx (mode
);
8514 emit_insn (intfn (tmp1
, op1
, tmp2
));
8516 emit_move_insn (mem
, tmp1
);
8517 mem
= adjust_address (mem
, mode
, GET_MODE_SIZE (mode
));
8518 emit_move_insn (mem
, tmp2
);
8521 /* Return TRUE if OP is a valid vector addressing mode. */
8523 aarch64_simd_mem_operand_p (rtx op
)
8525 return MEM_P (op
) && (GET_CODE (XEXP (op
, 0)) == POST_INC
8526 || REG_P (XEXP (op
, 0)));
8529 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
8530 not to early-clobber SRC registers in the process.
8532 We assume that the operands described by SRC and DEST represent a
8533 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
8534 number of components into which the copy has been decomposed. */
8536 aarch64_simd_disambiguate_copy (rtx
*operands
, rtx
*dest
,
8537 rtx
*src
, unsigned int count
)
8541 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
8542 || REGNO (operands
[0]) < REGNO (operands
[1]))
8544 for (i
= 0; i
< count
; i
++)
8546 operands
[2 * i
] = dest
[i
];
8547 operands
[2 * i
+ 1] = src
[i
];
8552 for (i
= 0; i
< count
; i
++)
8554 operands
[2 * i
] = dest
[count
- i
- 1];
8555 operands
[2 * i
+ 1] = src
[count
- i
- 1];
8560 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
8561 one of VSTRUCT modes: OI, CI or XI. */
8563 aarch64_simd_attr_length_move (rtx_insn
*insn
)
8567 extract_insn_cached (insn
);
8569 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
8571 mode
= GET_MODE (recog_data
.operand
[0]);
8587 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
8588 alignment of a vector to 128 bits. */
8589 static HOST_WIDE_INT
8590 aarch64_simd_vector_alignment (const_tree type
)
8592 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
8593 return MIN (align
, 128);
8596 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
8598 aarch64_simd_vector_alignment_reachable (const_tree type
, bool is_packed
)
8603 /* We guarantee alignment for vectors up to 128-bits. */
8604 if (tree_int_cst_compare (TYPE_SIZE (type
),
8605 bitsize_int (BIGGEST_ALIGNMENT
)) > 0)
8608 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
8612 /* If VALS is a vector constant that can be loaded into a register
8613 using DUP, generate instructions to do so and return an RTX to
8614 assign to the register. Otherwise return NULL_RTX. */
8616 aarch64_simd_dup_constant (rtx vals
)
8618 machine_mode mode
= GET_MODE (vals
);
8619 machine_mode inner_mode
= GET_MODE_INNER (mode
);
8620 int n_elts
= GET_MODE_NUNITS (mode
);
8621 bool all_same
= true;
8625 if (GET_CODE (vals
) != CONST_VECTOR
)
8628 for (i
= 1; i
< n_elts
; ++i
)
8630 x
= CONST_VECTOR_ELT (vals
, i
);
8631 if (!rtx_equal_p (x
, CONST_VECTOR_ELT (vals
, 0)))
8638 /* We can load this constant by using DUP and a constant in a
8639 single ARM register. This will be cheaper than a vector
8641 x
= copy_to_mode_reg (inner_mode
, CONST_VECTOR_ELT (vals
, 0));
8642 return gen_rtx_VEC_DUPLICATE (mode
, x
);
8646 /* Generate code to load VALS, which is a PARALLEL containing only
8647 constants (for vec_init) or CONST_VECTOR, efficiently into a
8648 register. Returns an RTX to copy into the register, or NULL_RTX
8649 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8651 aarch64_simd_make_constant (rtx vals
)
8653 machine_mode mode
= GET_MODE (vals
);
8655 rtx const_vec
= NULL_RTX
;
8656 int n_elts
= GET_MODE_NUNITS (mode
);
8660 if (GET_CODE (vals
) == CONST_VECTOR
)
8662 else if (GET_CODE (vals
) == PARALLEL
)
8664 /* A CONST_VECTOR must contain only CONST_INTs and
8665 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8666 Only store valid constants in a CONST_VECTOR. */
8667 for (i
= 0; i
< n_elts
; ++i
)
8669 rtx x
= XVECEXP (vals
, 0, i
);
8670 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
8673 if (n_const
== n_elts
)
8674 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
8679 if (const_vec
!= NULL_RTX
8680 && aarch64_simd_valid_immediate (const_vec
, mode
, false, NULL
))
8681 /* Load using MOVI/MVNI. */
8683 else if ((const_dup
= aarch64_simd_dup_constant (vals
)) != NULL_RTX
)
8684 /* Loaded using DUP. */
8686 else if (const_vec
!= NULL_RTX
)
8687 /* Load from constant pool. We can not take advantage of single-cycle
8688 LD1 because we need a PC-relative addressing mode. */
8691 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8692 We can not construct an initializer. */
8697 aarch64_expand_vector_init (rtx target
, rtx vals
)
8699 machine_mode mode
= GET_MODE (target
);
8700 machine_mode inner_mode
= GET_MODE_INNER (mode
);
8701 int n_elts
= GET_MODE_NUNITS (mode
);
8702 int n_var
= 0, one_var
= -1;
8703 bool all_same
= true;
8707 x
= XVECEXP (vals
, 0, 0);
8708 if (!CONST_INT_P (x
) && !CONST_DOUBLE_P (x
))
8709 n_var
= 1, one_var
= 0;
8711 for (i
= 1; i
< n_elts
; ++i
)
8713 x
= XVECEXP (vals
, 0, i
);
8714 if (!CONST_INT_P (x
) && !CONST_DOUBLE_P (x
))
8715 ++n_var
, one_var
= i
;
8717 if (!rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
8723 rtx constant
= aarch64_simd_make_constant (vals
);
8724 if (constant
!= NULL_RTX
)
8726 emit_move_insn (target
, constant
);
8731 /* Splat a single non-constant element if we can. */
8734 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
8735 aarch64_emit_move (target
, gen_rtx_VEC_DUPLICATE (mode
, x
));
8739 /* One field is non-constant. Load constant then overwrite varying
8740 field. This is more efficient than using the stack. */
8743 rtx copy
= copy_rtx (vals
);
8744 rtx index
= GEN_INT (one_var
);
8745 enum insn_code icode
;
8747 /* Load constant part of vector, substitute neighboring value for
8749 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, one_var
^ 1);
8750 aarch64_expand_vector_init (target
, copy
);
8752 /* Insert variable. */
8753 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
8754 icode
= optab_handler (vec_set_optab
, mode
);
8755 gcc_assert (icode
!= CODE_FOR_nothing
);
8756 emit_insn (GEN_FCN (icode
) (target
, x
, index
));
8760 /* Construct the vector in memory one field at a time
8761 and load the whole vector. */
8762 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
8763 for (i
= 0; i
< n_elts
; i
++)
8764 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
8765 i
* GET_MODE_SIZE (inner_mode
)),
8766 XVECEXP (vals
, 0, i
));
8767 emit_move_insn (target
, mem
);
8771 static unsigned HOST_WIDE_INT
8772 aarch64_shift_truncation_mask (machine_mode mode
)
8775 (aarch64_vector_mode_supported_p (mode
)
8776 || aarch64_vect_struct_mode_p (mode
)) ? 0 : (GET_MODE_BITSIZE (mode
) - 1);
8779 #ifndef TLS_SECTION_ASM_FLAG
8780 #define TLS_SECTION_ASM_FLAG 'T'
8784 aarch64_elf_asm_named_section (const char *name
, unsigned int flags
,
8785 tree decl ATTRIBUTE_UNUSED
)
8787 char flagchars
[10], *f
= flagchars
;
8789 /* If we have already declared this section, we can use an
8790 abbreviated form to switch back to it -- unless this section is
8791 part of a COMDAT groups, in which case GAS requires the full
8792 declaration every time. */
8793 if (!(HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
8794 && (flags
& SECTION_DECLARED
))
8796 fprintf (asm_out_file
, "\t.section\t%s\n", name
);
8800 if (!(flags
& SECTION_DEBUG
))
8802 if (flags
& SECTION_WRITE
)
8804 if (flags
& SECTION_CODE
)
8806 if (flags
& SECTION_SMALL
)
8808 if (flags
& SECTION_MERGE
)
8810 if (flags
& SECTION_STRINGS
)
8812 if (flags
& SECTION_TLS
)
8813 *f
++ = TLS_SECTION_ASM_FLAG
;
8814 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
8818 fprintf (asm_out_file
, "\t.section\t%s,\"%s\"", name
, flagchars
);
8820 if (!(flags
& SECTION_NOTYPE
))
8825 if (flags
& SECTION_BSS
)
8830 #ifdef TYPE_OPERAND_FMT
8831 format
= "," TYPE_OPERAND_FMT
;
8836 fprintf (asm_out_file
, format
, type
);
8838 if (flags
& SECTION_ENTSIZE
)
8839 fprintf (asm_out_file
, ",%d", flags
& SECTION_ENTSIZE
);
8840 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
8842 if (TREE_CODE (decl
) == IDENTIFIER_NODE
)
8843 fprintf (asm_out_file
, ",%s,comdat", IDENTIFIER_POINTER (decl
));
8845 fprintf (asm_out_file
, ",%s,comdat",
8846 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl
)));
8850 putc ('\n', asm_out_file
);
8853 /* Select a format to encode pointers in exception handling data. */
8855 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED
, int global
)
8858 switch (aarch64_cmodel
)
8860 case AARCH64_CMODEL_TINY
:
8861 case AARCH64_CMODEL_TINY_PIC
:
8862 case AARCH64_CMODEL_SMALL
:
8863 case AARCH64_CMODEL_SMALL_PIC
:
8864 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
8866 type
= DW_EH_PE_sdata4
;
8869 /* No assumptions here. 8-byte relocs required. */
8870 type
= DW_EH_PE_sdata8
;
8873 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
/* Emit load exclusive.  */
aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
			     rtx mem, rtx model_rtx)
  rtx (*gen) (rtx, rtx, rtx);

    case QImode: gen = gen_aarch64_load_exclusiveqi; break;
    case HImode: gen = gen_aarch64_load_exclusivehi; break;
    case SImode: gen = gen_aarch64_load_exclusivesi; break;
    case DImode: gen = gen_aarch64_load_exclusivedi; break;

  emit_insn (gen (rval, mem, model_rtx));

/* Emit store exclusive.  */
aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
			      rtx rval, rtx mem, rtx model_rtx)
  rtx (*gen) (rtx, rtx, rtx, rtx);

    case QImode: gen = gen_aarch64_store_exclusiveqi; break;
    case HImode: gen = gen_aarch64_store_exclusivehi; break;
    case SImode: gen = gen_aarch64_store_exclusivesi; break;
    case DImode: gen = gen_aarch64_store_exclusivedi; break;

  emit_insn (gen (bval, rval, mem, model_rtx));

/* Mark the previous jump instruction as unlikely.  */
aarch64_emit_unlikely_jump (rtx insn)
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;

  insn = emit_jump_insn (insn);
  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
8929 /* Expand a compare and swap pattern. */
8932 aarch64_expand_compare_and_swap (rtx operands
[])
8934 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
8935 machine_mode mode
, cmp_mode
;
8936 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
8941 oldval
= operands
[3];
8942 newval
= operands
[4];
8943 is_weak
= operands
[5];
8944 mod_s
= operands
[6];
8945 mod_f
= operands
[7];
8946 mode
= GET_MODE (mem
);
8949 /* Normally the succ memory model must be stronger than fail, but in the
8950 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
8951 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
8953 if (INTVAL (mod_f
) == MEMMODEL_ACQUIRE
8954 && INTVAL (mod_s
) == MEMMODEL_RELEASE
)
8955 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
8961 /* For short modes, we're going to perform the comparison in SImode,
8962 so do the zero-extension now. */
8964 rval
= gen_reg_rtx (SImode
);
8965 oldval
= convert_modes (SImode
, mode
, oldval
, true);
8970 /* Force the value into a register if needed. */
8971 if (!aarch64_plus_operand (oldval
, mode
))
8972 oldval
= force_reg (cmp_mode
, oldval
);
8981 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
8982 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
8983 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
8984 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
8989 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
8991 if (mode
== QImode
|| mode
== HImode
)
8992 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
8994 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
8995 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
8996 emit_insn (gen_rtx_SET (VOIDmode
, bval
, x
));
/* Split a compare and swap pattern.  */
aarch64_split_compare_and_swap (rtx operands[])
  rtx rval, mem, oldval, newval, scratch;
  rtx_code_label *label1, *label2;

  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  scratch = operands[7];
  mode = GET_MODE (mem);

      label1 = gen_label_rtx ();
      emit_label (label1);
  label2 = gen_label_rtx ();

  aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);

  cond = aarch64_gen_compare_reg (NE, rval, oldval);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));

  aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);

      x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));

      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, cond, x));

  emit_label (label2);
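/* For reference (a sketch only; register names assumed, and the exact
   acquire/release mnemonics depend on the requested memory model), the split
   above yields a load-exclusive/store-exclusive retry loop of this shape for
   a strong word-sized compare-and-swap:

	.L1:	ldaxr	w0, [x2]	// load-exclusive into rval
		cmp	w0, w3		// compare against oldval
		b.ne	.L2		// mismatch: give up
		stlxr	w1, w4, [x2]	// try to store newval
		cbnz	w1, .L1		// store-exclusive failed: retry
	.L2:  */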
9053 /* Split an atomic operation. */
9056 aarch64_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
9057 rtx value
, rtx model_rtx
, rtx cond
)
9059 machine_mode mode
= GET_MODE (mem
);
9060 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
9061 rtx_code_label
*label
;
9064 label
= gen_label_rtx ();
9068 new_out
= gen_lowpart (wmode
, new_out
);
9070 old_out
= gen_lowpart (wmode
, old_out
);
9073 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
9075 aarch64_emit_load_exclusive (mode
, old_out
, mem
, model_rtx
);
9084 x
= gen_rtx_AND (wmode
, old_out
, value
);
9085 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
9086 x
= gen_rtx_NOT (wmode
, new_out
);
9087 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
9091 if (CONST_INT_P (value
))
9093 value
= GEN_INT (-INTVAL (value
));
9099 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
9100 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
9104 aarch64_emit_store_exclusive (mode
, cond
, mem
,
9105 gen_lowpart (mode
, new_out
), model_rtx
);
9107 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
9108 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
9109 gen_rtx_LABEL_REF (Pmode
, label
), pc_rtx
);
9110 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
9114 aarch64_print_extension (void)
9116 const struct aarch64_option_extension
*opt
= NULL
;
9118 for (opt
= all_extensions
; opt
->name
!= NULL
; opt
++)
9119 if ((aarch64_isa_flags
& opt
->flags_on
) == opt
->flags_on
)
9120 asm_fprintf (asm_out_file
, "+%s", opt
->name
);
9122 asm_fprintf (asm_out_file
, "\n");
9126 aarch64_start_file (void)
9130 asm_fprintf (asm_out_file
, "\t.arch %s", selected_arch
->name
);
9131 aarch64_print_extension ();
9133 else if (selected_cpu
)
9135 const char *truncated_name
9136 = aarch64_rewrite_selected_cpu (selected_cpu
->name
);
9137 asm_fprintf (asm_out_file
, "\t.cpu %s", truncated_name
);
9138 aarch64_print_extension ();
9140 default_file_start();
/* Target hook for c_mode_for_suffix.  */
aarch64_c_mode_for_suffix (char suffix)

/* We can only represent floating point constants which will fit in
   "quarter-precision" values.  These values are characterised by
   a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by

	(-1)^s * (n/16) * 2^r

   where
	's' is the sign bit,
	'n' is an integer in the range 16 <= n <= 31, and
	'r' is an integer in the range -3 <= r <= 4.  */
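/* Worked examples (for illustration only):

	 1.0  = (16/16) * 2^0	-> representable as an fmov immediate
	 0.5  = (16/16) * 2^-1	-> representable
	31.0  = (31/16) * 2^4	-> representable (the largest magnitude)
	 0.1  has no n/16 * 2^r form with 16 <= n <= 31	-> not representable

   so representable magnitudes range from 0.125 up to 31.0.  */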
9165 /* Return true iff X can be represented by a quarter-precision
9166 floating point immediate operand X. Note, we cannot represent 0.0. */
9168 aarch64_float_const_representable_p (rtx x
)
9170 /* This represents our current view of how many bits
9171 make up the mantissa. */
9172 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
9174 unsigned HOST_WIDE_INT mantissa
, mask
;
9175 REAL_VALUE_TYPE r
, m
;
9178 if (!CONST_DOUBLE_P (x
))
9181 if (GET_MODE (x
) == VOIDmode
)
9184 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
9186 /* We cannot represent infinities, NaNs or +/-zero. We won't
9187 know if we have +zero until we analyse the mantissa, but we
9188 can reject the other invalid values. */
9189 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
)
9190 || REAL_VALUE_MINUS_ZERO (r
))
9193 /* Extract exponent. */
9194 r
= real_value_abs (&r
);
9195 exponent
= REAL_EXP (&r
);
9197 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
9198 highest (sign) bit, with a fixed binary point at bit point_pos.
9199 m1 holds the low part of the mantissa, m2 the high part.
9200 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
9201 bits for the mantissa, this can fail (low bits will be lost). */
9202 real_ldexp (&m
, &r
, point_pos
- exponent
);
9203 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
9205 /* If the low part of the mantissa has bits set we cannot represent
9209 /* We have rejected the lower HOST_WIDE_INT, so update our
9210 understanding of how many bits lie in the mantissa and
9211 look only at the high HOST_WIDE_INT. */
9212 mantissa
= w
.elt (1);
9213 point_pos
-= HOST_BITS_PER_WIDE_INT
;
9215 /* We can only represent values with a mantissa of the form 1.xxxx. */
9216 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
9217 if ((mantissa
& mask
) != 0)
9220 /* Having filtered unrepresentable values, we may now remove all
9221 but the highest 5 bits. */
9222 mantissa
>>= point_pos
- 5;
9224 /* We cannot represent the value 0.0, so reject it. This is handled
9229 /* Then, as bit 4 is always set, we can mask it off, leaving
9230 the mantissa in the range [0, 15]. */
9231 mantissa
&= ~(1 << 4);
9232 gcc_assert (mantissa
<= 15);
9234 /* GCC internally does not use IEEE754-like encoding (where normalized
9235 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
9236 Our mantissa values are shifted 4 places to the left relative to
9237 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
9238 by 5 places to correct for GCC's representation. */
9239 exponent
= 5 - exponent
;
9241 return (exponent
>= 0 && exponent
<= 7);
9245 aarch64_output_simd_mov_immediate (rtx const_vector
,
9250 static char templ
[40];
9251 const char *mnemonic
;
9252 const char *shift_op
;
9253 unsigned int lane_count
= 0;
9256 struct simd_immediate_info info
= { NULL_RTX
, 0, 0, false, false };
  /* This will return true to show const_vector is legal for use as either
     an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
     also update INFO to show how the immediate should be generated.  */
9261 is_valid
= aarch64_simd_valid_immediate (const_vector
, mode
, false, &info
);
9262 gcc_assert (is_valid
);
9264 element_char
= sizetochar (info
.element_width
);
9265 lane_count
= width
/ info
.element_width
;
9267 mode
= GET_MODE_INNER (mode
);
9268 if (mode
== SFmode
|| mode
== DFmode
)
9270 gcc_assert (info
.shift
== 0 && ! info
.mvn
);
9271 if (aarch64_float_const_zero_rtx_p (info
.value
))
9272 info
.value
= GEN_INT (0);
9277 REAL_VALUE_FROM_CONST_DOUBLE (r
, info
.value
);
9278 char float_buf
[buf_size
] = {'\0'};
9279 real_to_decimal_for_mode (float_buf
, &r
, buf_size
, buf_size
, 1, mode
);
9282 if (lane_count
== 1)
9283 snprintf (templ
, sizeof (templ
), "fmov\t%%d0, %s", float_buf
);
9285 snprintf (templ
, sizeof (templ
), "fmov\t%%0.%d%c, %s",
9286 lane_count
, element_char
, float_buf
);
9291 mnemonic
= info
.mvn
? "mvni" : "movi";
9292 shift_op
= info
.msl
? "msl" : "lsl";
9294 if (lane_count
== 1)
9295 snprintf (templ
, sizeof (templ
), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX
,
9296 mnemonic
, UINTVAL (info
.value
));
9297 else if (info
.shift
)
9298 snprintf (templ
, sizeof (templ
), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
9299 ", %s %d", mnemonic
, lane_count
, element_char
,
9300 UINTVAL (info
.value
), shift_op
, info
.shift
);
9302 snprintf (templ
, sizeof (templ
), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
,
9303 mnemonic
, lane_count
, element_char
, UINTVAL (info
.value
));
9308 aarch64_output_scalar_simd_mov_immediate (rtx immediate
,
9313 gcc_assert (!VECTOR_MODE_P (mode
));
9314 vmode
= aarch64_simd_container_mode (mode
, 64);
9315 rtx v_op
= aarch64_simd_gen_const_vector_dup (vmode
, INTVAL (immediate
));
9316 return aarch64_output_simd_mov_immediate (v_op
, vmode
, 64);
9319 /* Split operands into moves from op[1] + op[2] into op[0]. */
9322 aarch64_split_combinev16qi (rtx operands
[3])
9324 unsigned int dest
= REGNO (operands
[0]);
9325 unsigned int src1
= REGNO (operands
[1]);
9326 unsigned int src2
= REGNO (operands
[2]);
9327 machine_mode halfmode
= GET_MODE (operands
[1]);
9328 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
9331 gcc_assert (halfmode
== V16QImode
);
9333 if (src1
== dest
&& src2
== dest
+ halfregs
)
9335 /* No-op move. Can't split to nothing; emit something. */
9336 emit_note (NOTE_INSN_DELETED
);
9340 /* Preserve register attributes for variable tracking. */
9341 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
9342 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
9343 GET_MODE_SIZE (halfmode
));
9345 /* Special case of reversed high/low parts. */
9346 if (reg_overlap_mentioned_p (operands
[2], destlo
)
9347 && reg_overlap_mentioned_p (operands
[1], desthi
))
9349 emit_insn (gen_xorv16qi3 (operands
[1], operands
[1], operands
[2]));
9350 emit_insn (gen_xorv16qi3 (operands
[2], operands
[1], operands
[2]));
9351 emit_insn (gen_xorv16qi3 (operands
[1], operands
[1], operands
[2]));
9353 else if (!reg_overlap_mentioned_p (operands
[2], destlo
))
9355 /* Try to avoid unnecessary moves if part of the result
9356 is in the right place already. */
9358 emit_move_insn (destlo
, operands
[1]);
9359 if (src2
!= dest
+ halfregs
)
9360 emit_move_insn (desthi
, operands
[2]);
9364 if (src2
!= dest
+ halfregs
)
9365 emit_move_insn (desthi
, operands
[2]);
9367 emit_move_insn (destlo
, operands
[1]);
9371 /* vec_perm support. */
9373 #define MAX_VECT_LEN 16
9375 struct expand_vec_perm_d
9377 rtx target
, op0
, op1
;
9378 unsigned char perm
[MAX_VECT_LEN
];
9385 /* Generate a variable permutation. */
9388 aarch64_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
9390 machine_mode vmode
= GET_MODE (target
);
9391 bool one_vector_p
= rtx_equal_p (op0
, op1
);
9393 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
9394 gcc_checking_assert (GET_MODE (op0
) == vmode
);
9395 gcc_checking_assert (GET_MODE (op1
) == vmode
);
9396 gcc_checking_assert (GET_MODE (sel
) == vmode
);
9397 gcc_checking_assert (TARGET_SIMD
);
9401 if (vmode
== V8QImode
)
9403 /* Expand the argument to a V16QI mode by duplicating it. */
9404 rtx pair
= gen_reg_rtx (V16QImode
);
9405 emit_insn (gen_aarch64_combinev8qi (pair
, op0
, op0
));
9406 emit_insn (gen_aarch64_tbl1v8qi (target
, pair
, sel
));
9410 emit_insn (gen_aarch64_tbl1v16qi (target
, op0
, sel
));
9417 if (vmode
== V8QImode
)
9419 pair
= gen_reg_rtx (V16QImode
);
9420 emit_insn (gen_aarch64_combinev8qi (pair
, op0
, op1
));
9421 emit_insn (gen_aarch64_tbl1v8qi (target
, pair
, sel
));
9425 pair
= gen_reg_rtx (OImode
);
9426 emit_insn (gen_aarch64_combinev16qi (pair
, op0
, op1
));
9427 emit_insn (gen_aarch64_tbl2v16qi (target
, pair
, sel
));
aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
  machine_mode vmode = GET_MODE (target);
  unsigned int nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);

  /* The TBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = aarch64_simd_gen_const_vector_dup (vmode,
					    one_vector_p ? nelt - 1 : 2 * nelt - 1);
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  /* For big-endian, we also need to reverse the index within the vector
     (but not which vector).  */
  if (BYTES_BIG_ENDIAN)
      /* If one_vector_p, mask is a vector of (nelt - 1)'s already.  */
	mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
      sel = expand_simple_binop (vmode, XOR, sel, mask,
				 NULL, 0, OPTAB_LIB_WIDEN);

  aarch64_expand_vec_perm_1 (target, op0, op1, sel);
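/* Index-masking example (illustrative): for a two-operand V16QImode permute
   the selector bytes are ANDed with 2 * 16 - 1 = 31, so an out-of-range
   selector value such as 40 becomes 40 & 31 = 8, i.e. element 8 of the
   concatenated { op0, op1 } table.  This reproduces the modulo behaviour the
   middle end expects, rather than TBL's rule of writing zero for indices
   outside the table.  */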
9459 /* Recognize patterns suitable for the TRN instructions. */
9461 aarch64_evpc_trn (struct expand_vec_perm_d
*d
)
9463 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
9464 rtx out
, in0
, in1
, x
;
9465 rtx (*gen
) (rtx
, rtx
, rtx
);
9466 machine_mode vmode
= d
->vmode
;
9468 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
9471 /* Note that these are little-endian tests.
9472 We correct for big-endian later. */
9473 if (d
->perm
[0] == 0)
9475 else if (d
->perm
[0] == 1)
9479 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
9481 for (i
= 0; i
< nelt
; i
+= 2)
9483 if (d
->perm
[i
] != i
+ odd
)
9485 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
9495 if (BYTES_BIG_ENDIAN
)
9497 x
= in0
, in0
= in1
, in1
= x
;
9506 case V16QImode
: gen
= gen_aarch64_trn2v16qi
; break;
9507 case V8QImode
: gen
= gen_aarch64_trn2v8qi
; break;
9508 case V8HImode
: gen
= gen_aarch64_trn2v8hi
; break;
9509 case V4HImode
: gen
= gen_aarch64_trn2v4hi
; break;
9510 case V4SImode
: gen
= gen_aarch64_trn2v4si
; break;
9511 case V2SImode
: gen
= gen_aarch64_trn2v2si
; break;
9512 case V2DImode
: gen
= gen_aarch64_trn2v2di
; break;
9513 case V4SFmode
: gen
= gen_aarch64_trn2v4sf
; break;
9514 case V2SFmode
: gen
= gen_aarch64_trn2v2sf
; break;
9515 case V2DFmode
: gen
= gen_aarch64_trn2v2df
; break;
9524 case V16QImode
: gen
= gen_aarch64_trn1v16qi
; break;
9525 case V8QImode
: gen
= gen_aarch64_trn1v8qi
; break;
9526 case V8HImode
: gen
= gen_aarch64_trn1v8hi
; break;
9527 case V4HImode
: gen
= gen_aarch64_trn1v4hi
; break;
9528 case V4SImode
: gen
= gen_aarch64_trn1v4si
; break;
9529 case V2SImode
: gen
= gen_aarch64_trn1v2si
; break;
9530 case V2DImode
: gen
= gen_aarch64_trn1v2di
; break;
9531 case V4SFmode
: gen
= gen_aarch64_trn1v4sf
; break;
9532 case V2SFmode
: gen
= gen_aarch64_trn1v2sf
; break;
9533 case V2DFmode
: gen
= gen_aarch64_trn1v2df
; break;
9539 emit_insn (gen (out
, in0
, in1
));
9543 /* Recognize patterns suitable for the UZP instructions. */
9545 aarch64_evpc_uzp (struct expand_vec_perm_d
*d
)
9547 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
9548 rtx out
, in0
, in1
, x
;
9549 rtx (*gen
) (rtx
, rtx
, rtx
);
9550 machine_mode vmode
= d
->vmode
;
9552 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
9555 /* Note that these are little-endian tests.
9556 We correct for big-endian later. */
9557 if (d
->perm
[0] == 0)
9559 else if (d
->perm
[0] == 1)
9563 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
9565 for (i
= 0; i
< nelt
; i
++)
9567 unsigned elt
= (i
* 2 + odd
) & mask
;
9568 if (d
->perm
[i
] != elt
)
9578 if (BYTES_BIG_ENDIAN
)
9580 x
= in0
, in0
= in1
, in1
= x
;
9589 case V16QImode
: gen
= gen_aarch64_uzp2v16qi
; break;
9590 case V8QImode
: gen
= gen_aarch64_uzp2v8qi
; break;
9591 case V8HImode
: gen
= gen_aarch64_uzp2v8hi
; break;
9592 case V4HImode
: gen
= gen_aarch64_uzp2v4hi
; break;
9593 case V4SImode
: gen
= gen_aarch64_uzp2v4si
; break;
9594 case V2SImode
: gen
= gen_aarch64_uzp2v2si
; break;
9595 case V2DImode
: gen
= gen_aarch64_uzp2v2di
; break;
9596 case V4SFmode
: gen
= gen_aarch64_uzp2v4sf
; break;
9597 case V2SFmode
: gen
= gen_aarch64_uzp2v2sf
; break;
9598 case V2DFmode
: gen
= gen_aarch64_uzp2v2df
; break;
9607 case V16QImode
: gen
= gen_aarch64_uzp1v16qi
; break;
9608 case V8QImode
: gen
= gen_aarch64_uzp1v8qi
; break;
9609 case V8HImode
: gen
= gen_aarch64_uzp1v8hi
; break;
9610 case V4HImode
: gen
= gen_aarch64_uzp1v4hi
; break;
9611 case V4SImode
: gen
= gen_aarch64_uzp1v4si
; break;
9612 case V2SImode
: gen
= gen_aarch64_uzp1v2si
; break;
9613 case V2DImode
: gen
= gen_aarch64_uzp1v2di
; break;
9614 case V4SFmode
: gen
= gen_aarch64_uzp1v4sf
; break;
9615 case V2SFmode
: gen
= gen_aarch64_uzp1v2sf
; break;
9616 case V2DFmode
: gen
= gen_aarch64_uzp1v2df
; break;
9622 emit_insn (gen (out
, in0
, in1
));
9626 /* Recognize patterns suitable for the ZIP instructions. */
9628 aarch64_evpc_zip (struct expand_vec_perm_d
*d
)
9630 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
9631 rtx out
, in0
, in1
, x
;
9632 rtx (*gen
) (rtx
, rtx
, rtx
);
9633 machine_mode vmode
= d
->vmode
;
9635 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
9638 /* Note that these are little-endian tests.
9639 We correct for big-endian later. */
9641 if (d
->perm
[0] == high
)
9644 else if (d
->perm
[0] == 0)
9648 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
9650 for (i
= 0; i
< nelt
/ 2; i
++)
9652 unsigned elt
= (i
+ high
) & mask
;
9653 if (d
->perm
[i
* 2] != elt
)
9655 elt
= (elt
+ nelt
) & mask
;
9656 if (d
->perm
[i
* 2 + 1] != elt
)
9666 if (BYTES_BIG_ENDIAN
)
9668 x
= in0
, in0
= in1
, in1
= x
;
9677 case V16QImode
: gen
= gen_aarch64_zip2v16qi
; break;
9678 case V8QImode
: gen
= gen_aarch64_zip2v8qi
; break;
9679 case V8HImode
: gen
= gen_aarch64_zip2v8hi
; break;
9680 case V4HImode
: gen
= gen_aarch64_zip2v4hi
; break;
9681 case V4SImode
: gen
= gen_aarch64_zip2v4si
; break;
9682 case V2SImode
: gen
= gen_aarch64_zip2v2si
; break;
9683 case V2DImode
: gen
= gen_aarch64_zip2v2di
; break;
9684 case V4SFmode
: gen
= gen_aarch64_zip2v4sf
; break;
9685 case V2SFmode
: gen
= gen_aarch64_zip2v2sf
; break;
9686 case V2DFmode
: gen
= gen_aarch64_zip2v2df
; break;
9695 case V16QImode
: gen
= gen_aarch64_zip1v16qi
; break;
9696 case V8QImode
: gen
= gen_aarch64_zip1v8qi
; break;
9697 case V8HImode
: gen
= gen_aarch64_zip1v8hi
; break;
9698 case V4HImode
: gen
= gen_aarch64_zip1v4hi
; break;
9699 case V4SImode
: gen
= gen_aarch64_zip1v4si
; break;
9700 case V2SImode
: gen
= gen_aarch64_zip1v2si
; break;
9701 case V2DImode
: gen
= gen_aarch64_zip1v2di
; break;
9702 case V4SFmode
: gen
= gen_aarch64_zip1v4sf
; break;
9703 case V2SFmode
: gen
= gen_aarch64_zip1v2sf
; break;
9704 case V2DFmode
: gen
= gen_aarch64_zip1v2df
; break;
9710 emit_insn (gen (out
, in0
, in1
));
/* Recognize patterns for the EXT insn.  */

static bool
aarch64_evpc_ext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->nelt;
  rtx (*gen) (rtx, rtx, rtx, rtx);
  rtx offset;

  unsigned int location = d->perm[0]; /* Always < nelt.  */

  /* Check if the extracted indices are increasing by one.  */
  for (i = 1; i < nelt; i++)
    {
      unsigned int required = location + i;
      if (d->one_vector_p)
	{
	  /* We'll pass the same vector in twice, so allow indices to wrap.  */
	  required &= (nelt - 1);
	}
      if (d->perm[i] != required)
	return false;
    }

  switch (d->vmode)
    {
    case V16QImode: gen = gen_aarch64_extv16qi; break;
    case V8QImode: gen = gen_aarch64_extv8qi; break;
    case V4HImode: gen = gen_aarch64_extv4hi; break;
    case V8HImode: gen = gen_aarch64_extv8hi; break;
    case V2SImode: gen = gen_aarch64_extv2si; break;
    case V4SImode: gen = gen_aarch64_extv4si; break;
    case V2SFmode: gen = gen_aarch64_extv2sf; break;
    case V4SFmode: gen = gen_aarch64_extv4sf; break;
    case V2DImode: gen = gen_aarch64_extv2di; break;
    case V2DFmode: gen = gen_aarch64_extv2df; break;
    default:
      return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  /* The case where (location == 0) is a no-op for both big- and little-endian,
     and is removed by the mid-end at optimization levels -O1 and higher.  */

  if (BYTES_BIG_ENDIAN && (location != 0))
    {
      /* After setup, we want the high elements of the first vector (stored
	 at the LSB end of the register), and the low elements of the second
	 vector (stored at the MSB end of the register).  So swap.  */
      rtx temp = d->op0;
      d->op0 = d->op1;
      d->op1 = temp;
      /* location != 0 (above), so safe to assume (nelt - location) < nelt.  */
      location = nelt - location;
    }

  offset = GEN_INT (location);
  emit_insn (gen (d->target, d->op0, d->op1, offset));
  return true;
}
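
/* Illustrative note (not from the original source): for V4SImode the
   selector {1, 2, 3, 4} has location == 1 and each subsequent index
   increasing by one, so it is matched here and expands to a single EXT
   instruction extracting four elements starting at element 1 of the
   concatenated inputs (with the operands swapped and location recomputed
   as nelt - location on big-endian).  */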
/* Recognize patterns for the REV insns.  */

static bool
aarch64_evpc_rev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->nelt;
  rtx (*gen) (rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_aarch64_rev64v16qi; break;
	case V8QImode: gen = gen_aarch64_rev64v8qi; break;
	default:
	  return false;
	}
      break;
    case 3:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_aarch64_rev32v16qi; break;
	case V8QImode: gen = gen_aarch64_rev32v8qi; break;
	case V8HImode: gen = gen_aarch64_rev64v8hi; break;
	case V4HImode: gen = gen_aarch64_rev64v4hi; break;
	default:
	  return false;
	}
      break;
    case 1:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_aarch64_rev16v16qi; break;
	case V8QImode: gen = gen_aarch64_rev16v8qi; break;
	case V8HImode: gen = gen_aarch64_rev32v8hi; break;
	case V4HImode: gen = gen_aarch64_rev32v4hi; break;
	case V4SImode: gen = gen_aarch64_rev64v4si; break;
	case V2SImode: gen = gen_aarch64_rev64v2si; break;
	case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
	case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
	default:
	  return false;
	}
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
	/* This is guaranteed to be true as the value of diff
	   is 7, 3, 1 and we should have enough elements in the
	   queue to generate this.  Getting a vector mask with a
	   value of diff other than these values implies that
	   something is wrong by the time we get here.  */
	gcc_assert (i + j < nelt);
	if (d->perm[i + j] != i + diff - j)
	  return false;
      }

  /* Success!  */
  if (d->testing_p)
    return true;

  emit_insn (gen (d->target, d->op0));
  return true;
}
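
/* Illustrative note (not from the original source): with diff == 7 and
   V8QImode the accepted selector is {7, 6, 5, 4, 3, 2, 1, 0}, i.e. a
   byte reversal within each 64-bit doubleword, which maps to REV64;
   diff == 3 and diff == 1 correspond to element reversals within 32-bit
   and 16-bit containers respectively.  */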
static bool
aarch64_evpc_dup (struct expand_vec_perm_d *d)
{
  rtx (*gen) (rtx, rtx, rtx);
  rtx out = d->target;
  rtx in0;
  machine_mode vmode = d->vmode;
  unsigned int i, elt, nelt = d->nelt;
  rtx lane;

  elt = d->perm[0];
  for (i = 1; i < nelt; i++)
    {
      if (elt != d->perm[i])
	return false;
    }

  /* The generic preparation in aarch64_expand_vec_perm_const_1
     swaps the operand order and the permute indices if it finds
     d->perm[0] to be in the second operand.  Thus, we can always
     use d->op0 and need not do any extra arithmetic to get the
     correct lane number.  */
  in0 = d->op0;
  lane = GEN_INT (elt); /* The pattern corrects for big-endian.  */

  switch (vmode)
    {
    case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
    case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
    case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
    case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
    case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
    case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
    case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
    case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
    case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
    case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
    default:
      return false;
    }

  emit_insn (gen (out, in0, lane));
  return true;
}
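
/* Illustrative note (not from the original source): a selector in which
   every index is the same, e.g. {2, 2, 2, 2} for V4SImode, is matched
   here and becomes a single DUP (lane) instruction broadcasting lane 2
   of d->op0.  */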
static bool
aarch64_evpc_tbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    {
      int nunits = GET_MODE_NUNITS (vmode);

      /* If big-endian and two vectors we end up with a weird mixed-endian
	 mode on NEON.  Reverse the index within each word but not the word
	 itself.  */
      rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
					   : d->perm[i]);
    }
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;
      rtx x;

      gcc_assert (nelt == (nelt & -nelt));
      for (i = 0; i < nelt; ++i)
	d->perm[i] ^= nelt; /* Keep the same index, but in the other vector.  */

      x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;
    }

  if (TARGET_SIMD)
    {
      if (aarch64_evpc_rev (d))
	return true;
      else if (aarch64_evpc_ext (d))
	return true;
      else if (aarch64_evpc_dup (d))
	return true;
      else if (aarch64_evpc_zip (d))
	return true;
      else if (aarch64_evpc_uzp (d))
	return true;
      else if (aarch64_evpc_trn (d))
	return true;
      return aarch64_evpc_tbl (d);
    }

  return false;
}
/* Expand a vec_perm_const pattern.  */

bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* Fall Through.  */
    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return aarch64_expand_vec_perm_const_1 (&d);
}
static bool
aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
				     const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Calculate whether all elements are in one vector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* If all elements are from the second vector, reindex as if from the
     first vector.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to a single vector.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = aarch64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
/* Implement target hook CANNOT_CHANGE_MODE_CLASS.  */

bool
aarch64_cannot_change_mode_class (machine_mode from,
				  machine_mode to,
				  enum reg_class rclass)
{
  /* Full-reg subregs are allowed on general regs or any class if they are
     the same size.  */
  if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
      || !reg_classes_intersect_p (FP_REGS, rclass))
    return false;

  /* Limited combinations of subregs are safe on FPREGs.  Particularly,
     1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
     2. Scalar to Scalar for integer modes or same size float modes.
     3. Vector to Vector modes.
     4. On little-endian only, Vector-Structure to Vector modes.  */
  if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
    {
      if (aarch64_vector_mode_supported_p (from)
	  && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
	return false;

      if (GET_MODE_NUNITS (from) == 1
	  && GET_MODE_NUNITS (to) == 1
	  && (GET_MODE_CLASS (from) == MODE_INT
	      || from == to))
	return false;

      if (aarch64_vector_mode_supported_p (from)
	  && aarch64_vector_mode_supported_p (to))
	return false;

      /* Within a vector structure straddling multiple vector registers
	 we are in a mixed-endian representation.  As such, we can't
	 easily change modes for BYTES_BIG_ENDIAN.  Otherwise, we can
	 switch between vectors and vector structures cheaply.  */
      if (!BYTES_BIG_ENDIAN)
	if ((aarch64_vector_mode_supported_p (from)
	     && aarch64_vect_struct_mode_p (to))
	    || (aarch64_vector_mode_supported_p (to)
		&& aarch64_vect_struct_mode_p (from)))
	  return false;
    }

  /* Everything else is not allowed.  */
  return true;
}
/* Implement MODES_TIEABLE_P.  */

bool
aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if (TARGET_SIMD
      && aarch64_vector_mode_p (mode1)
      && aarch64_vector_mode_p (mode2))
    return true;

  return false;
}
/* Return a new RTX holding the result of moving POINTER forward by
   AMOUNT bytes.  */

static rtx
aarch64_move_pointer (rtx pointer, int amount)
{
  rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);

  return adjust_automodify_address (pointer, GET_MODE (pointer),
				    next, amount);
}

/* Return a new RTX holding the result of moving POINTER forward by the
   size of the mode it points to.  */

static rtx
aarch64_progress_pointer (rtx pointer)
{
  HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));

  return aarch64_move_pointer (pointer, amount);
}
/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
   MODE bytes.  */

static void
aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
					      machine_mode mode)
{
  rtx reg = gen_reg_rtx (mode);

  /* "Cast" the pointers to the correct mode.  */
  *src = adjust_address (*src, mode, 0);
  *dst = adjust_address (*dst, mode, 0);
  /* Emit the memcpy.  */
  emit_move_insn (reg, *src);
  emit_move_insn (*dst, reg);
  /* Move the pointers forward.  */
  *src = aarch64_progress_pointer (*src);
  *dst = aarch64_progress_pointer (*dst);
}
/* Expand movmem, as if from a __builtin_memcpy.  Return true if
   we succeed, otherwise return false.  */

bool
aarch64_expand_movmem (rtx *operands)
{
  unsigned int n;
  rtx dst = operands[0];
  rtx src = operands[1];
  rtx base;
  bool speed_p = !optimize_function_for_size_p (cfun);

  /* When optimizing for size, give a better estimate of the length of a
     memcpy call, but use the default otherwise.  */
  unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;

  /* We can't do anything smart if the amount to copy is not constant.  */
  if (!CONST_INT_P (operands[2]))
    return false;

  n = UINTVAL (operands[2]);

  /* Try to keep the number of instructions low.  For cases below 16 bytes we
     need to make at most two moves.  For cases above 16 bytes it will be one
     move for each 16 byte chunk, then at most two additional moves.  */
  if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
    return false;

  base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  base = copy_to_mode_reg (Pmode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  /* Simple cases.  Copy 0-3 bytes, as (if applicable) a 2-byte, then a
     1-byte chunk.  */
  if (n < 4)
    {
      if (n >= 2)
	{
	  aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
	  n -= 2;
	}

      if (n == 1)
	aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);

      return true;
    }

  /* Copy 4-8 bytes.  First a 4-byte chunk, then (if applicable) a second
     4-byte chunk, partially overlapping with the previously copied chunk.  */
  if (n < 8)
    {
      aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
      n -= 4;
      if (n > 0)
	{
	  int move = n - 4;

	  src = aarch64_move_pointer (src, move);
	  dst = aarch64_move_pointer (dst, move);
	  aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
	}
      return true;
    }

  /* Copy more than 8 bytes.  Copy chunks of 16 bytes until we run out of
     them, then (if applicable) an 8-byte chunk.  */
  while (n >= 8)
    {
      if (n / 16)
	{
	  aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
	  n -= 16;
	}
      else
	{
	  aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
	  n -= 8;
	}
    }

  /* Finish the final bytes of the copy.  We can always do this in one
     instruction.  We either copy the exact amount we need, or partially
     overlap with the previous chunk we copied and copy 8-bytes.  */
  if (n == 0)
    return true;
  else if (n == 1)
    aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
  else if (n == 2)
    aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
  else if (n == 4)
    aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
  else if (n == 3)
    {
      src = aarch64_move_pointer (src, -1);
      dst = aarch64_move_pointer (dst, -1);
      aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
    }
  else
    {
      int move = n - 8;

      src = aarch64_move_pointer (src, move);
      dst = aarch64_move_pointer (dst, move);
      aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
    }

  return true;
}
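
/* Illustrative note (not from the original source): for a constant copy
   of 6 bytes the expansion above emits two overlapping SImode moves; the
   first copies bytes 0-3, then the pointers are stepped back by two bytes
   so the second covers bytes 2-5.  The same overlapping trick handles the
   sub-8-byte tail of larger copies with a final DImode move.  */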
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
aarch64_asan_shadow_offset (void)
{
  return (HOST_WIDE_INT_1 << 36);
}
static bool
aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
					unsigned int align,
					enum by_pieces_operation op,
					bool speed_p)
{
  /* STORE_BY_PIECES can be used when copying a constant string, but
     in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
     For now we always fail this and let the move_by_pieces code copy
     the string from read-only memory.  */
  if (op == STORE_BY_PIECES)
    return false;

  return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
}
/* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if target supports
   instruction fusion of some sort.  */

static bool
aarch64_macro_fusion_p (void)
{
  return aarch64_tune_params->fuseable_ops != AARCH64_FUSE_NOTHING;
}
/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P.  Return true if PREV and CURR
   should be kept together during scheduling.  */

static bool
aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
{
  rtx set_dest;
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);
  /* prev and curr are simple SET insns i.e. no flag setting or branching.  */
  bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);

  if (!aarch64_macro_fusion_p ())
    return false;

  if (simple_sets_p
      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOV_MOVK))
    {
      /* We are trying to match:
	 prev (mov)  == (set (reg r0) (const_int imm16))
	 curr (movk) == (set (zero_extract (reg r0)
					   (const_int 16)
					   (const_int 16))
			     (const_int imm16_1))  */

      set_dest = SET_DEST (curr_set);

      if (GET_CODE (set_dest) == ZERO_EXTRACT
	  && CONST_INT_P (SET_SRC (curr_set))
	  && CONST_INT_P (SET_SRC (prev_set))
	  && CONST_INT_P (XEXP (set_dest, 2))
	  && INTVAL (XEXP (set_dest, 2)) == 16
	  && REG_P (XEXP (set_dest, 0))
	  && REG_P (SET_DEST (prev_set))
	  && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
	return true;
    }

  if (simple_sets_p
      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_ADD))
    {
      /* We're trying to match:
	 prev (adrp) == (set (reg r1)
			     (high (symbol_ref ("SYM"))))
	 curr (add)  == (set (reg r0)
			     (lo_sum (reg r1)
				     (symbol_ref ("SYM"))))
	 Note that r0 need not necessarily be the same as r1, especially
	 during pre-regalloc scheduling.  */

      if (satisfies_constraint_Ush (SET_SRC (prev_set))
	  && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
	{
	  if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
	      && REG_P (XEXP (SET_SRC (curr_set), 0))
	      && REGNO (XEXP (SET_SRC (curr_set), 0))
		 == REGNO (SET_DEST (prev_set))
	      && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
			      XEXP (SET_SRC (curr_set), 1)))
	    return true;
	}
    }

  if (simple_sets_p
      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOVK_MOVK))
    {
      /* We're trying to match:
	 prev (movk) == (set (zero_extract (reg r0)
					   (const_int 16)
					   (const_int 32))
			     (const_int imm16_1))
	 curr (movk) == (set (zero_extract (reg r0)
					   (const_int 16)
					   (const_int 48))
			     (const_int imm16_2))  */

      if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
	  && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
	  && REG_P (XEXP (SET_DEST (prev_set), 0))
	  && REG_P (XEXP (SET_DEST (curr_set), 0))
	  && REGNO (XEXP (SET_DEST (prev_set), 0))
	     == REGNO (XEXP (SET_DEST (curr_set), 0))
	  && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
	  && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
	  && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
	  && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
	  && CONST_INT_P (SET_SRC (prev_set))
	  && CONST_INT_P (SET_SRC (curr_set)))
	return true;
    }

  if (simple_sets_p
      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_LDR))
    {
      /* We're trying to match:
	 prev (adrp) == (set (reg r0)
			     (high (symbol_ref ("SYM"))))
	 curr (ldr)  == (set (reg r1)
			     (mem (lo_sum (reg r0)
					  (symbol_ref ("SYM")))))
	 or
	 curr (ldr)  == (set (reg r1)
			     (zero_extend (mem
					   (lo_sum (reg r0)
						   (symbol_ref ("SYM"))))))  */

      if (satisfies_constraint_Ush (SET_SRC (prev_set))
	  && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
	{
	  rtx curr_src = SET_SRC (curr_set);

	  if (GET_CODE (curr_src) == ZERO_EXTEND)
	    curr_src = XEXP (curr_src, 0);

	  if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
	      && REG_P (XEXP (XEXP (curr_src, 0), 0))
	      && REGNO (XEXP (XEXP (curr_src, 0), 0))
		 == REGNO (SET_DEST (prev_set))
	      && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
			      XEXP (SET_SRC (prev_set), 0)))
	    return true;
	}
    }

  if ((aarch64_tune_params->fuseable_ops & AARCH64_FUSE_CMP_BRANCH)
      && any_condjump_p (curr))
    {
      enum attr_type prev_type = get_attr_type (prev);

      /* FIXME: this misses some instructions that ThunderX considers
	 simple arithmetic.  Simple shifts are missed here.  */
      if (prev_type == TYPE_ALUS_SREG
	  || prev_type == TYPE_ALUS_IMM
	  || prev_type == TYPE_LOGICS_REG
	  || prev_type == TYPE_LOGICS_IMM)
	return true;
    }

  return false;
}
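
/* Illustrative note (not from the original source): the MOV/MOVK case
   above accepts a pair such as

     mov  x0, #0x1234            // (set (reg x0) (const_int 0x1234))
     movk x0, #0x5678, lsl #16   // (set (zero_extract (reg x0) 16 16)
                                 //      (const_int 0x5678))

   where both sources are CONST_INTs, the zero_extract starts at bit 16
   and both instructions write the same register.  */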
/* If MEM is in the form of [base+offset], extract the two parts
   of the address and set them to BASE and OFFSET, otherwise return false
   after clearing BASE and OFFSET.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr))
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}
/* Types for scheduling fusion.  */
enum sched_fusion_type
{
  SCHED_FUSION_NONE = 0,
  SCHED_FUSION_LD_SIGN_EXTEND,
  SCHED_FUSION_LD_ZERO_EXTEND,
  SCHED_FUSION_LD,
  SCHED_FUSION_ST,
  SCHED_FUSION_NUM
};
/* If INSN is a load or store of an address in the form of [base+offset],
   extract the two parts and set them to BASE and OFFSET.  Return the
   scheduling fusion type of this INSN.  */

static enum sched_fusion_type
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
{
  rtx x, dest, src;
  enum sched_fusion_type fusion = SCHED_FUSION_LD;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return SCHED_FUSION_NONE;

  src = SET_SRC (x);
  dest = SET_DEST (x);

  if (GET_MODE (dest) != SImode && GET_MODE (dest) != DImode
      && GET_MODE (dest) != SFmode && GET_MODE (dest) != DFmode)
    return SCHED_FUSION_NONE;

  if (GET_CODE (src) == SIGN_EXTEND)
    {
      fusion = SCHED_FUSION_LD_SIGN_EXTEND;
      src = XEXP (src, 0);
      if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
	return SCHED_FUSION_NONE;
    }
  else if (GET_CODE (src) == ZERO_EXTEND)
    {
      fusion = SCHED_FUSION_LD_ZERO_EXTEND;
      src = XEXP (src, 0);
      if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
	return SCHED_FUSION_NONE;
    }

  if (GET_CODE (src) == MEM && REG_P (dest))
    extract_base_offset_in_addr (src, base, offset);
  else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
    {
      fusion = SCHED_FUSION_ST;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else
    return SCHED_FUSION_NONE;

  if (*base == NULL_RTX || *offset == NULL_RTX)
    fusion = SCHED_FUSION_NONE;

  return fusion;
}
/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr and str instructions, so FUSION_PRI
   and PRI are only calculated for these instructions.  For other
   instructions, FUSION_PRI and PRI are simply set to MAX_PRI - 1.  In the
   future, other kinds of instruction fusion can be added by returning
   different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */

static void
aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
			       int *fusion_pri, int *pri)
{
  int tmp, off_val;
  rtx base, offset;
  enum sched_fusion_type fusion;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  fusion = fusion_load_store (insn, &base, &offset);
  if (fusion == SCHED_FUSION_NONE)
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Set FUSION_PRI according to fusion type and base register.  */
  *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);

  /* Calculate PRI.  */
  tmp /= 2;

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}
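
/* Illustrative note (not from the original source): two loads such as
   "ldr w1, [x2, #4]" and "ldr w3, [x2, #8]" are both classified as
   SCHED_FUSION_LD with base x2, so they receive the same FUSION_PRI,
   while PRI is biased by the offset so that the access at #4 is
   scheduled before the one at #8.  */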
/* Given OPERANDS of consecutive load/store, check if we can merge
   them into ldp/stp.  LOAD is true if they are load instructions.
   MODE is the mode of memory operands.  */

bool
aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
				enum machine_mode mode)
{
  HOST_WIDE_INT offval_1, offval_2, msize;
  enum reg_class rclass_1, rclass_2;
  rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;

  if (load)
    {
      mem_1 = operands[1];
      mem_2 = operands[3];
      reg_1 = operands[0];
      reg_2 = operands[2];
      gcc_assert (REG_P (reg_1) && REG_P (reg_2));
      if (REGNO (reg_1) == REGNO (reg_2))
	return false;
    }
  else
    {
      mem_1 = operands[0];
      mem_2 = operands[2];
      reg_1 = operands[1];
      reg_2 = operands[3];
    }

  /* The mems cannot be volatile.  */
  if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
    return false;

  /* Check if the addresses are in the form of [base+offset].  */
  extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
  if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
  if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
    return false;

  /* Check if the bases are the same.  */
  if (!rtx_equal_p (base_1, base_2))
    return false;

  offval_1 = INTVAL (offset_1);
  offval_2 = INTVAL (offset_2);
  msize = GET_MODE_SIZE (mode);
  /* Check if the offsets are consecutive.  */
  if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
    return false;

  /* Check if the addresses are clobbered by the load.  */
  if (load)
    {
      if (reg_mentioned_p (reg_1, mem_1))
	return false;

      /* In increasing order, the last load can clobber the address.  */
      if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
	return false;
    }

  if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
    rclass_1 = FP_REGS;
  else
    rclass_1 = GENERAL_REGS;

  if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
    rclass_2 = FP_REGS;
  else
    rclass_2 = GENERAL_REGS;

  /* Check if the registers are of the same class.  */
  if (rclass_1 != rclass_2)
    return false;

  return true;
}
/* Given OPERANDS of consecutive load/store, check if we can merge
   them into ldp/stp by adjusting the offset.  LOAD is true if they
   are load instructions.  MODE is the mode of memory operands.

   Given the below consecutive stores:

     str  w1, [xb, 0x100]
     str  w1, [xb, 0x104]
     str  w1, [xb, 0x108]
     str  w1, [xb, 0x10c]

   Though the offsets are out of the range supported by stp, we can
   still pair them after adjusting the offset, like:

     add  scratch, xb, 0x100
     stp  w1, w1, [scratch]
     stp  w1, w1, [scratch, 0x8]

   The peephole patterns detecting this opportunity should guarantee
   the scratch register is available.  */

bool
aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
				       enum machine_mode mode)
{
  enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
  HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
  rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
  rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;

  if (load)
    {
      reg_1 = operands[0];
      mem_1 = operands[1];
      reg_2 = operands[2];
      mem_2 = operands[3];
      reg_3 = operands[4];
      mem_3 = operands[5];
      reg_4 = operands[6];
      mem_4 = operands[7];
      gcc_assert (REG_P (reg_1) && REG_P (reg_2)
		  && REG_P (reg_3) && REG_P (reg_4));
      if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
	return false;
    }
  else
    {
      mem_1 = operands[0];
      reg_1 = operands[1];
      mem_2 = operands[2];
      reg_2 = operands[3];
      mem_3 = operands[4];
      reg_3 = operands[5];
      mem_4 = operands[6];
      reg_4 = operands[7];
    }

  /* Skip if the memory operand is by itself valid for ldp/stp.  */
  if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
    return false;

  /* The mems cannot be volatile.  */
  if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
      || MEM_VOLATILE_P (mem_3) || MEM_VOLATILE_P (mem_4))
    return false;

  /* Check if the addresses are in the form of [base+offset].  */
  extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
  if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
  if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
  if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
  if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
    return false;

  /* Check if the bases are the same.  */
  if (!rtx_equal_p (base_1, base_2)
      || !rtx_equal_p (base_2, base_3)
      || !rtx_equal_p (base_3, base_4))
    return false;

  offval_1 = INTVAL (offset_1);
  offval_2 = INTVAL (offset_2);
  offval_3 = INTVAL (offset_3);
  offval_4 = INTVAL (offset_4);
  msize = GET_MODE_SIZE (mode);
  /* Check if the offsets are consecutive.  */
  if ((offval_1 != (offval_2 + msize)
       || offval_1 != (offval_3 + msize * 2)
       || offval_1 != (offval_4 + msize * 3))
      && (offval_4 != (offval_3 + msize)
	  || offval_4 != (offval_2 + msize * 2)
	  || offval_4 != (offval_1 + msize * 3)))
    return false;

  /* Check if the addresses are clobbered by the load.  */
  if (load)
    {
      if (reg_mentioned_p (reg_1, mem_1)
	  || reg_mentioned_p (reg_2, mem_2)
	  || reg_mentioned_p (reg_3, mem_3))
	return false;

      /* In increasing order, the last load can clobber the address.  */
      if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
	return false;
    }

  if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
    rclass_1 = FP_REGS;
  else
    rclass_1 = GENERAL_REGS;

  if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
    rclass_2 = FP_REGS;
  else
    rclass_2 = GENERAL_REGS;

  if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
    rclass_3 = FP_REGS;
  else
    rclass_3 = GENERAL_REGS;

  if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
    rclass_4 = FP_REGS;
  else
    rclass_4 = GENERAL_REGS;

  /* Check if the registers are of the same class.  */
  if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
    return false;

  return true;
}
/* Given OPERANDS of consecutive load/store, this function pairs them
   into ldp/stp after adjusting the offset.  It depends on the fact
   that the addresses of the load/store instructions are in increasing
   order.  MODE is the mode of memory operands.  CODE is the rtl operator
   which should be applied to all memory operands; it's SIGN_EXTEND,
   ZERO_EXTEND or UNKNOWN.  */

bool
aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
			     enum machine_mode mode, RTX_CODE code)
{
  rtx base, offset, t1, t2;
  rtx mem_1, mem_2, mem_3, mem_4;
  HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;

  if (load)
    {
      mem_1 = operands[1];
      mem_2 = operands[3];
      mem_3 = operands[5];
      mem_4 = operands[7];
    }
  else
    {
      mem_1 = operands[0];
      mem_2 = operands[2];
      mem_3 = operands[4];
      mem_4 = operands[6];
      gcc_assert (code == UNKNOWN);
    }

  extract_base_offset_in_addr (mem_1, &base, &offset);
  gcc_assert (base != NULL_RTX && offset != NULL_RTX);

  /* Adjust the offset so that it fits in the ldp/stp instruction.  */
  msize = GET_MODE_SIZE (mode);
  stp_off_limit = msize * 0x40;
  off_val = INTVAL (offset);
  abs_off = (off_val < 0) ? -off_val : off_val;
  new_off = abs_off % stp_off_limit;
  adj_off = abs_off - new_off;

  /* Further adjust to make sure all offsets are OK.  */
  if ((new_off + msize * 2) >= stp_off_limit)
    {
      adj_off += stp_off_limit;
      new_off -= stp_off_limit;
    }

  /* Make sure the adjustment can be done with ADD/SUB instructions.  */
  if (adj_off >= 0x1000)
    return false;

  if (off_val < 0)
    {
      adj_off = -adj_off;
      new_off = -new_off;
    }

  /* Create new memory references.  */
  mem_1 = change_address (mem_1, VOIDmode,
			  plus_constant (DImode, operands[8], new_off));

  /* Check if the adjusted address is OK for ldp/stp.  */
  if (!aarch64_mem_pair_operand (mem_1, mode))
    return false;

  msize = GET_MODE_SIZE (mode);
  mem_2 = change_address (mem_2, VOIDmode,
			  plus_constant (DImode,
					 operands[8],
					 new_off + msize));
  mem_3 = change_address (mem_3, VOIDmode,
			  plus_constant (DImode,
					 operands[8],
					 new_off + msize * 2));
  mem_4 = change_address (mem_4, VOIDmode,
			  plus_constant (DImode,
					 operands[8],
					 new_off + msize * 3));

  if (code == ZERO_EXTEND)
    {
      mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
      mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
      mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
      mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
    }
  else if (code == SIGN_EXTEND)
    {
      mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
      mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
      mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
      mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
    }

  if (load)
    {
      operands[1] = mem_1;
      operands[3] = mem_2;
      operands[5] = mem_3;
      operands[7] = mem_4;
    }
  else
    {
      operands[0] = mem_1;
      operands[2] = mem_2;
      operands[4] = mem_3;
      operands[6] = mem_4;
    }

  /* Emit adjusting instruction.  */
  emit_insn (gen_rtx_SET (VOIDmode, operands[8],
			  plus_constant (DImode, base, adj_off)));
  /* Emit ldp/stp instructions.  */
  t1 = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
  t2 = gen_rtx_SET (VOIDmode, operands[2], operands[3]);
  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
  t1 = gen_rtx_SET (VOIDmode, operands[4], operands[5]);
  t2 = gen_rtx_SET (VOIDmode, operands[6], operands[7]);
  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
  return true;
}
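
/* Illustrative note (not from the original source): for the SImode
   example in the comment before aarch64_operands_adjust_ok_for_ldpstp,
   msize == 4 gives stp_off_limit == 0x100, so an original offset of
   0x100 yields new_off == 0 and adj_off == 0x100.  The code then emits
   "add scratch, xb, 0x100" followed by two stp instructions at offsets
   0 and 8 from the scratch register.  */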
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost

/* This hook determines whether unnamed bitfields affect the alignment
   of the containing structure.  The hook returns true if the structure
   should inherit the alignment requirements of an unnamed bitfield's
   type.  */
#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START aarch64_start_file

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list

#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE aarch64_can_eliminate

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage

/* Only the least significant bit is used for initialization guard
   variables.  */
#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
#endif

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN aarch64_fold_builtin

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_LRA_P
#define TARGET_LRA_P aarch64_lra_p

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if these
   accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
  aarch64_atomic_assign_expand_fenv

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

#undef TARGET_FLAGS_REGNUM
#define TARGET_FLAGS_REGNUM CC_REGNUM

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address

#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
  aarch64_use_by_pieces_infrastructure_p

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"