1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2017 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
22 #define INCLUDE_STRING
24 #include "coretypes.h"
35 #include "stringpool.h"
40 #include "diagnostic.h"
41 #include "insn-attr.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
52 #include "langhooks.h"
57 #include "gimple-iterator.h"
58 #include "tree-vectorizer.h"
59 #include "aarch64-cost-tables.h"
63 #include "tm-constrs.h"
64 #include "sched-int.h"
65 #include "target-globals.h"
66 #include "common/common-target.h"
68 #include "selftest-rtl.h"
70 /* This file should be included last. */
71 #include "target-def.h"
73 /* Defined for convenience. */
74 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
76 /* Classifies an address.
79 A simple base register plus immediate offset.
82 A base register indexed by immediate offset with writeback.
85 A base register indexed by (optionally scaled) register.
88 A base register indexed by (optionally scaled) zero-extended register.
91 A base register indexed by (optionally scaled) sign-extended register.
94 A LO_SUM rtx with a base register and "LO12" symbol relocation.
97 A constant symbolic address, in pc-relative literal pool. */
99 enum aarch64_address_type
{
109 struct aarch64_address_info
{
110 enum aarch64_address_type type
;
114 enum aarch64_symbol_type symbol_type
;
117 struct simd_immediate_info
126 /* The current code model. */
127 enum aarch64_code_model aarch64_cmodel
;
130 #undef TARGET_HAVE_TLS
131 #define TARGET_HAVE_TLS 1
134 static bool aarch64_composite_type_p (const_tree
, machine_mode
);
135 static bool aarch64_vfp_is_call_or_return_candidate (machine_mode
,
137 machine_mode
*, int *,
139 static void aarch64_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
140 static void aarch64_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
141 static void aarch64_override_options_after_change (void);
142 static bool aarch64_vector_mode_supported_p (machine_mode
);
143 static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode
,
144 const unsigned char *sel
);
145 static int aarch64_address_cost (rtx
, machine_mode
, addr_space_t
, bool);
146 static bool aarch64_builtin_support_vector_misalignment (machine_mode mode
,
151 /* Major revision number of the ARM Architecture implemented by the target. */
152 unsigned aarch64_architecture_version
;
154 /* The processor for which instructions should be scheduled. */
155 enum aarch64_processor aarch64_tune
= cortexa53
;
157 /* Mask to specify which instruction scheduling options should be used. */
158 unsigned long aarch64_tune_flags
= 0;
160 /* Global flag for PC relative loads. */
161 bool aarch64_pcrelative_literal_loads
;
163 /* Support for command line parsing of boolean flags in the tuning
165 struct aarch64_flag_desc
171 #define AARCH64_FUSION_PAIR(name, internal_name) \
172 { name, AARCH64_FUSE_##internal_name },
173 static const struct aarch64_flag_desc aarch64_fusible_pairs
[] =
175 { "none", AARCH64_FUSE_NOTHING
},
176 #include "aarch64-fusion-pairs.def"
177 { "all", AARCH64_FUSE_ALL
},
178 { NULL
, AARCH64_FUSE_NOTHING
}
181 #define AARCH64_EXTRA_TUNING_OPTION(name, internal_name) \
182 { name, AARCH64_EXTRA_TUNE_##internal_name },
183 static const struct aarch64_flag_desc aarch64_tuning_flags
[] =
185 { "none", AARCH64_EXTRA_TUNE_NONE
},
186 #include "aarch64-tuning-flags.def"
187 { "all", AARCH64_EXTRA_TUNE_ALL
},
188 { NULL
, AARCH64_EXTRA_TUNE_NONE
}
191 /* Tuning parameters. */
193 static const struct cpu_addrcost_table generic_addrcost_table
=
203 0, /* register_offset */
204 0, /* register_sextend */
205 0, /* register_zextend */
209 static const struct cpu_addrcost_table cortexa57_addrcost_table
=
219 0, /* register_offset */
220 0, /* register_sextend */
221 0, /* register_zextend */
225 static const struct cpu_addrcost_table exynosm1_addrcost_table
=
235 1, /* register_offset */
236 1, /* register_sextend */
237 2, /* register_zextend */
241 static const struct cpu_addrcost_table xgene1_addrcost_table
=
251 0, /* register_offset */
252 1, /* register_sextend */
253 1, /* register_zextend */
257 static const struct cpu_addrcost_table qdf24xx_addrcost_table
=
267 0, /* register_offset */
268 0, /* register_sextend */
269 0, /* register_zextend */
273 static const struct cpu_addrcost_table thunderx2t99_addrcost_table
=
283 2, /* register_offset */
284 3, /* register_sextend */
285 3, /* register_zextend */
289 static const struct cpu_regmove_cost generic_regmove_cost
=
292 /* Avoid the use of slow int<->fp moves for spilling by setting
293 their cost higher than memmov_cost. */
299 static const struct cpu_regmove_cost cortexa57_regmove_cost
=
302 /* Avoid the use of slow int<->fp moves for spilling by setting
303 their cost higher than memmov_cost. */
309 static const struct cpu_regmove_cost cortexa53_regmove_cost
=
312 /* Avoid the use of slow int<->fp moves for spilling by setting
313 their cost higher than memmov_cost. */
319 static const struct cpu_regmove_cost exynosm1_regmove_cost
=
322 /* Avoid the use of slow int<->fp moves for spilling by setting
323 their cost higher than memmov_cost (actual, 4 and 9). */
329 static const struct cpu_regmove_cost thunderx_regmove_cost
=
337 static const struct cpu_regmove_cost xgene1_regmove_cost
=
340 /* Avoid the use of slow int<->fp moves for spilling by setting
341 their cost higher than memmov_cost. */
347 static const struct cpu_regmove_cost qdf24xx_regmove_cost
=
350 /* Avoid the use of int<->fp moves for spilling. */
356 static const struct cpu_regmove_cost thunderx2t99_regmove_cost
=
359 /* Avoid the use of int<->fp moves for spilling. */
365 /* Generic costs for vector insn classes. */
366 static const struct cpu_vector_cost generic_vector_cost
=
368 1, /* scalar_int_stmt_cost */
369 1, /* scalar_fp_stmt_cost */
370 1, /* scalar_load_cost */
371 1, /* scalar_store_cost */
372 1, /* vec_int_stmt_cost */
373 1, /* vec_fp_stmt_cost */
374 2, /* vec_permute_cost */
375 1, /* vec_to_scalar_cost */
376 1, /* scalar_to_vec_cost */
377 1, /* vec_align_load_cost */
378 1, /* vec_unalign_load_cost */
379 1, /* vec_unalign_store_cost */
380 1, /* vec_store_cost */
381 3, /* cond_taken_branch_cost */
382 1 /* cond_not_taken_branch_cost */
385 /* ThunderX costs for vector insn classes. */
386 static const struct cpu_vector_cost thunderx_vector_cost
=
388 1, /* scalar_int_stmt_cost */
389 1, /* scalar_fp_stmt_cost */
390 3, /* scalar_load_cost */
391 1, /* scalar_store_cost */
392 4, /* vec_int_stmt_cost */
393 4, /* vec_fp_stmt_cost */
394 4, /* vec_permute_cost */
395 2, /* vec_to_scalar_cost */
396 2, /* scalar_to_vec_cost */
397 3, /* vec_align_load_cost */
398 10, /* vec_unalign_load_cost */
399 10, /* vec_unalign_store_cost */
400 1, /* vec_store_cost */
401 3, /* cond_taken_branch_cost */
402 3 /* cond_not_taken_branch_cost */
405 /* Generic costs for vector insn classes. */
406 static const struct cpu_vector_cost cortexa57_vector_cost
=
408 1, /* scalar_int_stmt_cost */
409 1, /* scalar_fp_stmt_cost */
410 4, /* scalar_load_cost */
411 1, /* scalar_store_cost */
412 2, /* vec_int_stmt_cost */
413 2, /* vec_fp_stmt_cost */
414 3, /* vec_permute_cost */
415 8, /* vec_to_scalar_cost */
416 8, /* scalar_to_vec_cost */
417 4, /* vec_align_load_cost */
418 4, /* vec_unalign_load_cost */
419 1, /* vec_unalign_store_cost */
420 1, /* vec_store_cost */
421 1, /* cond_taken_branch_cost */
422 1 /* cond_not_taken_branch_cost */
425 static const struct cpu_vector_cost exynosm1_vector_cost
=
427 1, /* scalar_int_stmt_cost */
428 1, /* scalar_fp_stmt_cost */
429 5, /* scalar_load_cost */
430 1, /* scalar_store_cost */
431 3, /* vec_int_stmt_cost */
432 3, /* vec_fp_stmt_cost */
433 3, /* vec_permute_cost */
434 3, /* vec_to_scalar_cost */
435 3, /* scalar_to_vec_cost */
436 5, /* vec_align_load_cost */
437 5, /* vec_unalign_load_cost */
438 1, /* vec_unalign_store_cost */
439 1, /* vec_store_cost */
440 1, /* cond_taken_branch_cost */
441 1 /* cond_not_taken_branch_cost */
444 /* Generic costs for vector insn classes. */
445 static const struct cpu_vector_cost xgene1_vector_cost
=
447 1, /* scalar_int_stmt_cost */
448 1, /* scalar_fp_stmt_cost */
449 5, /* scalar_load_cost */
450 1, /* scalar_store_cost */
451 2, /* vec_int_stmt_cost */
452 2, /* vec_fp_stmt_cost */
453 2, /* vec_permute_cost */
454 4, /* vec_to_scalar_cost */
455 4, /* scalar_to_vec_cost */
456 10, /* vec_align_load_cost */
457 10, /* vec_unalign_load_cost */
458 2, /* vec_unalign_store_cost */
459 2, /* vec_store_cost */
460 2, /* cond_taken_branch_cost */
461 1 /* cond_not_taken_branch_cost */
464 /* Costs for vector insn classes for Vulcan. */
465 static const struct cpu_vector_cost thunderx2t99_vector_cost
=
467 1, /* scalar_int_stmt_cost */
468 6, /* scalar_fp_stmt_cost */
469 4, /* scalar_load_cost */
470 1, /* scalar_store_cost */
471 5, /* vec_int_stmt_cost */
472 6, /* vec_fp_stmt_cost */
473 3, /* vec_permute_cost */
474 6, /* vec_to_scalar_cost */
475 5, /* scalar_to_vec_cost */
476 8, /* vec_align_load_cost */
477 8, /* vec_unalign_load_cost */
478 4, /* vec_unalign_store_cost */
479 4, /* vec_store_cost */
480 2, /* cond_taken_branch_cost */
481 1 /* cond_not_taken_branch_cost */
484 /* Generic costs for branch instructions. */
485 static const struct cpu_branch_cost generic_branch_cost
=
487 1, /* Predictable. */
488 3 /* Unpredictable. */
491 /* Branch costs for Cortex-A57. */
492 static const struct cpu_branch_cost cortexa57_branch_cost
=
494 1, /* Predictable. */
495 3 /* Unpredictable. */
498 /* Branch costs for Vulcan. */
499 static const struct cpu_branch_cost thunderx2t99_branch_cost
=
501 1, /* Predictable. */
502 3 /* Unpredictable. */
505 /* Generic approximation modes. */
506 static const cpu_approx_modes generic_approx_modes
=
508 AARCH64_APPROX_NONE
, /* division */
509 AARCH64_APPROX_NONE
, /* sqrt */
510 AARCH64_APPROX_NONE
/* recip_sqrt */
513 /* Approximation modes for Exynos M1. */
514 static const cpu_approx_modes exynosm1_approx_modes
=
516 AARCH64_APPROX_NONE
, /* division */
517 AARCH64_APPROX_ALL
, /* sqrt */
518 AARCH64_APPROX_ALL
/* recip_sqrt */
521 /* Approximation modes for X-Gene 1. */
522 static const cpu_approx_modes xgene1_approx_modes
=
524 AARCH64_APPROX_NONE
, /* division */
525 AARCH64_APPROX_NONE
, /* sqrt */
526 AARCH64_APPROX_ALL
/* recip_sqrt */
529 /* Generic prefetch settings (which disable prefetch). */
530 static const cpu_prefetch_tune generic_prefetch_tune
=
533 -1, /* l1_cache_size */
534 -1, /* l1_cache_line_size */
535 -1, /* l2_cache_size */
536 -1 /* default_opt_level */
539 static const cpu_prefetch_tune exynosm1_prefetch_tune
=
542 -1, /* l1_cache_size */
543 64, /* l1_cache_line_size */
544 -1, /* l2_cache_size */
545 -1 /* default_opt_level */
548 static const cpu_prefetch_tune qdf24xx_prefetch_tune
=
551 32, /* l1_cache_size */
552 64, /* l1_cache_line_size */
553 1024, /* l2_cache_size */
554 3 /* default_opt_level */
557 static const cpu_prefetch_tune thunderx2t99_prefetch_tune
=
560 -1, /* l1_cache_size */
561 64, /* l1_cache_line_size */
562 -1, /* l2_cache_size */
563 -1 /* default_opt_level */
566 static const struct tune_params generic_tunings
=
568 &cortexa57_extra_costs
,
569 &generic_addrcost_table
,
570 &generic_regmove_cost
,
571 &generic_vector_cost
,
572 &generic_branch_cost
,
573 &generic_approx_modes
,
576 (AARCH64_FUSE_AES_AESMC
), /* fusible_ops */
577 8, /* function_align. */
580 2, /* int_reassoc_width. */
581 4, /* fp_reassoc_width. */
582 1, /* vec_reassoc_width. */
583 2, /* min_div_recip_mul_sf. */
584 2, /* min_div_recip_mul_df. */
585 0, /* max_case_values. */
586 tune_params::AUTOPREFETCHER_WEAK
, /* autoprefetcher_model. */
587 (AARCH64_EXTRA_TUNE_NONE
), /* tune_flags. */
588 &generic_prefetch_tune
591 static const struct tune_params cortexa35_tunings
=
593 &cortexa53_extra_costs
,
594 &generic_addrcost_table
,
595 &cortexa53_regmove_cost
,
596 &generic_vector_cost
,
597 &cortexa57_branch_cost
,
598 &generic_approx_modes
,
601 (AARCH64_FUSE_AES_AESMC
| AARCH64_FUSE_MOV_MOVK
| AARCH64_FUSE_ADRP_ADD
602 | AARCH64_FUSE_MOVK_MOVK
| AARCH64_FUSE_ADRP_LDR
), /* fusible_ops */
603 16, /* function_align. */
606 2, /* int_reassoc_width. */
607 4, /* fp_reassoc_width. */
608 1, /* vec_reassoc_width. */
609 2, /* min_div_recip_mul_sf. */
610 2, /* min_div_recip_mul_df. */
611 0, /* max_case_values. */
612 tune_params::AUTOPREFETCHER_WEAK
, /* autoprefetcher_model. */
613 (AARCH64_EXTRA_TUNE_NONE
), /* tune_flags. */
614 &generic_prefetch_tune
617 static const struct tune_params cortexa53_tunings
=
619 &cortexa53_extra_costs
,
620 &generic_addrcost_table
,
621 &cortexa53_regmove_cost
,
622 &generic_vector_cost
,
623 &cortexa57_branch_cost
,
624 &generic_approx_modes
,
627 (AARCH64_FUSE_AES_AESMC
| AARCH64_FUSE_MOV_MOVK
| AARCH64_FUSE_ADRP_ADD
628 | AARCH64_FUSE_MOVK_MOVK
| AARCH64_FUSE_ADRP_LDR
), /* fusible_ops */
629 16, /* function_align. */
632 2, /* int_reassoc_width. */
633 4, /* fp_reassoc_width. */
634 1, /* vec_reassoc_width. */
635 2, /* min_div_recip_mul_sf. */
636 2, /* min_div_recip_mul_df. */
637 0, /* max_case_values. */
638 tune_params::AUTOPREFETCHER_WEAK
, /* autoprefetcher_model. */
639 (AARCH64_EXTRA_TUNE_NONE
), /* tune_flags. */
640 &generic_prefetch_tune
643 static const struct tune_params cortexa57_tunings
=
645 &cortexa57_extra_costs
,
646 &cortexa57_addrcost_table
,
647 &cortexa57_regmove_cost
,
648 &cortexa57_vector_cost
,
649 &cortexa57_branch_cost
,
650 &generic_approx_modes
,
653 (AARCH64_FUSE_AES_AESMC
| AARCH64_FUSE_MOV_MOVK
| AARCH64_FUSE_ADRP_ADD
654 | AARCH64_FUSE_MOVK_MOVK
), /* fusible_ops */
655 16, /* function_align. */
658 2, /* int_reassoc_width. */
659 4, /* fp_reassoc_width. */
660 1, /* vec_reassoc_width. */
661 2, /* min_div_recip_mul_sf. */
662 2, /* min_div_recip_mul_df. */
663 0, /* max_case_values. */
664 tune_params::AUTOPREFETCHER_WEAK
, /* autoprefetcher_model. */
665 (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS
), /* tune_flags. */
666 &generic_prefetch_tune
669 static const struct tune_params cortexa72_tunings
=
671 &cortexa57_extra_costs
,
672 &cortexa57_addrcost_table
,
673 &cortexa57_regmove_cost
,
674 &cortexa57_vector_cost
,
675 &cortexa57_branch_cost
,
676 &generic_approx_modes
,
679 (AARCH64_FUSE_AES_AESMC
| AARCH64_FUSE_MOV_MOVK
| AARCH64_FUSE_ADRP_ADD
680 | AARCH64_FUSE_MOVK_MOVK
), /* fusible_ops */
681 16, /* function_align. */
684 2, /* int_reassoc_width. */
685 4, /* fp_reassoc_width. */
686 1, /* vec_reassoc_width. */
687 2, /* min_div_recip_mul_sf. */
688 2, /* min_div_recip_mul_df. */
689 0, /* max_case_values. */
690 tune_params::AUTOPREFETCHER_WEAK
, /* autoprefetcher_model. */
691 (AARCH64_EXTRA_TUNE_NONE
), /* tune_flags. */
692 &generic_prefetch_tune
695 static const struct tune_params cortexa73_tunings
=
697 &cortexa57_extra_costs
,
698 &cortexa57_addrcost_table
,
699 &cortexa57_regmove_cost
,
700 &cortexa57_vector_cost
,
701 &cortexa57_branch_cost
,
702 &generic_approx_modes
,
703 4, /* memmov_cost. */
705 (AARCH64_FUSE_AES_AESMC
| AARCH64_FUSE_MOV_MOVK
| AARCH64_FUSE_ADRP_ADD
706 | AARCH64_FUSE_MOVK_MOVK
| AARCH64_FUSE_ADRP_LDR
), /* fusible_ops */
707 16, /* function_align. */
710 2, /* int_reassoc_width. */
711 4, /* fp_reassoc_width. */
712 1, /* vec_reassoc_width. */
713 2, /* min_div_recip_mul_sf. */
714 2, /* min_div_recip_mul_df. */
715 0, /* max_case_values. */
716 tune_params::AUTOPREFETCHER_WEAK
, /* autoprefetcher_model. */
717 (AARCH64_EXTRA_TUNE_NONE
), /* tune_flags. */
718 &generic_prefetch_tune
723 static const struct tune_params exynosm1_tunings
=
725 &exynosm1_extra_costs
,
726 &exynosm1_addrcost_table
,
727 &exynosm1_regmove_cost
,
728 &exynosm1_vector_cost
,
729 &generic_branch_cost
,
730 &exynosm1_approx_modes
,
733 (AARCH64_FUSE_AES_AESMC
), /* fusible_ops */
734 4, /* function_align. */
737 2, /* int_reassoc_width. */
738 4, /* fp_reassoc_width. */
739 1, /* vec_reassoc_width. */
740 2, /* min_div_recip_mul_sf. */
741 2, /* min_div_recip_mul_df. */
742 48, /* max_case_values. */
743 tune_params::AUTOPREFETCHER_WEAK
, /* autoprefetcher_model. */
744 (AARCH64_EXTRA_TUNE_NONE
), /* tune_flags. */
745 &exynosm1_prefetch_tune
748 static const struct tune_params thunderx_tunings
=
750 &thunderx_extra_costs
,
751 &generic_addrcost_table
,
752 &thunderx_regmove_cost
,
753 &thunderx_vector_cost
,
754 &generic_branch_cost
,
755 &generic_approx_modes
,
758 AARCH64_FUSE_CMP_BRANCH
, /* fusible_ops */
759 8, /* function_align. */
762 2, /* int_reassoc_width. */
763 4, /* fp_reassoc_width. */
764 1, /* vec_reassoc_width. */
765 2, /* min_div_recip_mul_sf. */
766 2, /* min_div_recip_mul_df. */
767 0, /* max_case_values. */
768 tune_params::AUTOPREFETCHER_OFF
, /* autoprefetcher_model. */
769 (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
), /* tune_flags. */
770 &generic_prefetch_tune
773 static const struct tune_params xgene1_tunings
=
776 &xgene1_addrcost_table
,
777 &xgene1_regmove_cost
,
779 &generic_branch_cost
,
780 &xgene1_approx_modes
,
783 AARCH64_FUSE_NOTHING
, /* fusible_ops */
784 16, /* function_align. */
786 16, /* loop_align. */
787 2, /* int_reassoc_width. */
788 4, /* fp_reassoc_width. */
789 1, /* vec_reassoc_width. */
790 2, /* min_div_recip_mul_sf. */
791 2, /* min_div_recip_mul_df. */
792 0, /* max_case_values. */
793 tune_params::AUTOPREFETCHER_OFF
, /* autoprefetcher_model. */
794 (AARCH64_EXTRA_TUNE_NONE
), /* tune_flags. */
795 &generic_prefetch_tune
798 static const struct tune_params qdf24xx_tunings
=
800 &qdf24xx_extra_costs
,
801 &qdf24xx_addrcost_table
,
802 &qdf24xx_regmove_cost
,
803 &generic_vector_cost
,
804 &generic_branch_cost
,
805 &generic_approx_modes
,
808 (AARCH64_FUSE_MOV_MOVK
| AARCH64_FUSE_ADRP_ADD
809 | AARCH64_FUSE_MOVK_MOVK
), /* fuseable_ops */
810 16, /* function_align. */
812 16, /* loop_align. */
813 2, /* int_reassoc_width. */
814 4, /* fp_reassoc_width. */
815 1, /* vec_reassoc_width. */
816 2, /* min_div_recip_mul_sf. */
817 2, /* min_div_recip_mul_df. */
818 0, /* max_case_values. */
819 tune_params::AUTOPREFETCHER_STRONG
, /* autoprefetcher_model. */
820 (AARCH64_EXTRA_TUNE_NONE
), /* tune_flags. */
821 &qdf24xx_prefetch_tune
824 static const struct tune_params thunderx2t99_tunings
=
826 &thunderx2t99_extra_costs
,
827 &thunderx2t99_addrcost_table
,
828 &thunderx2t99_regmove_cost
,
829 &thunderx2t99_vector_cost
,
830 &thunderx2t99_branch_cost
,
831 &generic_approx_modes
,
832 4, /* memmov_cost. */
834 (AARCH64_FUSE_CMP_BRANCH
| AARCH64_FUSE_AES_AESMC
), /* fusible_ops */
835 16, /* function_align. */
837 16, /* loop_align. */
838 3, /* int_reassoc_width. */
839 2, /* fp_reassoc_width. */
840 2, /* vec_reassoc_width. */
841 2, /* min_div_recip_mul_sf. */
842 2, /* min_div_recip_mul_df. */
843 0, /* max_case_values. */
844 tune_params::AUTOPREFETCHER_OFF
, /* autoprefetcher_model. */
845 (AARCH64_EXTRA_TUNE_NONE
), /* tune_flags. */
846 &thunderx2t99_prefetch_tune
849 /* Support for fine-grained override of the tuning structures. */
850 struct aarch64_tuning_override_function
853 void (*parse_override
)(const char*, struct tune_params
*);
856 static void aarch64_parse_fuse_string (const char*, struct tune_params
*);
857 static void aarch64_parse_tune_string (const char*, struct tune_params
*);
859 static const struct aarch64_tuning_override_function
860 aarch64_tuning_override_functions
[] =
862 { "fuse", aarch64_parse_fuse_string
},
863 { "tune", aarch64_parse_tune_string
},
867 /* A processor implementing AArch64. */
870 const char *const name
;
871 enum aarch64_processor ident
;
872 enum aarch64_processor sched_core
;
873 enum aarch64_arch arch
;
874 unsigned architecture_version
;
875 const unsigned long flags
;
876 const struct tune_params
*const tune
;
879 /* Architectures implementing AArch64. */
880 static const struct processor all_architectures
[] =
882 #define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
883 {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL},
884 #include "aarch64-arches.def"
885 {NULL
, aarch64_none
, aarch64_none
, aarch64_no_arch
, 0, 0, NULL
}
888 /* Processor cores implementing AArch64. */
889 static const struct processor all_cores
[] =
891 #define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
892 {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \
893 all_architectures[AARCH64_ARCH_##ARCH].architecture_version, \
894 FLAGS, &COSTS##_tunings},
895 #include "aarch64-cores.def"
896 {"generic", generic
, cortexa53
, AARCH64_ARCH_8A
, 8,
897 AARCH64_FL_FOR_ARCH8
, &generic_tunings
},
898 {NULL
, aarch64_none
, aarch64_none
, aarch64_no_arch
, 0, 0, NULL
}
902 /* Target specification. These are populated by the -march, -mtune, -mcpu
903 handling code or by target attributes. */
904 static const struct processor
*selected_arch
;
905 static const struct processor
*selected_cpu
;
906 static const struct processor
*selected_tune
;
908 /* The current tuning set. */
909 struct tune_params aarch64_tune_params
= generic_tunings
;
911 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
913 /* An ISA extension in the co-processor and main instruction set space. */
914 struct aarch64_option_extension
916 const char *const name
;
917 const unsigned long flags_on
;
918 const unsigned long flags_off
;
921 typedef enum aarch64_cond_code
923 AARCH64_EQ
= 0, AARCH64_NE
, AARCH64_CS
, AARCH64_CC
, AARCH64_MI
, AARCH64_PL
,
924 AARCH64_VS
, AARCH64_VC
, AARCH64_HI
, AARCH64_LS
, AARCH64_GE
, AARCH64_LT
,
925 AARCH64_GT
, AARCH64_LE
, AARCH64_AL
, AARCH64_NV
929 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
931 /* The condition codes of the processor, and the inverse function. */
932 static const char * const aarch64_condition_codes
[] =
934 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
935 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
938 /* Generate code to enable conditional branches in functions over 1 MiB. */
940 aarch64_gen_far_branch (rtx
* operands
, int pos_label
, const char * dest
,
941 const char * branch_format
)
943 rtx_code_label
* tmp_label
= gen_label_rtx ();
946 ASM_GENERATE_INTERNAL_LABEL (label_buf
, dest
,
947 CODE_LABEL_NUMBER (tmp_label
));
948 const char *label_ptr
= targetm
.strip_name_encoding (label_buf
);
949 rtx dest_label
= operands
[pos_label
];
950 operands
[pos_label
] = tmp_label
;
952 snprintf (buffer
, sizeof (buffer
), "%s%s", branch_format
, label_ptr
);
953 output_asm_insn (buffer
, operands
);
955 snprintf (buffer
, sizeof (buffer
), "b\t%%l%d\n%s:", pos_label
, label_ptr
);
956 operands
[pos_label
] = dest_label
;
957 output_asm_insn (buffer
, operands
);
962 aarch64_err_no_fpadvsimd (machine_mode mode
, const char *msg
)
964 const char *mc
= FLOAT_MODE_P (mode
) ? "floating-point" : "vector";
965 if (TARGET_GENERAL_REGS_ONLY
)
966 error ("%qs is incompatible with %s %s", "-mgeneral-regs-only", mc
, msg
);
968 error ("%qs feature modifier is incompatible with %s %s", "+nofp", mc
, msg
);
971 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
972 The register allocator chooses ALL_REGS if FP_REGS and GENERAL_REGS have
973 the same cost even if ALL_REGS has a much larger cost. ALL_REGS is also
974 used if the cost of both FP_REGS and GENERAL_REGS is lower than the memory
975 cost (in this case the best class is the lowest cost one). Using ALL_REGS
976 irrespectively of its cost results in bad allocations with many redundant
977 int<->FP moves which are expensive on various cores.
978 To avoid this we don't allow ALL_REGS as the allocno class, but force a
979 decision between FP_REGS and GENERAL_REGS. We use the allocno class if it
980 isn't ALL_REGS. Similarly, use the best class if it isn't ALL_REGS.
981 Otherwise set the allocno class depending on the mode.
982 The result of this is that it is no longer inefficient to have a higher
983 memory move cost than the register move cost.
987 aarch64_ira_change_pseudo_allocno_class (int regno
, reg_class_t allocno_class
,
988 reg_class_t best_class
)
990 enum machine_mode mode
;
992 if (allocno_class
!= ALL_REGS
)
993 return allocno_class
;
995 if (best_class
!= ALL_REGS
)
998 mode
= PSEUDO_REGNO_MODE (regno
);
999 return FLOAT_MODE_P (mode
) || VECTOR_MODE_P (mode
) ? FP_REGS
: GENERAL_REGS
;
1003 aarch64_min_divisions_for_recip_mul (enum machine_mode mode
)
1005 if (GET_MODE_UNIT_SIZE (mode
) == 4)
1006 return aarch64_tune_params
.min_div_recip_mul_sf
;
1007 return aarch64_tune_params
.min_div_recip_mul_df
;
1011 aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED
,
1012 enum machine_mode mode
)
1014 if (VECTOR_MODE_P (mode
))
1015 return aarch64_tune_params
.vec_reassoc_width
;
1016 if (INTEGRAL_MODE_P (mode
))
1017 return aarch64_tune_params
.int_reassoc_width
;
1018 if (FLOAT_MODE_P (mode
))
1019 return aarch64_tune_params
.fp_reassoc_width
;
1023 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
1025 aarch64_dbx_register_number (unsigned regno
)
1027 if (GP_REGNUM_P (regno
))
1028 return AARCH64_DWARF_R0
+ regno
- R0_REGNUM
;
1029 else if (regno
== SP_REGNUM
)
1030 return AARCH64_DWARF_SP
;
1031 else if (FP_REGNUM_P (regno
))
1032 return AARCH64_DWARF_V0
+ regno
- V0_REGNUM
;
1034 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
1035 equivalent DWARF register. */
1036 return DWARF_FRAME_REGISTERS
;
1039 /* Return TRUE if MODE is any of the large INT modes. */
1041 aarch64_vect_struct_mode_p (machine_mode mode
)
1043 return mode
== OImode
|| mode
== CImode
|| mode
== XImode
;
1046 /* Return TRUE if MODE is any of the vector modes. */
1048 aarch64_vector_mode_p (machine_mode mode
)
1050 return aarch64_vector_mode_supported_p (mode
)
1051 || aarch64_vect_struct_mode_p (mode
);
1054 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
1056 aarch64_array_mode_supported_p (machine_mode mode
,
1057 unsigned HOST_WIDE_INT nelems
)
1060 && (AARCH64_VALID_SIMD_QREG_MODE (mode
)
1061 || AARCH64_VALID_SIMD_DREG_MODE (mode
))
1062 && (nelems
>= 2 && nelems
<= 4))
1068 /* Implement HARD_REGNO_NREGS. */
1071 aarch64_hard_regno_nregs (unsigned regno
, machine_mode mode
)
1073 switch (aarch64_regno_regclass (regno
))
1077 return (GET_MODE_SIZE (mode
) + UNITS_PER_VREG
- 1) / UNITS_PER_VREG
;
1079 return (GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
1084 /* Implement HARD_REGNO_MODE_OK. */
1087 aarch64_hard_regno_mode_ok (unsigned regno
, machine_mode mode
)
1089 if (GET_MODE_CLASS (mode
) == MODE_CC
)
1090 return regno
== CC_REGNUM
;
1092 if (regno
== SP_REGNUM
)
1093 /* The purpose of comparing with ptr_mode is to support the
1094 global register variable associated with the stack pointer
1095 register via the syntax of asm ("wsp") in ILP32. */
1096 return mode
== Pmode
|| mode
== ptr_mode
;
1098 if (regno
== FRAME_POINTER_REGNUM
|| regno
== ARG_POINTER_REGNUM
)
1099 return mode
== Pmode
;
1101 if (GP_REGNUM_P (regno
) && ! aarch64_vect_struct_mode_p (mode
))
1104 if (FP_REGNUM_P (regno
))
1106 if (aarch64_vect_struct_mode_p (mode
))
1108 (regno
+ aarch64_hard_regno_nregs (regno
, mode
) - 1) <= V31_REGNUM
;
1116 /* Implement HARD_REGNO_CALLER_SAVE_MODE. */
1118 aarch64_hard_regno_caller_save_mode (unsigned regno
, unsigned nregs
,
1121 /* Handle modes that fit within single registers. */
1122 if (nregs
== 1 && GET_MODE_SIZE (mode
) <= 16)
1124 if (GET_MODE_SIZE (mode
) >= 4)
1129 /* Fall back to generic for multi-reg and very large modes. */
1131 return choose_hard_reg_mode (regno
, nregs
, false);
1134 /* Return true if calls to DECL should be treated as
1135 long-calls (ie called via a register). */
1137 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED
)
1142 /* Return true if calls to symbol-ref SYM should be treated as
1143 long-calls (ie called via a register). */
1145 aarch64_is_long_call_p (rtx sym
)
1147 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym
));
1150 /* Return true if calls to symbol-ref SYM should not go through
1154 aarch64_is_noplt_call_p (rtx sym
)
1156 const_tree decl
= SYMBOL_REF_DECL (sym
);
1161 || lookup_attribute ("noplt", DECL_ATTRIBUTES (decl
)))
1162 && !targetm
.binds_local_p (decl
))
1168 /* Return true if the offsets to a zero/sign-extract operation
1169 represent an expression that matches an extend operation. The
1170 operands represent the paramters from
1172 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
1174 aarch64_is_extend_from_extract (machine_mode mode
, rtx mult_imm
,
1177 HOST_WIDE_INT mult_val
, extract_val
;
1179 if (! CONST_INT_P (mult_imm
) || ! CONST_INT_P (extract_imm
))
1182 mult_val
= INTVAL (mult_imm
);
1183 extract_val
= INTVAL (extract_imm
);
1186 && extract_val
< GET_MODE_BITSIZE (mode
)
1187 && exact_log2 (extract_val
& ~7) > 0
1188 && (extract_val
& 7) <= 4
1189 && mult_val
== (1 << (extract_val
& 7)))
1195 /* Emit an insn that's a simple single-set. Both the operands must be
1196 known to be valid. */
1197 inline static rtx_insn
*
1198 emit_set_insn (rtx x
, rtx y
)
1200 return emit_insn (gen_rtx_SET (x
, y
));
1203 /* X and Y are two things to compare using CODE. Emit the compare insn and
1204 return the rtx for register 0 in the proper mode. */
1206 aarch64_gen_compare_reg (RTX_CODE code
, rtx x
, rtx y
)
1208 machine_mode mode
= SELECT_CC_MODE (code
, x
, y
);
1209 rtx cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
1211 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
1215 /* Build the SYMBOL_REF for __tls_get_addr. */
1217 static GTY(()) rtx tls_get_addr_libfunc
;
1220 aarch64_tls_get_addr (void)
1222 if (!tls_get_addr_libfunc
)
1223 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
1224 return tls_get_addr_libfunc
;
1227 /* Return the TLS model to use for ADDR. */
1229 static enum tls_model
1230 tls_symbolic_operand_type (rtx addr
)
1232 enum tls_model tls_kind
= TLS_MODEL_NONE
;
1235 if (GET_CODE (addr
) == CONST
)
1237 split_const (addr
, &sym
, &addend
);
1238 if (GET_CODE (sym
) == SYMBOL_REF
)
1239 tls_kind
= SYMBOL_REF_TLS_MODEL (sym
);
1241 else if (GET_CODE (addr
) == SYMBOL_REF
)
1242 tls_kind
= SYMBOL_REF_TLS_MODEL (addr
);
1247 /* We'll allow lo_sum's in addresses in our legitimate addresses
1248 so that combine would take care of combining addresses where
1249 necessary, but for generation purposes, we'll generate the address
1252 tmp = hi (symbol_ref); adrp x1, foo
1253 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
1257 adrp x1, :got:foo adrp tmp, :tlsgd:foo
1258 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
1262 Load TLS symbol, depending on TLS mechanism and TLS access model.
1264 Global Dynamic - Traditional TLS:
1265 adrp tmp, :tlsgd:imm
1266 add dest, tmp, #:tlsgd_lo12:imm
1269 Global Dynamic - TLS Descriptors:
1270 adrp dest, :tlsdesc:imm
1271 ldr tmp, [dest, #:tlsdesc_lo12:imm]
1272 add dest, dest, #:tlsdesc_lo12:imm
1279 adrp tmp, :gottprel:imm
1280 ldr dest, [tmp, #:gottprel_lo12:imm]
1285 add t0, tp, #:tprel_hi12:imm, lsl #12
1286 add t0, t0, #:tprel_lo12_nc:imm
1290 aarch64_load_symref_appropriately (rtx dest
, rtx imm
,
1291 enum aarch64_symbol_type type
)
1295 case SYMBOL_SMALL_ABSOLUTE
:
1297 /* In ILP32, the mode of dest can be either SImode or DImode. */
1299 machine_mode mode
= GET_MODE (dest
);
1301 gcc_assert (mode
== Pmode
|| mode
== ptr_mode
);
1303 if (can_create_pseudo_p ())
1304 tmp_reg
= gen_reg_rtx (mode
);
1306 emit_move_insn (tmp_reg
, gen_rtx_HIGH (mode
, imm
));
1307 emit_insn (gen_add_losym (dest
, tmp_reg
, imm
));
1311 case SYMBOL_TINY_ABSOLUTE
:
1312 emit_insn (gen_rtx_SET (dest
, imm
));
1315 case SYMBOL_SMALL_GOT_28K
:
1317 machine_mode mode
= GET_MODE (dest
);
1318 rtx gp_rtx
= pic_offset_table_rtx
;
1322 /* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
1323 here before rtl expand. Tree IVOPT will generate rtl pattern to
1324 decide rtx costs, in which case pic_offset_table_rtx is not
1325 initialized. For that case no need to generate the first adrp
1326 instruction as the final cost for global variable access is
1330 /* -fpic for -mcmodel=small allow 32K GOT table size (but we are
1331 using the page base as GOT base, the first page may be wasted,
1332 in the worst scenario, there is only 28K space for GOT).
1334 The generate instruction sequence for accessing global variable
1337 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]
1339 Only one instruction needed. But we must initialize
1340 pic_offset_table_rtx properly. We generate initialize insn for
1341 every global access, and allow CSE to remove all redundant.
1343 The final instruction sequences will look like the following
1344 for multiply global variables access.
1346 adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_
1348 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
1349 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
1350 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
1353 rtx s
= gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
1354 crtl
->uses_pic_offset_table
= 1;
1355 emit_move_insn (gp_rtx
, gen_rtx_HIGH (Pmode
, s
));
1357 if (mode
!= GET_MODE (gp_rtx
))
1358 gp_rtx
= gen_lowpart (mode
, gp_rtx
);
1362 if (mode
== ptr_mode
)
1365 insn
= gen_ldr_got_small_28k_di (dest
, gp_rtx
, imm
);
1367 insn
= gen_ldr_got_small_28k_si (dest
, gp_rtx
, imm
);
1369 mem
= XVECEXP (SET_SRC (insn
), 0, 0);
1373 gcc_assert (mode
== Pmode
);
1375 insn
= gen_ldr_got_small_28k_sidi (dest
, gp_rtx
, imm
);
1376 mem
= XVECEXP (XEXP (SET_SRC (insn
), 0), 0, 0);
1379 /* The operand is expected to be MEM. Whenever the related insn
1380 pattern changed, above code which calculate mem should be
1382 gcc_assert (GET_CODE (mem
) == MEM
);
1383 MEM_READONLY_P (mem
) = 1;
1384 MEM_NOTRAP_P (mem
) = 1;
1389 case SYMBOL_SMALL_GOT_4G
:
1391 /* In ILP32, the mode of dest can be either SImode or DImode,
1392 while the got entry is always of SImode size. The mode of
1393 dest depends on how dest is used: if dest is assigned to a
1394 pointer (e.g. in the memory), it has SImode; it may have
1395 DImode if dest is dereferenced to access the memeory.
1396 This is why we have to handle three different ldr_got_small
1397 patterns here (two patterns for ILP32). */
1402 machine_mode mode
= GET_MODE (dest
);
1404 if (can_create_pseudo_p ())
1405 tmp_reg
= gen_reg_rtx (mode
);
1407 emit_move_insn (tmp_reg
, gen_rtx_HIGH (mode
, imm
));
1408 if (mode
== ptr_mode
)
1411 insn
= gen_ldr_got_small_di (dest
, tmp_reg
, imm
);
1413 insn
= gen_ldr_got_small_si (dest
, tmp_reg
, imm
);
1415 mem
= XVECEXP (SET_SRC (insn
), 0, 0);
1419 gcc_assert (mode
== Pmode
);
1421 insn
= gen_ldr_got_small_sidi (dest
, tmp_reg
, imm
);
1422 mem
= XVECEXP (XEXP (SET_SRC (insn
), 0), 0, 0);
1425 gcc_assert (GET_CODE (mem
) == MEM
);
1426 MEM_READONLY_P (mem
) = 1;
1427 MEM_NOTRAP_P (mem
) = 1;
1432 case SYMBOL_SMALL_TLSGD
:
1435 machine_mode mode
= GET_MODE (dest
);
1436 rtx result
= gen_rtx_REG (mode
, R0_REGNUM
);
1440 aarch64_emit_call_insn (gen_tlsgd_small_si (result
, imm
));
1442 aarch64_emit_call_insn (gen_tlsgd_small_di (result
, imm
));
1443 insns
= get_insns ();
1446 RTL_CONST_CALL_P (insns
) = 1;
1447 emit_libcall_block (insns
, dest
, result
, imm
);
1451 case SYMBOL_SMALL_TLSDESC
:
1453 machine_mode mode
= GET_MODE (dest
);
1454 rtx x0
= gen_rtx_REG (mode
, R0_REGNUM
);
1457 gcc_assert (mode
== Pmode
|| mode
== ptr_mode
);
1459 /* In ILP32, the got entry is always of SImode size. Unlike
1460 small GOT, the dest is fixed at reg 0. */
1462 emit_insn (gen_tlsdesc_small_si (imm
));
1464 emit_insn (gen_tlsdesc_small_di (imm
));
1465 tp
= aarch64_load_tp (NULL
);
1468 tp
= gen_lowpart (mode
, tp
);
1470 emit_insn (gen_rtx_SET (dest
, gen_rtx_PLUS (mode
, tp
, x0
)));
1471 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
1475 case SYMBOL_SMALL_TLSIE
:
1477 /* In ILP32, the mode of dest can be either SImode or DImode,
1478 while the got entry is always of SImode size. The mode of
1479 dest depends on how dest is used: if dest is assigned to a
1480 pointer (e.g. in the memory), it has SImode; it may have
1481 DImode if dest is dereferenced to access the memeory.
1482 This is why we have to handle three different tlsie_small
1483 patterns here (two patterns for ILP32). */
1484 machine_mode mode
= GET_MODE (dest
);
1485 rtx tmp_reg
= gen_reg_rtx (mode
);
1486 rtx tp
= aarch64_load_tp (NULL
);
1488 if (mode
== ptr_mode
)
1491 emit_insn (gen_tlsie_small_di (tmp_reg
, imm
));
1494 emit_insn (gen_tlsie_small_si (tmp_reg
, imm
));
1495 tp
= gen_lowpart (mode
, tp
);
1500 gcc_assert (mode
== Pmode
);
1501 emit_insn (gen_tlsie_small_sidi (tmp_reg
, imm
));
1504 emit_insn (gen_rtx_SET (dest
, gen_rtx_PLUS (mode
, tp
, tmp_reg
)));
1505 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
1509 case SYMBOL_TLSLE12
:
1510 case SYMBOL_TLSLE24
:
1511 case SYMBOL_TLSLE32
:
1512 case SYMBOL_TLSLE48
:
1514 machine_mode mode
= GET_MODE (dest
);
1515 rtx tp
= aarch64_load_tp (NULL
);
1518 tp
= gen_lowpart (mode
, tp
);
1522 case SYMBOL_TLSLE12
:
1523 emit_insn ((mode
== DImode
? gen_tlsle12_di
: gen_tlsle12_si
)
1526 case SYMBOL_TLSLE24
:
1527 emit_insn ((mode
== DImode
? gen_tlsle24_di
: gen_tlsle24_si
)
1530 case SYMBOL_TLSLE32
:
1531 emit_insn ((mode
== DImode
? gen_tlsle32_di
: gen_tlsle32_si
)
1533 emit_insn ((mode
== DImode
? gen_adddi3
: gen_addsi3
)
1536 case SYMBOL_TLSLE48
:
1537 emit_insn ((mode
== DImode
? gen_tlsle48_di
: gen_tlsle48_si
)
1539 emit_insn ((mode
== DImode
? gen_adddi3
: gen_addsi3
)
1546 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
1550 case SYMBOL_TINY_GOT
:
1551 emit_insn (gen_ldr_got_tiny (dest
, imm
));
1554 case SYMBOL_TINY_TLSIE
:
1556 machine_mode mode
= GET_MODE (dest
);
1557 rtx tp
= aarch64_load_tp (NULL
);
1559 if (mode
== ptr_mode
)
1562 emit_insn (gen_tlsie_tiny_di (dest
, imm
, tp
));
1565 tp
= gen_lowpart (mode
, tp
);
1566 emit_insn (gen_tlsie_tiny_si (dest
, imm
, tp
));
1571 gcc_assert (mode
== Pmode
);
1572 emit_insn (gen_tlsie_tiny_sidi (dest
, imm
, tp
));
1575 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
1584 /* Emit a move from SRC to DEST. Assume that the move expanders can
1585 handle all moves if !can_create_pseudo_p (). The distinction is
1586 important because, unlike emit_move_insn, the move expanders know
1587 how to force Pmode objects into the constant pool even when the
1588 constant pool address is not itself legitimate. */
1590 aarch64_emit_move (rtx dest
, rtx src
)
1592 return (can_create_pseudo_p ()
1593 ? emit_move_insn (dest
, src
)
1594 : emit_move_insn_1 (dest
, src
));
1597 /* Split a 128-bit move operation into two 64-bit move operations,
1598 taking care to handle partial overlap of register to register
1599 copies. Special cases are needed when moving between GP regs and
1600 FP regs. SRC can be a register, constant or memory; DST a register
1601 or memory. If either operand is memory it must not have any side
1604 aarch64_split_128bit_move (rtx dst
, rtx src
)
1609 machine_mode mode
= GET_MODE (dst
);
1611 gcc_assert (mode
== TImode
|| mode
== TFmode
);
1612 gcc_assert (!(side_effects_p (src
) || side_effects_p (dst
)));
1613 gcc_assert (mode
== GET_MODE (src
) || GET_MODE (src
) == VOIDmode
);
1615 if (REG_P (dst
) && REG_P (src
))
1617 int src_regno
= REGNO (src
);
1618 int dst_regno
= REGNO (dst
);
1620 /* Handle FP <-> GP regs. */
1621 if (FP_REGNUM_P (dst_regno
) && GP_REGNUM_P (src_regno
))
1623 src_lo
= gen_lowpart (word_mode
, src
);
1624 src_hi
= gen_highpart (word_mode
, src
);
1628 emit_insn (gen_aarch64_movtilow_di (dst
, src_lo
));
1629 emit_insn (gen_aarch64_movtihigh_di (dst
, src_hi
));
1633 emit_insn (gen_aarch64_movtflow_di (dst
, src_lo
));
1634 emit_insn (gen_aarch64_movtfhigh_di (dst
, src_hi
));
1638 else if (GP_REGNUM_P (dst_regno
) && FP_REGNUM_P (src_regno
))
1640 dst_lo
= gen_lowpart (word_mode
, dst
);
1641 dst_hi
= gen_highpart (word_mode
, dst
);
1645 emit_insn (gen_aarch64_movdi_tilow (dst_lo
, src
));
1646 emit_insn (gen_aarch64_movdi_tihigh (dst_hi
, src
));
1650 emit_insn (gen_aarch64_movdi_tflow (dst_lo
, src
));
1651 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi
, src
));
1657 dst_lo
= gen_lowpart (word_mode
, dst
);
1658 dst_hi
= gen_highpart (word_mode
, dst
);
1659 src_lo
= gen_lowpart (word_mode
, src
);
1660 src_hi
= gen_highpart_mode (word_mode
, mode
, src
);
1662 /* At most one pairing may overlap. */
1663 if (reg_overlap_mentioned_p (dst_lo
, src_hi
))
1665 aarch64_emit_move (dst_hi
, src_hi
);
1666 aarch64_emit_move (dst_lo
, src_lo
);
1670 aarch64_emit_move (dst_lo
, src_lo
);
1671 aarch64_emit_move (dst_hi
, src_hi
);
1676 aarch64_split_128bit_move_p (rtx dst
, rtx src
)
1678 return (! REG_P (src
)
1679 || ! (FP_REGNUM_P (REGNO (dst
)) && FP_REGNUM_P (REGNO (src
))));
1682 /* Split a complex SIMD combine. */
1685 aarch64_split_simd_combine (rtx dst
, rtx src1
, rtx src2
)
1687 machine_mode src_mode
= GET_MODE (src1
);
1688 machine_mode dst_mode
= GET_MODE (dst
);
1690 gcc_assert (VECTOR_MODE_P (dst_mode
));
1692 if (REG_P (dst
) && REG_P (src1
) && REG_P (src2
))
1694 rtx (*gen
) (rtx
, rtx
, rtx
);
1699 gen
= gen_aarch64_simd_combinev8qi
;
1702 gen
= gen_aarch64_simd_combinev4hi
;
1705 gen
= gen_aarch64_simd_combinev2si
;
1708 gen
= gen_aarch64_simd_combinev4hf
;
1711 gen
= gen_aarch64_simd_combinev2sf
;
1714 gen
= gen_aarch64_simd_combinedi
;
1717 gen
= gen_aarch64_simd_combinedf
;
1723 emit_insn (gen (dst
, src1
, src2
));
1728 /* Split a complex SIMD move. */
1731 aarch64_split_simd_move (rtx dst
, rtx src
)
1733 machine_mode src_mode
= GET_MODE (src
);
1734 machine_mode dst_mode
= GET_MODE (dst
);
1736 gcc_assert (VECTOR_MODE_P (dst_mode
));
1738 if (REG_P (dst
) && REG_P (src
))
1740 rtx (*gen
) (rtx
, rtx
);
1742 gcc_assert (VECTOR_MODE_P (src_mode
));
1747 gen
= gen_aarch64_split_simd_movv16qi
;
1750 gen
= gen_aarch64_split_simd_movv8hi
;
1753 gen
= gen_aarch64_split_simd_movv4si
;
1756 gen
= gen_aarch64_split_simd_movv2di
;
1759 gen
= gen_aarch64_split_simd_movv8hf
;
1762 gen
= gen_aarch64_split_simd_movv4sf
;
1765 gen
= gen_aarch64_split_simd_movv2df
;
1771 emit_insn (gen (dst
, src
));
1777 aarch64_zero_extend_const_eq (machine_mode xmode
, rtx x
,
1778 machine_mode ymode
, rtx y
)
1780 rtx r
= simplify_const_unary_operation (ZERO_EXTEND
, xmode
, y
, ymode
);
1781 gcc_assert (r
!= NULL
);
1782 return rtx_equal_p (x
, r
);
1787 aarch64_force_temporary (machine_mode mode
, rtx x
, rtx value
)
1789 if (can_create_pseudo_p ())
1790 return force_reg (mode
, value
);
1793 x
= aarch64_emit_move (x
, value
);
1800 aarch64_add_offset (machine_mode mode
, rtx temp
, rtx reg
, HOST_WIDE_INT offset
)
1802 if (!aarch64_plus_immediate (GEN_INT (offset
), mode
))
1805 /* Load the full offset into a register. This
1806 might be improvable in the future. */
1807 high
= GEN_INT (offset
);
1809 high
= aarch64_force_temporary (mode
, temp
, high
);
1810 reg
= aarch64_force_temporary (mode
, temp
,
1811 gen_rtx_PLUS (mode
, high
, reg
));
1813 return plus_constant (mode
, reg
, offset
);
1817 aarch64_internal_mov_immediate (rtx dest
, rtx imm
, bool generate
,
1821 unsigned HOST_WIDE_INT val
, val2
, mask
;
1822 int one_match
, zero_match
;
1827 if (aarch64_move_imm (val
, mode
))
1830 emit_insn (gen_rtx_SET (dest
, imm
));
1834 if ((val
>> 32) == 0 || mode
== SImode
)
1838 emit_insn (gen_rtx_SET (dest
, GEN_INT (val
& 0xffff)));
1840 emit_insn (gen_insv_immsi (dest
, GEN_INT (16),
1841 GEN_INT ((val
>> 16) & 0xffff)));
1843 emit_insn (gen_insv_immdi (dest
, GEN_INT (16),
1844 GEN_INT ((val
>> 16) & 0xffff)));
1849 /* Remaining cases are all for DImode. */
1852 zero_match
= ((val
& mask
) == 0) + ((val
& (mask
<< 16)) == 0) +
1853 ((val
& (mask
<< 32)) == 0) + ((val
& (mask
<< 48)) == 0);
1854 one_match
= ((~val
& mask
) == 0) + ((~val
& (mask
<< 16)) == 0) +
1855 ((~val
& (mask
<< 32)) == 0) + ((~val
& (mask
<< 48)) == 0);
1857 if (zero_match
!= 2 && one_match
!= 2)
1859 /* Try emitting a bitmask immediate with a movk replacing 16 bits.
1860 For a 64-bit bitmask try whether changing 16 bits to all ones or
1861 zeroes creates a valid bitmask. To check any repeated bitmask,
1862 try using 16 bits from the other 32-bit half of val. */
1864 for (i
= 0; i
< 64; i
+= 16, mask
<<= 16)
1867 if (val2
!= val
&& aarch64_bitmask_imm (val2
, mode
))
1870 if (val2
!= val
&& aarch64_bitmask_imm (val2
, mode
))
1872 val2
= val2
& ~mask
;
1873 val2
= val2
| (((val2
>> 32) | (val2
<< 32)) & mask
);
1874 if (val2
!= val
&& aarch64_bitmask_imm (val2
, mode
))
1881 emit_insn (gen_rtx_SET (dest
, GEN_INT (val2
)));
1882 emit_insn (gen_insv_immdi (dest
, GEN_INT (i
),
1883 GEN_INT ((val
>> i
) & 0xffff)));
1889 /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
1890 are emitted by the initial mov. If one_match > zero_match, skip set bits,
1891 otherwise skip zero bits. */
1895 val2
= one_match
> zero_match
? ~val
: val
;
1896 i
= (val2
& mask
) != 0 ? 0 : (val2
& (mask
<< 16)) != 0 ? 16 : 32;
1899 emit_insn (gen_rtx_SET (dest
, GEN_INT (one_match
> zero_match
1900 ? (val
| ~(mask
<< i
))
1901 : (val
& (mask
<< i
)))));
1902 for (i
+= 16; i
< 64; i
+= 16)
1904 if ((val2
& (mask
<< i
)) == 0)
1907 emit_insn (gen_insv_immdi (dest
, GEN_INT (i
),
1908 GEN_INT ((val
>> i
) & 0xffff)));
1917 aarch64_expand_mov_immediate (rtx dest
, rtx imm
)
1919 machine_mode mode
= GET_MODE (dest
);
1921 gcc_assert (mode
== SImode
|| mode
== DImode
);
1923 /* Check on what type of symbol it is. */
1924 if (GET_CODE (imm
) == SYMBOL_REF
1925 || GET_CODE (imm
) == LABEL_REF
1926 || GET_CODE (imm
) == CONST
)
1928 rtx mem
, base
, offset
;
1929 enum aarch64_symbol_type sty
;
1931 /* If we have (const (plus symbol offset)), separate out the offset
1932 before we start classifying the symbol. */
1933 split_const (imm
, &base
, &offset
);
1935 sty
= aarch64_classify_symbol (base
, offset
);
1938 case SYMBOL_FORCE_TO_MEM
:
1939 if (offset
!= const0_rtx
1940 && targetm
.cannot_force_const_mem (mode
, imm
))
1942 gcc_assert (can_create_pseudo_p ());
1943 base
= aarch64_force_temporary (mode
, dest
, base
);
1944 base
= aarch64_add_offset (mode
, NULL
, base
, INTVAL (offset
));
1945 aarch64_emit_move (dest
, base
);
1949 mem
= force_const_mem (ptr_mode
, imm
);
1952 /* If we aren't generating PC relative literals, then
1953 we need to expand the literal pool access carefully.
1954 This is something that needs to be done in a number
1955 of places, so could well live as a separate function. */
1956 if (!aarch64_pcrelative_literal_loads
)
1958 gcc_assert (can_create_pseudo_p ());
1959 base
= gen_reg_rtx (ptr_mode
);
1960 aarch64_expand_mov_immediate (base
, XEXP (mem
, 0));
1961 mem
= gen_rtx_MEM (ptr_mode
, base
);
1964 if (mode
!= ptr_mode
)
1965 mem
= gen_rtx_ZERO_EXTEND (mode
, mem
);
1967 emit_insn (gen_rtx_SET (dest
, mem
));
1971 case SYMBOL_SMALL_TLSGD
:
1972 case SYMBOL_SMALL_TLSDESC
:
1973 case SYMBOL_SMALL_TLSIE
:
1974 case SYMBOL_SMALL_GOT_28K
:
1975 case SYMBOL_SMALL_GOT_4G
:
1976 case SYMBOL_TINY_GOT
:
1977 case SYMBOL_TINY_TLSIE
:
1978 if (offset
!= const0_rtx
)
1980 gcc_assert(can_create_pseudo_p ());
1981 base
= aarch64_force_temporary (mode
, dest
, base
);
1982 base
= aarch64_add_offset (mode
, NULL
, base
, INTVAL (offset
));
1983 aarch64_emit_move (dest
, base
);
1988 case SYMBOL_SMALL_ABSOLUTE
:
1989 case SYMBOL_TINY_ABSOLUTE
:
1990 case SYMBOL_TLSLE12
:
1991 case SYMBOL_TLSLE24
:
1992 case SYMBOL_TLSLE32
:
1993 case SYMBOL_TLSLE48
:
1994 aarch64_load_symref_appropriately (dest
, imm
, sty
);
2002 if (!CONST_INT_P (imm
))
2004 if (GET_CODE (imm
) == HIGH
)
2005 emit_insn (gen_rtx_SET (dest
, imm
));
2008 rtx mem
= force_const_mem (mode
, imm
);
2010 emit_insn (gen_rtx_SET (dest
, mem
));
2016 aarch64_internal_mov_immediate (dest
, imm
, true, GET_MODE (dest
));
2019 /* Add DELTA to REGNUM in mode MODE. SCRATCHREG can be used to hold a
2020 temporary value if necessary. FRAME_RELATED_P should be true if
2021 the RTX_FRAME_RELATED flag should be set and CFA adjustments added
2022 to the generated instructions. If SCRATCHREG is known to hold
2023 abs (delta), EMIT_MOVE_IMM can be set to false to avoid emitting the
2026 Since this function may be used to adjust the stack pointer, we must
2027 ensure that it cannot cause transient stack deallocation (for example
2028 by first incrementing SP and then decrementing when adjusting by a
2029 large immediate). */
2032 aarch64_add_constant_internal (machine_mode mode
, int regnum
, int scratchreg
,
2033 HOST_WIDE_INT delta
, bool frame_related_p
,
2036 HOST_WIDE_INT mdelta
= abs_hwi (delta
);
2037 rtx this_rtx
= gen_rtx_REG (mode
, regnum
);
2043 /* Single instruction adjustment. */
2044 if (aarch64_uimm12_shift (mdelta
))
2046 insn
= emit_insn (gen_add2_insn (this_rtx
, GEN_INT (delta
)));
2047 RTX_FRAME_RELATED_P (insn
) = frame_related_p
;
2051 /* Emit 2 additions/subtractions if the adjustment is less than 24 bits.
2052 Only do this if mdelta is not a 16-bit move as adjusting using a move
2054 if (mdelta
< 0x1000000 && !aarch64_move_imm (mdelta
, mode
))
2056 HOST_WIDE_INT low_off
= mdelta
& 0xfff;
2058 low_off
= delta
< 0 ? -low_off
: low_off
;
2059 insn
= emit_insn (gen_add2_insn (this_rtx
, GEN_INT (low_off
)));
2060 RTX_FRAME_RELATED_P (insn
) = frame_related_p
;
2061 insn
= emit_insn (gen_add2_insn (this_rtx
, GEN_INT (delta
- low_off
)));
2062 RTX_FRAME_RELATED_P (insn
) = frame_related_p
;
2066 /* Emit a move immediate if required and an addition/subtraction. */
2067 rtx scratch_rtx
= gen_rtx_REG (mode
, scratchreg
);
2069 aarch64_internal_mov_immediate (scratch_rtx
, GEN_INT (mdelta
), true, mode
);
2070 insn
= emit_insn (delta
< 0 ? gen_sub2_insn (this_rtx
, scratch_rtx
)
2071 : gen_add2_insn (this_rtx
, scratch_rtx
));
2072 if (frame_related_p
)
2074 RTX_FRAME_RELATED_P (insn
) = frame_related_p
;
2075 rtx adj
= plus_constant (mode
, this_rtx
, delta
);
2076 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, gen_rtx_SET (this_rtx
, adj
));
2081 aarch64_add_constant (machine_mode mode
, int regnum
, int scratchreg
,
2082 HOST_WIDE_INT delta
)
2084 aarch64_add_constant_internal (mode
, regnum
, scratchreg
, delta
, false, true);
2088 aarch64_add_sp (int scratchreg
, HOST_WIDE_INT delta
, bool emit_move_imm
)
2090 aarch64_add_constant_internal (Pmode
, SP_REGNUM
, scratchreg
, delta
,
2091 true, emit_move_imm
);
2095 aarch64_sub_sp (int scratchreg
, HOST_WIDE_INT delta
, bool frame_related_p
)
2097 aarch64_add_constant_internal (Pmode
, SP_REGNUM
, scratchreg
, -delta
,
2098 frame_related_p
, true);
2102 aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED
,
2103 tree exp ATTRIBUTE_UNUSED
)
2105 /* Currently, always true. */
2109 /* Implement TARGET_PASS_BY_REFERENCE. */
2112 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED
,
2115 bool named ATTRIBUTE_UNUSED
)
2118 machine_mode dummymode
;
2121 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
2122 size
= (mode
== BLKmode
&& type
)
2123 ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2125 /* Aggregates are passed by reference based on their size. */
2126 if (type
&& AGGREGATE_TYPE_P (type
))
2128 size
= int_size_in_bytes (type
);
2131 /* Variable sized arguments are always returned by reference. */
2135 /* Can this be a candidate to be passed in fp/simd register(s)? */
2136 if (aarch64_vfp_is_call_or_return_candidate (mode
, type
,
2141 /* Arguments which are variable sized or larger than 2 registers are
2142 passed by reference unless they are a homogenous floating point
2144 return size
> 2 * UNITS_PER_WORD
;
2147 /* Return TRUE if VALTYPE is padded to its least significant bits. */
2149 aarch64_return_in_msb (const_tree valtype
)
2151 machine_mode dummy_mode
;
2154 /* Never happens in little-endian mode. */
2155 if (!BYTES_BIG_ENDIAN
)
2158 /* Only composite types smaller than or equal to 16 bytes can
2159 be potentially returned in registers. */
2160 if (!aarch64_composite_type_p (valtype
, TYPE_MODE (valtype
))
2161 || int_size_in_bytes (valtype
) <= 0
2162 || int_size_in_bytes (valtype
) > 16)
2165 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
2166 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
2167 is always passed/returned in the least significant bits of fp/simd
2169 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype
), valtype
,
2170 &dummy_mode
, &dummy_int
, NULL
))
2176 /* Implement TARGET_FUNCTION_VALUE.
2177 Define how to find the value returned by a function. */
2180 aarch64_function_value (const_tree type
, const_tree func
,
2181 bool outgoing ATTRIBUTE_UNUSED
)
2186 machine_mode ag_mode
;
2188 mode
= TYPE_MODE (type
);
2189 if (INTEGRAL_TYPE_P (type
))
2190 mode
= promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
2192 if (aarch64_return_in_msb (type
))
2194 HOST_WIDE_INT size
= int_size_in_bytes (type
);
2196 if (size
% UNITS_PER_WORD
!= 0)
2198 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
2199 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
2203 if (aarch64_vfp_is_call_or_return_candidate (mode
, type
,
2204 &ag_mode
, &count
, NULL
))
2206 if (!aarch64_composite_type_p (type
, mode
))
2208 gcc_assert (count
== 1 && mode
== ag_mode
);
2209 return gen_rtx_REG (mode
, V0_REGNUM
);
2216 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
2217 for (i
= 0; i
< count
; i
++)
2219 rtx tmp
= gen_rtx_REG (ag_mode
, V0_REGNUM
+ i
);
2220 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
2221 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
2222 XVECEXP (par
, 0, i
) = tmp
;
2228 return gen_rtx_REG (mode
, R0_REGNUM
);
2231 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
2232 Return true if REGNO is the number of a hard register in which the values
2233 of called function may come back. */
2236 aarch64_function_value_regno_p (const unsigned int regno
)
2238 /* Maximum of 16 bytes can be returned in the general registers. Examples
2239 of 16-byte return values are: 128-bit integers and 16-byte small
2240 structures (excluding homogeneous floating-point aggregates). */
2241 if (regno
== R0_REGNUM
|| regno
== R1_REGNUM
)
2244 /* Up to four fp/simd registers can return a function value, e.g. a
2245 homogeneous floating-point aggregate having four members. */
2246 if (regno
>= V0_REGNUM
&& regno
< V0_REGNUM
+ HA_MAX_NUM_FLDS
)
2247 return TARGET_FLOAT
;
2252 /* Implement TARGET_RETURN_IN_MEMORY.
2254 If the type T of the result of a function is such that
2256 would require that arg be passed as a value in a register (or set of
2257 registers) according to the parameter passing rules, then the result
2258 is returned in the same registers as would be used for such an
2262 aarch64_return_in_memory (const_tree type
, const_tree fndecl ATTRIBUTE_UNUSED
)
2265 machine_mode ag_mode
;
2268 if (!AGGREGATE_TYPE_P (type
)
2269 && TREE_CODE (type
) != COMPLEX_TYPE
2270 && TREE_CODE (type
) != VECTOR_TYPE
)
2271 /* Simple scalar types always returned in registers. */
2274 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type
),
2281 /* Types larger than 2 registers returned in memory. */
2282 size
= int_size_in_bytes (type
);
2283 return (size
< 0 || size
> 2 * UNITS_PER_WORD
);
2287 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v
, machine_mode mode
,
2288 const_tree type
, int *nregs
)
2290 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
2291 return aarch64_vfp_is_call_or_return_candidate (mode
,
2293 &pcum
->aapcs_vfp_rmode
,
2298 /* Given MODE and TYPE of a function argument, return the alignment in
2299 bits. The idea is to suppress any stronger alignment requested by
2300 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
2301 This is a helper function for local use only. */
2304 aarch64_function_arg_alignment (machine_mode mode
, const_tree type
)
2307 return GET_MODE_ALIGNMENT (mode
);
2309 if (integer_zerop (TYPE_SIZE (type
)))
2312 gcc_assert (TYPE_MODE (type
) == mode
);
2314 if (!AGGREGATE_TYPE_P (type
))
2315 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type
));
2317 if (TREE_CODE (type
) == ARRAY_TYPE
)
2318 return TYPE_ALIGN (TREE_TYPE (type
));
2320 unsigned int alignment
= 0;
2321 for (tree field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
2322 if (TREE_CODE (field
) == FIELD_DECL
)
2323 alignment
= std::max (alignment
, DECL_ALIGN (field
));
2328 /* Layout a function argument according to the AAPCS64 rules. The rule
2329 numbers refer to the rule numbers in the AAPCS64. */
2332 aarch64_layout_arg (cumulative_args_t pcum_v
, machine_mode mode
,
2334 bool named ATTRIBUTE_UNUSED
)
2336 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
2337 int ncrn
, nvrn
, nregs
;
2338 bool allocate_ncrn
, allocate_nvrn
;
2341 /* We need to do this once per argument. */
2342 if (pcum
->aapcs_arg_processed
)
2345 pcum
->aapcs_arg_processed
= true;
2347 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
2349 = ROUND_UP (type
? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
),
2352 allocate_ncrn
= (type
) ? !(FLOAT_TYPE_P (type
)) : !FLOAT_MODE_P (mode
);
2353 allocate_nvrn
= aarch64_vfp_is_call_candidate (pcum_v
,
2358 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
2359 The following code thus handles passing by SIMD/FP registers first. */
2361 nvrn
= pcum
->aapcs_nvrn
;
2363 /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
2364 and homogenous short-vector aggregates (HVA). */
2368 aarch64_err_no_fpadvsimd (mode
, "argument");
2370 if (nvrn
+ nregs
<= NUM_FP_ARG_REGS
)
2372 pcum
->aapcs_nextnvrn
= nvrn
+ nregs
;
2373 if (!aarch64_composite_type_p (type
, mode
))
2375 gcc_assert (nregs
== 1);
2376 pcum
->aapcs_reg
= gen_rtx_REG (mode
, V0_REGNUM
+ nvrn
);
2382 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nregs
));
2383 for (i
= 0; i
< nregs
; i
++)
2385 rtx tmp
= gen_rtx_REG (pcum
->aapcs_vfp_rmode
,
2386 V0_REGNUM
+ nvrn
+ i
);
2387 tmp
= gen_rtx_EXPR_LIST
2389 GEN_INT (i
* GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
)));
2390 XVECEXP (par
, 0, i
) = tmp
;
2392 pcum
->aapcs_reg
= par
;
2398 /* C.3 NSRN is set to 8. */
2399 pcum
->aapcs_nextnvrn
= NUM_FP_ARG_REGS
;
2404 ncrn
= pcum
->aapcs_ncrn
;
2405 nregs
= size
/ UNITS_PER_WORD
;
2407 /* C6 - C9. though the sign and zero extension semantics are
2408 handled elsewhere. This is the case where the argument fits
2409 entirely general registers. */
2410 if (allocate_ncrn
&& (ncrn
+ nregs
<= NUM_ARG_REGS
))
2413 gcc_assert (nregs
== 0 || nregs
== 1 || nregs
== 2);
2415 /* C.8 if the argument has an alignment of 16 then the NGRN is
2416 rounded up to the next even number. */
2419 /* The == 16 * BITS_PER_UNIT instead of >= 16 * BITS_PER_UNIT
2420 comparison is there because for > 16 * BITS_PER_UNIT
2421 alignment nregs should be > 2 and therefore it should be
2422 passed by reference rather than value. */
2423 && aarch64_function_arg_alignment (mode
, type
) == 16 * BITS_PER_UNIT
)
2426 gcc_assert (ncrn
+ nregs
<= NUM_ARG_REGS
);
2429 /* NREGS can be 0 when e.g. an empty structure is to be passed.
2430 A reg is still generated for it, but the caller should be smart
2431 enough not to use it. */
2432 if (nregs
== 0 || nregs
== 1 || GET_MODE_CLASS (mode
) == MODE_INT
)
2433 pcum
->aapcs_reg
= gen_rtx_REG (mode
, R0_REGNUM
+ ncrn
);
2439 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nregs
));
2440 for (i
= 0; i
< nregs
; i
++)
2442 rtx tmp
= gen_rtx_REG (word_mode
, R0_REGNUM
+ ncrn
+ i
);
2443 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
2444 GEN_INT (i
* UNITS_PER_WORD
));
2445 XVECEXP (par
, 0, i
) = tmp
;
2447 pcum
->aapcs_reg
= par
;
2450 pcum
->aapcs_nextncrn
= ncrn
+ nregs
;
2455 pcum
->aapcs_nextncrn
= NUM_ARG_REGS
;
2457 /* The argument is passed on stack; record the needed number of words for
2458 this argument and align the total size if necessary. */
2460 pcum
->aapcs_stack_words
= size
/ UNITS_PER_WORD
;
2462 if (aarch64_function_arg_alignment (mode
, type
) == 16 * BITS_PER_UNIT
)
2463 pcum
->aapcs_stack_size
= ROUND_UP (pcum
->aapcs_stack_size
,
2464 16 / UNITS_PER_WORD
);
2468 /* Implement TARGET_FUNCTION_ARG. */
2471 aarch64_function_arg (cumulative_args_t pcum_v
, machine_mode mode
,
2472 const_tree type
, bool named
)
2474 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
2475 gcc_assert (pcum
->pcs_variant
== ARM_PCS_AAPCS64
);
2477 if (mode
== VOIDmode
)
2480 aarch64_layout_arg (pcum_v
, mode
, type
, named
);
2481 return pcum
->aapcs_reg
;
2485 aarch64_init_cumulative_args (CUMULATIVE_ARGS
*pcum
,
2486 const_tree fntype ATTRIBUTE_UNUSED
,
2487 rtx libname ATTRIBUTE_UNUSED
,
2488 const_tree fndecl ATTRIBUTE_UNUSED
,
2489 unsigned n_named ATTRIBUTE_UNUSED
)
2491 pcum
->aapcs_ncrn
= 0;
2492 pcum
->aapcs_nvrn
= 0;
2493 pcum
->aapcs_nextncrn
= 0;
2494 pcum
->aapcs_nextnvrn
= 0;
2495 pcum
->pcs_variant
= ARM_PCS_AAPCS64
;
2496 pcum
->aapcs_reg
= NULL_RTX
;
2497 pcum
->aapcs_arg_processed
= false;
2498 pcum
->aapcs_stack_words
= 0;
2499 pcum
->aapcs_stack_size
= 0;
2502 && fndecl
&& TREE_PUBLIC (fndecl
)
2503 && fntype
&& fntype
!= error_mark_node
)
2505 const_tree type
= TREE_TYPE (fntype
);
2506 machine_mode mode ATTRIBUTE_UNUSED
; /* To pass pointer as argument. */
2507 int nregs ATTRIBUTE_UNUSED
; /* Likewise. */
2508 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type
), type
,
2509 &mode
, &nregs
, NULL
))
2510 aarch64_err_no_fpadvsimd (TYPE_MODE (type
), "return type");
2516 aarch64_function_arg_advance (cumulative_args_t pcum_v
,
2521 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
2522 if (pcum
->pcs_variant
== ARM_PCS_AAPCS64
)
2524 aarch64_layout_arg (pcum_v
, mode
, type
, named
);
2525 gcc_assert ((pcum
->aapcs_reg
!= NULL_RTX
)
2526 != (pcum
->aapcs_stack_words
!= 0));
2527 pcum
->aapcs_arg_processed
= false;
2528 pcum
->aapcs_ncrn
= pcum
->aapcs_nextncrn
;
2529 pcum
->aapcs_nvrn
= pcum
->aapcs_nextnvrn
;
2530 pcum
->aapcs_stack_size
+= pcum
->aapcs_stack_words
;
2531 pcum
->aapcs_stack_words
= 0;
2532 pcum
->aapcs_reg
= NULL_RTX
;
2537 aarch64_function_arg_regno_p (unsigned regno
)
2539 return ((GP_REGNUM_P (regno
) && regno
< R0_REGNUM
+ NUM_ARG_REGS
)
2540 || (FP_REGNUM_P (regno
) && regno
< V0_REGNUM
+ NUM_FP_ARG_REGS
));
2543 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
2544 PARM_BOUNDARY bits of alignment, but will be given anything up
2545 to STACK_BOUNDARY bits if the type requires it. This makes sure
2546 that both before and after the layout of each argument, the Next
2547 Stacked Argument Address (NSAA) will have a minimum alignment of
2551 aarch64_function_arg_boundary (machine_mode mode
, const_tree type
)
2553 unsigned int alignment
= aarch64_function_arg_alignment (mode
, type
);
2554 return MIN (MAX (alignment
, PARM_BOUNDARY
), STACK_BOUNDARY
);
2557 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
2559 Return true if an argument passed on the stack should be padded upwards,
2560 i.e. if the least-significant byte of the stack slot has useful data.
2562 Small aggregate types are placed in the lowest memory address.
2564 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
2567 aarch64_pad_arg_upward (machine_mode mode
, const_tree type
)
2569 /* On little-endian targets, the least significant byte of every stack
2570 argument is passed at the lowest byte address of the stack slot. */
2571 if (!BYTES_BIG_ENDIAN
)
2574 /* Otherwise, integral, floating-point and pointer types are padded downward:
2575 the least significant byte of a stack argument is passed at the highest
2576 byte address of the stack slot. */
2578 ? (INTEGRAL_TYPE_P (type
) || SCALAR_FLOAT_TYPE_P (type
)
2579 || POINTER_TYPE_P (type
))
2580 : (SCALAR_INT_MODE_P (mode
) || SCALAR_FLOAT_MODE_P (mode
)))
2583 /* Everything else padded upward, i.e. data in first byte of stack slot. */
2587 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
2589 It specifies padding for the last (may also be the only)
2590 element of a block move between registers and memory. If
2591 assuming the block is in the memory, padding upward means that
2592 the last element is padded after its highest significant byte,
2593 while in downward padding, the last element is padded at the
2594 its least significant byte side.
2596 Small aggregates and small complex types are always padded
2599 We don't need to worry about homogeneous floating-point or
2600 short-vector aggregates; their move is not affected by the
2601 padding direction determined here. Regardless of endianness,
2602 each element of such an aggregate is put in the least
2603 significant bits of a fp/simd register.
2605 Return !BYTES_BIG_ENDIAN if the least significant byte of the
2606 register has useful data, and return the opposite if the most
2607 significant byte does. */
2610 aarch64_pad_reg_upward (machine_mode mode
, const_tree type
,
2611 bool first ATTRIBUTE_UNUSED
)
2614 /* Small composite types are always padded upward. */
2615 if (BYTES_BIG_ENDIAN
&& aarch64_composite_type_p (type
, mode
))
2617 HOST_WIDE_INT size
= (type
? int_size_in_bytes (type
)
2618 : GET_MODE_SIZE (mode
));
2619 if (size
< 2 * UNITS_PER_WORD
)
2623 /* Otherwise, use the default padding. */
2624 return !BYTES_BIG_ENDIAN
;
2628 aarch64_libgcc_cmp_return_mode (void)
2633 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
2635 /* We use the 12-bit shifted immediate arithmetic instructions so values
2636 must be multiple of (1 << 12), i.e. 4096. */
2637 #define ARITH_FACTOR 4096
2639 #if (PROBE_INTERVAL % ARITH_FACTOR) != 0
2640 #error Cannot use simple address calculation for stack probing
2643 /* The pair of scratch registers used for stack probing. */
2644 #define PROBE_STACK_FIRST_REG 9
2645 #define PROBE_STACK_SECOND_REG 10
2647 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
2648 inclusive. These are offsets from the current stack pointer. */
2651 aarch64_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
)
2653 rtx reg1
= gen_rtx_REG (Pmode
, PROBE_STACK_FIRST_REG
);
2655 /* See the same assertion on PROBE_INTERVAL above. */
2656 gcc_assert ((first
% ARITH_FACTOR
) == 0);
2658 /* See if we have a constant small number of probes to generate. If so,
2659 that's the easy case. */
2660 if (size
<= PROBE_INTERVAL
)
2662 const HOST_WIDE_INT base
= ROUND_UP (size
, ARITH_FACTOR
);
2664 emit_set_insn (reg1
,
2665 plus_constant (Pmode
,
2666 stack_pointer_rtx
, -(first
+ base
)));
2667 emit_stack_probe (plus_constant (Pmode
, reg1
, base
- size
));
2670 /* The run-time loop is made up of 8 insns in the generic case while the
2671 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
2672 else if (size
<= 4 * PROBE_INTERVAL
)
2674 HOST_WIDE_INT i
, rem
;
2676 emit_set_insn (reg1
,
2677 plus_constant (Pmode
,
2679 -(first
+ PROBE_INTERVAL
)));
2680 emit_stack_probe (reg1
);
2682 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
2683 it exceeds SIZE. If only two probes are needed, this will not
2684 generate any code. Then probe at FIRST + SIZE. */
2685 for (i
= 2 * PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
2687 emit_set_insn (reg1
,
2688 plus_constant (Pmode
, reg1
, -PROBE_INTERVAL
));
2689 emit_stack_probe (reg1
);
2692 rem
= size
- (i
- PROBE_INTERVAL
);
2695 const HOST_WIDE_INT base
= ROUND_UP (rem
, ARITH_FACTOR
);
2697 emit_set_insn (reg1
, plus_constant (Pmode
, reg1
, -base
));
2698 emit_stack_probe (plus_constant (Pmode
, reg1
, base
- rem
));
2701 emit_stack_probe (plus_constant (Pmode
, reg1
, -rem
));
2704 /* Otherwise, do the same as above, but in a loop. Note that we must be
2705 extra careful with variables wrapping around because we might be at
2706 the very top (or the very bottom) of the address space and we have
2707 to be able to handle this case properly; in particular, we use an
2708 equality test for the loop condition. */
2711 rtx reg2
= gen_rtx_REG (Pmode
, PROBE_STACK_SECOND_REG
);
2713 /* Step 1: round SIZE to the previous multiple of the interval. */
2715 HOST_WIDE_INT rounded_size
= size
& -PROBE_INTERVAL
;
2718 /* Step 2: compute initial and final value of the loop counter. */
2720 /* TEST_ADDR = SP + FIRST. */
2721 emit_set_insn (reg1
,
2722 plus_constant (Pmode
, stack_pointer_rtx
, -first
));
2724 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
2725 emit_set_insn (reg2
,
2726 plus_constant (Pmode
, stack_pointer_rtx
,
2727 -(first
+ rounded_size
)));
2734 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
2737 while (TEST_ADDR != LAST_ADDR)
2739 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
2740 until it is equal to ROUNDED_SIZE. */
2742 emit_insn (gen_probe_stack_range (reg1
, reg1
, reg2
));
2745 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
2746 that SIZE is equal to ROUNDED_SIZE. */
2748 if (size
!= rounded_size
)
2750 HOST_WIDE_INT rem
= size
- rounded_size
;
2754 const HOST_WIDE_INT base
= ROUND_UP (rem
, ARITH_FACTOR
);
2756 emit_set_insn (reg2
, plus_constant (Pmode
, reg2
, -base
));
2757 emit_stack_probe (plus_constant (Pmode
, reg2
, base
- rem
));
2760 emit_stack_probe (plus_constant (Pmode
, reg2
, -rem
));
2764 /* Make sure nothing is scheduled before we are done. */
2765 emit_insn (gen_blockage ());
2768 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
2769 absolute addresses. */
2772 aarch64_output_probe_stack_range (rtx reg1
, rtx reg2
)
2774 static int labelno
= 0;
2778 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
2781 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
2783 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
2785 xops
[1] = GEN_INT (PROBE_INTERVAL
);
2786 output_asm_insn ("sub\t%0, %0, %1", xops
);
2788 /* Probe at TEST_ADDR. */
2789 output_asm_insn ("str\txzr, [%0]", xops
);
2791 /* Test if TEST_ADDR == LAST_ADDR. */
2793 output_asm_insn ("cmp\t%0, %1", xops
);
2796 fputs ("\tb.ne\t", asm_out_file
);
2797 assemble_name_raw (asm_out_file
, loop_lab
);
2798 fputc ('\n', asm_out_file
);
2804 aarch64_frame_pointer_required (void)
2806 /* In aarch64_override_options_after_change
2807 flag_omit_leaf_frame_pointer turns off the frame pointer by
2808 default. Turn it back on now if we've not got a leaf
2810 if (flag_omit_leaf_frame_pointer
2811 && (!crtl
->is_leaf
|| df_regs_ever_live_p (LR_REGNUM
)))
2814 /* Force a frame pointer for EH returns so the return address is at FP+8. */
2815 if (crtl
->calls_eh_return
)
2821 /* Mark the registers that need to be saved by the callee and calculate
2822 the size of the callee-saved registers area and frame record (both FP
2823 and LR may be omitted). */
2825 aarch64_layout_frame (void)
2827 HOST_WIDE_INT offset
= 0;
2828 int regno
, last_fp_reg
= INVALID_REGNUM
;
2830 if (reload_completed
&& cfun
->machine
->frame
.laid_out
)
2833 #define SLOT_NOT_REQUIRED (-2)
2834 #define SLOT_REQUIRED (-1)
2836 cfun
->machine
->frame
.wb_candidate1
= INVALID_REGNUM
;
2837 cfun
->machine
->frame
.wb_candidate2
= INVALID_REGNUM
;
2839 /* First mark all the registers that really need to be saved... */
2840 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
2841 cfun
->machine
->frame
.reg_offset
[regno
] = SLOT_NOT_REQUIRED
;
2843 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
2844 cfun
->machine
->frame
.reg_offset
[regno
] = SLOT_NOT_REQUIRED
;
2846 /* ... that includes the eh data registers (if needed)... */
2847 if (crtl
->calls_eh_return
)
2848 for (regno
= 0; EH_RETURN_DATA_REGNO (regno
) != INVALID_REGNUM
; regno
++)
2849 cfun
->machine
->frame
.reg_offset
[EH_RETURN_DATA_REGNO (regno
)]
2852 /* ... and any callee saved register that dataflow says is live. */
2853 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
2854 if (df_regs_ever_live_p (regno
)
2855 && (regno
== R30_REGNUM
2856 || !call_used_regs
[regno
]))
2857 cfun
->machine
->frame
.reg_offset
[regno
] = SLOT_REQUIRED
;
2859 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
2860 if (df_regs_ever_live_p (regno
)
2861 && !call_used_regs
[regno
])
2863 cfun
->machine
->frame
.reg_offset
[regno
] = SLOT_REQUIRED
;
2864 last_fp_reg
= regno
;
2867 if (frame_pointer_needed
)
2869 /* FP and LR are placed in the linkage record. */
2870 cfun
->machine
->frame
.reg_offset
[R29_REGNUM
] = 0;
2871 cfun
->machine
->frame
.wb_candidate1
= R29_REGNUM
;
2872 cfun
->machine
->frame
.reg_offset
[R30_REGNUM
] = UNITS_PER_WORD
;
2873 cfun
->machine
->frame
.wb_candidate2
= R30_REGNUM
;
2874 offset
+= 2 * UNITS_PER_WORD
;
2877 /* Now assign stack slots for them. */
2878 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
2879 if (cfun
->machine
->frame
.reg_offset
[regno
] == SLOT_REQUIRED
)
2881 cfun
->machine
->frame
.reg_offset
[regno
] = offset
;
2882 if (cfun
->machine
->frame
.wb_candidate1
== INVALID_REGNUM
)
2883 cfun
->machine
->frame
.wb_candidate1
= regno
;
2884 else if (cfun
->machine
->frame
.wb_candidate2
== INVALID_REGNUM
)
2885 cfun
->machine
->frame
.wb_candidate2
= regno
;
2886 offset
+= UNITS_PER_WORD
;
2889 HOST_WIDE_INT max_int_offset
= offset
;
2890 offset
= ROUND_UP (offset
, STACK_BOUNDARY
/ BITS_PER_UNIT
);
2891 bool has_align_gap
= offset
!= max_int_offset
;
2893 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
2894 if (cfun
->machine
->frame
.reg_offset
[regno
] == SLOT_REQUIRED
)
2896 /* If there is an alignment gap between integer and fp callee-saves,
2897 allocate the last fp register to it if possible. */
2898 if (regno
== last_fp_reg
&& has_align_gap
&& (offset
& 8) == 0)
2900 cfun
->machine
->frame
.reg_offset
[regno
] = max_int_offset
;
2904 cfun
->machine
->frame
.reg_offset
[regno
] = offset
;
2905 if (cfun
->machine
->frame
.wb_candidate1
== INVALID_REGNUM
)
2906 cfun
->machine
->frame
.wb_candidate1
= regno
;
2907 else if (cfun
->machine
->frame
.wb_candidate2
== INVALID_REGNUM
2908 && cfun
->machine
->frame
.wb_candidate1
>= V0_REGNUM
)
2909 cfun
->machine
->frame
.wb_candidate2
= regno
;
2910 offset
+= UNITS_PER_WORD
;
2913 offset
= ROUND_UP (offset
, STACK_BOUNDARY
/ BITS_PER_UNIT
);
2915 cfun
->machine
->frame
.saved_regs_size
= offset
;
2917 HOST_WIDE_INT varargs_and_saved_regs_size
2918 = offset
+ cfun
->machine
->frame
.saved_varargs_size
;
2920 cfun
->machine
->frame
.hard_fp_offset
2921 = ROUND_UP (varargs_and_saved_regs_size
+ get_frame_size (),
2922 STACK_BOUNDARY
/ BITS_PER_UNIT
);
2924 cfun
->machine
->frame
.frame_size
2925 = ROUND_UP (cfun
->machine
->frame
.hard_fp_offset
2926 + crtl
->outgoing_args_size
,
2927 STACK_BOUNDARY
/ BITS_PER_UNIT
);
2929 cfun
->machine
->frame
.locals_offset
= cfun
->machine
->frame
.saved_varargs_size
;
2931 cfun
->machine
->frame
.initial_adjust
= 0;
2932 cfun
->machine
->frame
.final_adjust
= 0;
2933 cfun
->machine
->frame
.callee_adjust
= 0;
2934 cfun
->machine
->frame
.callee_offset
= 0;
2936 HOST_WIDE_INT max_push_offset
= 0;
2937 if (cfun
->machine
->frame
.wb_candidate2
!= INVALID_REGNUM
)
2938 max_push_offset
= 512;
2939 else if (cfun
->machine
->frame
.wb_candidate1
!= INVALID_REGNUM
)
2940 max_push_offset
= 256;
2942 if (cfun
->machine
->frame
.frame_size
< max_push_offset
2943 && crtl
->outgoing_args_size
== 0)
2945 /* Simple, small frame with no outgoing arguments:
2946 stp reg1, reg2, [sp, -frame_size]!
2947 stp reg3, reg4, [sp, 16] */
2948 cfun
->machine
->frame
.callee_adjust
= cfun
->machine
->frame
.frame_size
;
2950 else if ((crtl
->outgoing_args_size
2951 + cfun
->machine
->frame
.saved_regs_size
< 512)
2952 && !(cfun
->calls_alloca
2953 && cfun
->machine
->frame
.hard_fp_offset
< max_push_offset
))
2955 /* Frame with small outgoing arguments:
2956 sub sp, sp, frame_size
2957 stp reg1, reg2, [sp, outgoing_args_size]
2958 stp reg3, reg4, [sp, outgoing_args_size + 16] */
2959 cfun
->machine
->frame
.initial_adjust
= cfun
->machine
->frame
.frame_size
;
2960 cfun
->machine
->frame
.callee_offset
2961 = cfun
->machine
->frame
.frame_size
- cfun
->machine
->frame
.hard_fp_offset
;
2963 else if (cfun
->machine
->frame
.hard_fp_offset
< max_push_offset
)
2965 /* Frame with large outgoing arguments but a small local area:
2966 stp reg1, reg2, [sp, -hard_fp_offset]!
2967 stp reg3, reg4, [sp, 16]
2968 sub sp, sp, outgoing_args_size */
2969 cfun
->machine
->frame
.callee_adjust
= cfun
->machine
->frame
.hard_fp_offset
;
2970 cfun
->machine
->frame
.final_adjust
2971 = cfun
->machine
->frame
.frame_size
- cfun
->machine
->frame
.callee_adjust
;
2973 else if (!frame_pointer_needed
2974 && varargs_and_saved_regs_size
< max_push_offset
)
2976 /* Frame with large local area and outgoing arguments (this pushes the
2977 callee-saves first, followed by the locals and outgoing area):
2978 stp reg1, reg2, [sp, -varargs_and_saved_regs_size]!
2979 stp reg3, reg4, [sp, 16]
2980 sub sp, sp, frame_size - varargs_and_saved_regs_size */
2981 cfun
->machine
->frame
.callee_adjust
= varargs_and_saved_regs_size
;
2982 cfun
->machine
->frame
.final_adjust
2983 = cfun
->machine
->frame
.frame_size
- cfun
->machine
->frame
.callee_adjust
;
2984 cfun
->machine
->frame
.hard_fp_offset
= cfun
->machine
->frame
.callee_adjust
;
2985 cfun
->machine
->frame
.locals_offset
= cfun
->machine
->frame
.hard_fp_offset
;
2989 /* Frame with large local area and outgoing arguments using frame pointer:
2990 sub sp, sp, hard_fp_offset
2991 stp x29, x30, [sp, 0]
2993 stp reg3, reg4, [sp, 16]
2994 sub sp, sp, outgoing_args_size */
2995 cfun
->machine
->frame
.initial_adjust
= cfun
->machine
->frame
.hard_fp_offset
;
2996 cfun
->machine
->frame
.final_adjust
2997 = cfun
->machine
->frame
.frame_size
- cfun
->machine
->frame
.initial_adjust
;
3000 cfun
->machine
->frame
.laid_out
= true;
3003 /* Return true if the register REGNO is saved on entry to
3004 the current function. */
3007 aarch64_register_saved_on_entry (int regno
)
3009 return cfun
->machine
->frame
.reg_offset
[regno
] >= 0;
/* Return the next register up from REGNO up to LIMIT for the callee
   to save.  Returns LIMIT + 1 if no such register needs saving.  */

static unsigned
aarch64_next_callee_save (unsigned regno, unsigned limit)
{
  while (regno <= limit && !aarch64_register_saved_on_entry (regno))
    regno++;
  return regno;
}
3023 /* Push the register number REGNO of mode MODE to the stack with write-back
3024 adjusting the stack by ADJUSTMENT. */
3027 aarch64_pushwb_single_reg (machine_mode mode
, unsigned regno
,
3028 HOST_WIDE_INT adjustment
)
3030 rtx base_rtx
= stack_pointer_rtx
;
3033 reg
= gen_rtx_REG (mode
, regno
);
3034 mem
= gen_rtx_PRE_MODIFY (Pmode
, base_rtx
,
3035 plus_constant (Pmode
, base_rtx
, -adjustment
));
3036 mem
= gen_rtx_MEM (mode
, mem
);
3038 insn
= emit_move_insn (mem
, reg
);
3039 RTX_FRAME_RELATED_P (insn
) = 1;
3042 /* Generate and return an instruction to store the pair of registers
3043 REG and REG2 of mode MODE to location BASE with write-back adjusting
3044 the stack location BASE by ADJUSTMENT. */
3047 aarch64_gen_storewb_pair (machine_mode mode
, rtx base
, rtx reg
, rtx reg2
,
3048 HOST_WIDE_INT adjustment
)
3053 return gen_storewb_pairdi_di (base
, base
, reg
, reg2
,
3054 GEN_INT (-adjustment
),
3055 GEN_INT (UNITS_PER_WORD
- adjustment
));
3057 return gen_storewb_pairdf_di (base
, base
, reg
, reg2
,
3058 GEN_INT (-adjustment
),
3059 GEN_INT (UNITS_PER_WORD
- adjustment
));
3065 /* Push registers numbered REGNO1 and REGNO2 to the stack, adjusting the
3066 stack pointer by ADJUSTMENT. */
3069 aarch64_push_regs (unsigned regno1
, unsigned regno2
, HOST_WIDE_INT adjustment
)
3072 machine_mode mode
= (regno1
<= R30_REGNUM
) ? DImode
: DFmode
;
3074 if (regno2
== INVALID_REGNUM
)
3075 return aarch64_pushwb_single_reg (mode
, regno1
, adjustment
);
3077 rtx reg1
= gen_rtx_REG (mode
, regno1
);
3078 rtx reg2
= gen_rtx_REG (mode
, regno2
);
3080 insn
= emit_insn (aarch64_gen_storewb_pair (mode
, stack_pointer_rtx
, reg1
,
3082 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 2)) = 1;
3083 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
3084 RTX_FRAME_RELATED_P (insn
) = 1;
3087 /* Load the pair of register REG, REG2 of mode MODE from stack location BASE,
3088 adjusting it by ADJUSTMENT afterwards. */
3091 aarch64_gen_loadwb_pair (machine_mode mode
, rtx base
, rtx reg
, rtx reg2
,
3092 HOST_WIDE_INT adjustment
)
3097 return gen_loadwb_pairdi_di (base
, base
, reg
, reg2
, GEN_INT (adjustment
),
3098 GEN_INT (UNITS_PER_WORD
));
3100 return gen_loadwb_pairdf_di (base
, base
, reg
, reg2
, GEN_INT (adjustment
),
3101 GEN_INT (UNITS_PER_WORD
));
3107 /* Pop the two registers numbered REGNO1, REGNO2 from the stack, adjusting it
3108 afterwards by ADJUSTMENT and writing the appropriate REG_CFA_RESTORE notes
3112 aarch64_pop_regs (unsigned regno1
, unsigned regno2
, HOST_WIDE_INT adjustment
,
3115 machine_mode mode
= (regno1
<= R30_REGNUM
) ? DImode
: DFmode
;
3116 rtx reg1
= gen_rtx_REG (mode
, regno1
);
3118 *cfi_ops
= alloc_reg_note (REG_CFA_RESTORE
, reg1
, *cfi_ops
);
3120 if (regno2
== INVALID_REGNUM
)
3122 rtx mem
= plus_constant (Pmode
, stack_pointer_rtx
, adjustment
);
3123 mem
= gen_rtx_POST_MODIFY (Pmode
, stack_pointer_rtx
, mem
);
3124 emit_move_insn (reg1
, gen_rtx_MEM (mode
, mem
));
3128 rtx reg2
= gen_rtx_REG (mode
, regno2
);
3129 *cfi_ops
= alloc_reg_note (REG_CFA_RESTORE
, reg2
, *cfi_ops
);
3130 emit_insn (aarch64_gen_loadwb_pair (mode
, stack_pointer_rtx
, reg1
,
3135 /* Generate and return a store pair instruction of mode MODE to store
3136 register REG1 to MEM1 and register REG2 to MEM2. */
3139 aarch64_gen_store_pair (machine_mode mode
, rtx mem1
, rtx reg1
, rtx mem2
,
3145 return gen_store_pairdi (mem1
, reg1
, mem2
, reg2
);
3148 return gen_store_pairdf (mem1
, reg1
, mem2
, reg2
);
3155 /* Generate and regurn a load pair isntruction of mode MODE to load register
3156 REG1 from MEM1 and register REG2 from MEM2. */
3159 aarch64_gen_load_pair (machine_mode mode
, rtx reg1
, rtx mem1
, rtx reg2
,
3165 return gen_load_pairdi (reg1
, mem1
, reg2
, mem2
);
3168 return gen_load_pairdf (reg1
, mem1
, reg2
, mem2
);
3175 /* Return TRUE if return address signing should be enabled for the current
3176 function, otherwise return FALSE. */
3179 aarch64_return_address_signing_enabled (void)
3181 /* This function should only be called after frame laid out. */
3182 gcc_assert (cfun
->machine
->frame
.laid_out
);
3184 /* If signing scope is AARCH64_FUNCTION_NON_LEAF, we only sign a leaf function
3185 if it's LR is pushed onto stack. */
3186 return (aarch64_ra_sign_scope
== AARCH64_FUNCTION_ALL
3187 || (aarch64_ra_sign_scope
== AARCH64_FUNCTION_NON_LEAF
3188 && cfun
->machine
->frame
.reg_offset
[LR_REGNUM
] >= 0));
3191 /* Emit code to save the callee-saved registers from register number START
3192 to LIMIT to the stack at the location starting at offset START_OFFSET,
3193 skipping any write-back candidates if SKIP_WB is true. */
3196 aarch64_save_callee_saves (machine_mode mode
, HOST_WIDE_INT start_offset
,
3197 unsigned start
, unsigned limit
, bool skip_wb
)
3200 rtx (*gen_mem_ref
) (machine_mode
, rtx
) = (frame_pointer_needed
3201 ? gen_frame_mem
: gen_rtx_MEM
);
3205 for (regno
= aarch64_next_callee_save (start
, limit
);
3207 regno
= aarch64_next_callee_save (regno
+ 1, limit
))
3210 HOST_WIDE_INT offset
;
3213 && (regno
== cfun
->machine
->frame
.wb_candidate1
3214 || regno
== cfun
->machine
->frame
.wb_candidate2
))
3217 if (cfun
->machine
->reg_is_wrapped_separately
[regno
])
3220 reg
= gen_rtx_REG (mode
, regno
);
3221 offset
= start_offset
+ cfun
->machine
->frame
.reg_offset
[regno
];
3222 mem
= gen_mem_ref (mode
, plus_constant (Pmode
, stack_pointer_rtx
,
3225 regno2
= aarch64_next_callee_save (regno
+ 1, limit
);
3228 && !cfun
->machine
->reg_is_wrapped_separately
[regno2
]
3229 && ((cfun
->machine
->frame
.reg_offset
[regno
] + UNITS_PER_WORD
)
3230 == cfun
->machine
->frame
.reg_offset
[regno2
]))
3233 rtx reg2
= gen_rtx_REG (mode
, regno2
);
3236 offset
= start_offset
+ cfun
->machine
->frame
.reg_offset
[regno2
];
3237 mem2
= gen_mem_ref (mode
, plus_constant (Pmode
, stack_pointer_rtx
,
3239 insn
= emit_insn (aarch64_gen_store_pair (mode
, mem
, reg
, mem2
,
3242 /* The first part of a frame-related parallel insn is
3243 always assumed to be relevant to the frame
3244 calculations; subsequent parts, are only
3245 frame-related if explicitly marked. */
3246 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
3250 insn
= emit_move_insn (mem
, reg
);
3252 RTX_FRAME_RELATED_P (insn
) = 1;
3256 /* Emit code to restore the callee registers of mode MODE from register
3257 number START up to and including LIMIT. Restore from the stack offset
3258 START_OFFSET, skipping any write-back candidates if SKIP_WB is true.
3259 Write the appropriate REG_CFA_RESTORE notes into CFI_OPS. */
3262 aarch64_restore_callee_saves (machine_mode mode
,
3263 HOST_WIDE_INT start_offset
, unsigned start
,
3264 unsigned limit
, bool skip_wb
, rtx
*cfi_ops
)
3266 rtx base_rtx
= stack_pointer_rtx
;
3267 rtx (*gen_mem_ref
) (machine_mode
, rtx
) = (frame_pointer_needed
3268 ? gen_frame_mem
: gen_rtx_MEM
);
3271 HOST_WIDE_INT offset
;
3273 for (regno
= aarch64_next_callee_save (start
, limit
);
3275 regno
= aarch64_next_callee_save (regno
+ 1, limit
))
3277 if (cfun
->machine
->reg_is_wrapped_separately
[regno
])
3283 && (regno
== cfun
->machine
->frame
.wb_candidate1
3284 || regno
== cfun
->machine
->frame
.wb_candidate2
))
3287 reg
= gen_rtx_REG (mode
, regno
);
3288 offset
= start_offset
+ cfun
->machine
->frame
.reg_offset
[regno
];
3289 mem
= gen_mem_ref (mode
, plus_constant (Pmode
, base_rtx
, offset
));
3291 regno2
= aarch64_next_callee_save (regno
+ 1, limit
);
3294 && !cfun
->machine
->reg_is_wrapped_separately
[regno2
]
3295 && ((cfun
->machine
->frame
.reg_offset
[regno
] + UNITS_PER_WORD
)
3296 == cfun
->machine
->frame
.reg_offset
[regno2
]))
3298 rtx reg2
= gen_rtx_REG (mode
, regno2
);
3301 offset
= start_offset
+ cfun
->machine
->frame
.reg_offset
[regno2
];
3302 mem2
= gen_mem_ref (mode
, plus_constant (Pmode
, base_rtx
, offset
));
3303 emit_insn (aarch64_gen_load_pair (mode
, reg
, mem
, reg2
, mem2
));
3305 *cfi_ops
= alloc_reg_note (REG_CFA_RESTORE
, reg2
, *cfi_ops
);
3309 emit_move_insn (reg
, mem
);
3310 *cfi_ops
= alloc_reg_note (REG_CFA_RESTORE
, reg
, *cfi_ops
);
3315 offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED
,
3316 HOST_WIDE_INT offset
)
3318 return offset
>= -256 && offset
< 256;
3322 offset_12bit_unsigned_scaled_p (machine_mode mode
, HOST_WIDE_INT offset
)
3325 && offset
< 4096 * GET_MODE_SIZE (mode
)
3326 && offset
% GET_MODE_SIZE (mode
) == 0);
3330 aarch64_offset_7bit_signed_scaled_p (machine_mode mode
, HOST_WIDE_INT offset
)
3332 return (offset
>= -64 * GET_MODE_SIZE (mode
)
3333 && offset
< 64 * GET_MODE_SIZE (mode
)
3334 && offset
% GET_MODE_SIZE (mode
) == 0);
3337 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
3340 aarch64_get_separate_components (void)
3342 aarch64_layout_frame ();
3344 sbitmap components
= sbitmap_alloc (LAST_SAVED_REGNUM
+ 1);
3345 bitmap_clear (components
);
3347 /* The registers we need saved to the frame. */
3348 for (unsigned regno
= 0; regno
<= LAST_SAVED_REGNUM
; regno
++)
3349 if (aarch64_register_saved_on_entry (regno
))
3351 HOST_WIDE_INT offset
= cfun
->machine
->frame
.reg_offset
[regno
];
3352 if (!frame_pointer_needed
)
3353 offset
+= cfun
->machine
->frame
.frame_size
3354 - cfun
->machine
->frame
.hard_fp_offset
;
3355 /* Check that we can access the stack slot of the register with one
3356 direct load with no adjustments needed. */
3357 if (offset_12bit_unsigned_scaled_p (DImode
, offset
))
3358 bitmap_set_bit (components
, regno
);
3361 /* Don't mess with the hard frame pointer. */
3362 if (frame_pointer_needed
)
3363 bitmap_clear_bit (components
, HARD_FRAME_POINTER_REGNUM
);
3365 unsigned reg1
= cfun
->machine
->frame
.wb_candidate1
;
3366 unsigned reg2
= cfun
->machine
->frame
.wb_candidate2
;
3367 /* If aarch64_layout_frame has chosen registers to store/restore with
3368 writeback don't interfere with them to avoid having to output explicit
3369 stack adjustment instructions. */
3370 if (reg2
!= INVALID_REGNUM
)
3371 bitmap_clear_bit (components
, reg2
);
3372 if (reg1
!= INVALID_REGNUM
)
3373 bitmap_clear_bit (components
, reg1
);
3375 bitmap_clear_bit (components
, LR_REGNUM
);
3376 bitmap_clear_bit (components
, SP_REGNUM
);
3381 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
3384 aarch64_components_for_bb (basic_block bb
)
3386 bitmap in
= DF_LIVE_IN (bb
);
3387 bitmap gen
= &DF_LIVE_BB_INFO (bb
)->gen
;
3388 bitmap kill
= &DF_LIVE_BB_INFO (bb
)->kill
;
3390 sbitmap components
= sbitmap_alloc (LAST_SAVED_REGNUM
+ 1);
3391 bitmap_clear (components
);
3393 /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
3394 for (unsigned regno
= 0; regno
<= LAST_SAVED_REGNUM
; regno
++)
3395 if ((!call_used_regs
[regno
])
3396 && (bitmap_bit_p (in
, regno
)
3397 || bitmap_bit_p (gen
, regno
)
3398 || bitmap_bit_p (kill
, regno
)))
3399 bitmap_set_bit (components
, regno
);
3404 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.
3405 Nothing to do for aarch64. */
3408 aarch64_disqualify_components (sbitmap
, edge
, sbitmap
, bool)
3412 /* Return the next set bit in BMP from START onwards. Return the total number
3413 of bits in BMP if no set bit is found at or after START. */
3416 aarch64_get_next_set_bit (sbitmap bmp
, unsigned int start
)
3418 unsigned int nbits
= SBITMAP_SIZE (bmp
);
3422 gcc_assert (start
< nbits
);
3423 for (unsigned int i
= start
; i
< nbits
; i
++)
3424 if (bitmap_bit_p (bmp
, i
))
3430 /* Do the work for aarch64_emit_prologue_components and
3431 aarch64_emit_epilogue_components. COMPONENTS is the bitmap of registers
3432 to save/restore, PROLOGUE_P indicates whether to emit the prologue sequence
3433 for these components or the epilogue sequence. That is, it determines
3434 whether we should emit stores or loads and what kind of CFA notes to attach
3435 to the insns. Otherwise the logic for the two sequences is very
3439 aarch64_process_components (sbitmap components
, bool prologue_p
)
3441 rtx ptr_reg
= gen_rtx_REG (Pmode
, frame_pointer_needed
3442 ? HARD_FRAME_POINTER_REGNUM
3443 : STACK_POINTER_REGNUM
);
3445 unsigned last_regno
= SBITMAP_SIZE (components
);
3446 unsigned regno
= aarch64_get_next_set_bit (components
, R0_REGNUM
);
3447 rtx_insn
*insn
= NULL
;
3449 while (regno
!= last_regno
)
3451 /* AAPCS64 section 5.1.2 requires only the bottom 64 bits to be saved
3452 so DFmode for the vector registers is enough. */
3453 machine_mode mode
= GP_REGNUM_P (regno
) ? DImode
: DFmode
;
3454 rtx reg
= gen_rtx_REG (mode
, regno
);
3455 HOST_WIDE_INT offset
= cfun
->machine
->frame
.reg_offset
[regno
];
3456 if (!frame_pointer_needed
)
3457 offset
+= cfun
->machine
->frame
.frame_size
3458 - cfun
->machine
->frame
.hard_fp_offset
;
3459 rtx addr
= plus_constant (Pmode
, ptr_reg
, offset
);
3460 rtx mem
= gen_frame_mem (mode
, addr
);
3462 rtx set
= prologue_p
? gen_rtx_SET (mem
, reg
) : gen_rtx_SET (reg
, mem
);
3463 unsigned regno2
= aarch64_get_next_set_bit (components
, regno
+ 1);
3464 /* No more registers to handle after REGNO.
3465 Emit a single save/restore and exit. */
3466 if (regno2
== last_regno
)
3468 insn
= emit_insn (set
);
3469 RTX_FRAME_RELATED_P (insn
) = 1;
3471 add_reg_note (insn
, REG_CFA_OFFSET
, copy_rtx (set
));
3473 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
3477 HOST_WIDE_INT offset2
= cfun
->machine
->frame
.reg_offset
[regno2
];
3478 /* The next register is not of the same class or its offset is not
3479 mergeable with the current one into a pair. */
3480 if (!satisfies_constraint_Ump (mem
)
3481 || GP_REGNUM_P (regno
) != GP_REGNUM_P (regno2
)
3482 || (offset2
- cfun
->machine
->frame
.reg_offset
[regno
])
3483 != GET_MODE_SIZE (mode
))
3485 insn
= emit_insn (set
);
3486 RTX_FRAME_RELATED_P (insn
) = 1;
3488 add_reg_note (insn
, REG_CFA_OFFSET
, copy_rtx (set
));
3490 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
3496 /* REGNO2 can be saved/restored in a pair with REGNO. */
3497 rtx reg2
= gen_rtx_REG (mode
, regno2
);
3498 if (!frame_pointer_needed
)
3499 offset2
+= cfun
->machine
->frame
.frame_size
3500 - cfun
->machine
->frame
.hard_fp_offset
;
3501 rtx addr2
= plus_constant (Pmode
, ptr_reg
, offset2
);
3502 rtx mem2
= gen_frame_mem (mode
, addr2
);
3503 rtx set2
= prologue_p
? gen_rtx_SET (mem2
, reg2
)
3504 : gen_rtx_SET (reg2
, mem2
);
3507 insn
= emit_insn (aarch64_gen_store_pair (mode
, mem
, reg
, mem2
, reg2
));
3509 insn
= emit_insn (aarch64_gen_load_pair (mode
, reg
, mem
, reg2
, mem2
));
3511 RTX_FRAME_RELATED_P (insn
) = 1;
3514 add_reg_note (insn
, REG_CFA_OFFSET
, set
);
3515 add_reg_note (insn
, REG_CFA_OFFSET
, set2
);
3519 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
3520 add_reg_note (insn
, REG_CFA_RESTORE
, reg2
);
3523 regno
= aarch64_get_next_set_bit (components
, regno2
+ 1);
3527 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
3530 aarch64_emit_prologue_components (sbitmap components
)
3532 aarch64_process_components (components
, true);
3535 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
3538 aarch64_emit_epilogue_components (sbitmap components
)
3540 aarch64_process_components (components
, false);
3543 /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
3546 aarch64_set_handled_components (sbitmap components
)
3548 for (unsigned regno
= 0; regno
<= LAST_SAVED_REGNUM
; regno
++)
3549 if (bitmap_bit_p (components
, regno
))
3550 cfun
->machine
->reg_is_wrapped_separately
[regno
] = true;
3553 /* AArch64 stack frames generated by this compiler look like:
3555 +-------------------------------+
3557 | incoming stack arguments |
3559 +-------------------------------+
3560 | | <-- incoming stack pointer (aligned)
3561 | callee-allocated save area |
3562 | for register varargs |
3564 +-------------------------------+
3565 | local variables | <-- frame_pointer_rtx
3567 +-------------------------------+
3569 +-------------------------------+ |
3570 | callee-saved registers | | frame.saved_regs_size
3571 +-------------------------------+ |
3573 +-------------------------------+ |
3574 | FP' | / <- hard_frame_pointer_rtx (aligned)
3575 +-------------------------------+
3576 | dynamic allocation |
3577 +-------------------------------+
3579 +-------------------------------+
3580 | outgoing stack arguments | <-- arg_pointer
3582 +-------------------------------+
3583 | | <-- stack_pointer_rtx (aligned)
3585 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
3586 but leave frame_pointer_rtx and hard_frame_pointer_rtx
3589 /* Generate the prologue instructions for entry into a function.
3590 Establish the stack frame by decreasing the stack pointer with a
3591 properly calculated size and, if necessary, create a frame record
3592 filled with the values of LR and previous frame pointer. The
3593 current FP is also set up if it is in use. */
3596 aarch64_expand_prologue (void)
3598 aarch64_layout_frame ();
3600 HOST_WIDE_INT frame_size
= cfun
->machine
->frame
.frame_size
;
3601 HOST_WIDE_INT initial_adjust
= cfun
->machine
->frame
.initial_adjust
;
3602 HOST_WIDE_INT callee_adjust
= cfun
->machine
->frame
.callee_adjust
;
3603 HOST_WIDE_INT final_adjust
= cfun
->machine
->frame
.final_adjust
;
3604 HOST_WIDE_INT callee_offset
= cfun
->machine
->frame
.callee_offset
;
3605 unsigned reg1
= cfun
->machine
->frame
.wb_candidate1
;
3606 unsigned reg2
= cfun
->machine
->frame
.wb_candidate2
;
3609 /* Sign return address for functions. */
3610 if (aarch64_return_address_signing_enabled ())
3612 insn
= emit_insn (gen_pacisp ());
3613 add_reg_note (insn
, REG_CFA_TOGGLE_RA_MANGLE
, const0_rtx
);
3614 RTX_FRAME_RELATED_P (insn
) = 1;
3617 if (flag_stack_usage_info
)
3618 current_function_static_stack_size
= frame_size
;
3620 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
3622 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
3624 if (frame_size
> PROBE_INTERVAL
&& frame_size
> STACK_CHECK_PROTECT
)
3625 aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT
,
3626 frame_size
- STACK_CHECK_PROTECT
);
3628 else if (frame_size
> 0)
3629 aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT
, frame_size
);
3632 aarch64_sub_sp (IP0_REGNUM
, initial_adjust
, true);
3634 if (callee_adjust
!= 0)
3635 aarch64_push_regs (reg1
, reg2
, callee_adjust
);
3637 if (frame_pointer_needed
)
3639 if (callee_adjust
== 0)
3640 aarch64_save_callee_saves (DImode
, callee_offset
, R29_REGNUM
,
3642 insn
= emit_insn (gen_add3_insn (hard_frame_pointer_rtx
,
3644 GEN_INT (callee_offset
)));
3645 RTX_FRAME_RELATED_P (insn
) = 1;
3646 emit_insn (gen_stack_tie (stack_pointer_rtx
, hard_frame_pointer_rtx
));
3649 aarch64_save_callee_saves (DImode
, callee_offset
, R0_REGNUM
, R30_REGNUM
,
3650 callee_adjust
!= 0 || frame_pointer_needed
);
3651 aarch64_save_callee_saves (DFmode
, callee_offset
, V0_REGNUM
, V31_REGNUM
,
3652 callee_adjust
!= 0 || frame_pointer_needed
);
3653 aarch64_sub_sp (IP1_REGNUM
, final_adjust
, !frame_pointer_needed
);
3656 /* Return TRUE if we can use a simple_return insn.
3658 This function checks whether the callee saved stack is empty, which
3659 means no restore actions are need. The pro_and_epilogue will use
3660 this to check whether shrink-wrapping opt is feasible. */
3663 aarch64_use_return_insn_p (void)
3665 if (!reload_completed
)
3671 aarch64_layout_frame ();
3673 return cfun
->machine
->frame
.frame_size
== 0;
3676 /* Generate the epilogue instructions for returning from a function.
3677 This is almost exactly the reverse of the prolog sequence, except
3678 that we need to insert barriers to avoid scheduling loads that read
3679 from a deallocated stack, and we optimize the unwind records by
3680 emitting them all together if possible. */
3682 aarch64_expand_epilogue (bool for_sibcall
)
3684 aarch64_layout_frame ();
3686 HOST_WIDE_INT initial_adjust
= cfun
->machine
->frame
.initial_adjust
;
3687 HOST_WIDE_INT callee_adjust
= cfun
->machine
->frame
.callee_adjust
;
3688 HOST_WIDE_INT final_adjust
= cfun
->machine
->frame
.final_adjust
;
3689 HOST_WIDE_INT callee_offset
= cfun
->machine
->frame
.callee_offset
;
3690 unsigned reg1
= cfun
->machine
->frame
.wb_candidate1
;
3691 unsigned reg2
= cfun
->machine
->frame
.wb_candidate2
;
3695 /* We need to add memory barrier to prevent read from deallocated stack. */
3696 bool need_barrier_p
= (get_frame_size ()
3697 + cfun
->machine
->frame
.saved_varargs_size
) != 0;
3699 /* Emit a barrier to prevent loads from a deallocated stack. */
3700 if (final_adjust
> crtl
->outgoing_args_size
|| cfun
->calls_alloca
3701 || crtl
->calls_eh_return
)
3703 emit_insn (gen_stack_tie (stack_pointer_rtx
, stack_pointer_rtx
));
3704 need_barrier_p
= false;
3707 /* Restore the stack pointer from the frame pointer if it may not
3708 be the same as the stack pointer. */
3709 if (frame_pointer_needed
&& (final_adjust
|| cfun
->calls_alloca
))
3711 insn
= emit_insn (gen_add3_insn (stack_pointer_rtx
,
3712 hard_frame_pointer_rtx
,
3713 GEN_INT (-callee_offset
)));
3714 /* If writeback is used when restoring callee-saves, the CFA
3715 is restored on the instruction doing the writeback. */
3716 RTX_FRAME_RELATED_P (insn
) = callee_adjust
== 0;
3719 aarch64_add_sp (IP1_REGNUM
, final_adjust
, df_regs_ever_live_p (IP1_REGNUM
));
3721 aarch64_restore_callee_saves (DImode
, callee_offset
, R0_REGNUM
, R30_REGNUM
,
3722 callee_adjust
!= 0, &cfi_ops
);
3723 aarch64_restore_callee_saves (DFmode
, callee_offset
, V0_REGNUM
, V31_REGNUM
,
3724 callee_adjust
!= 0, &cfi_ops
);
3727 emit_insn (gen_stack_tie (stack_pointer_rtx
, stack_pointer_rtx
));
3729 if (callee_adjust
!= 0)
3730 aarch64_pop_regs (reg1
, reg2
, callee_adjust
, &cfi_ops
);
3732 if (callee_adjust
!= 0 || initial_adjust
> 65536)
3734 /* Emit delayed restores and set the CFA to be SP + initial_adjust. */
3735 insn
= get_last_insn ();
3736 rtx new_cfa
= plus_constant (Pmode
, stack_pointer_rtx
, initial_adjust
);
3737 REG_NOTES (insn
) = alloc_reg_note (REG_CFA_DEF_CFA
, new_cfa
, cfi_ops
);
3738 RTX_FRAME_RELATED_P (insn
) = 1;
3742 aarch64_add_sp (IP0_REGNUM
, initial_adjust
, df_regs_ever_live_p (IP0_REGNUM
));
3746 /* Emit delayed restores and reset the CFA to be SP. */
3747 insn
= get_last_insn ();
3748 cfi_ops
= alloc_reg_note (REG_CFA_DEF_CFA
, stack_pointer_rtx
, cfi_ops
);
3749 REG_NOTES (insn
) = cfi_ops
;
3750 RTX_FRAME_RELATED_P (insn
) = 1;
3753 /* We prefer to emit the combined return/authenticate instruction RETAA,
3754 however there are three cases in which we must instead emit an explicit
3755 authentication instruction.
3757 1) Sibcalls don't return in a normal way, so if we're about to call one
3758 we must authenticate.
3760 2) The RETAA instruction is not available before ARMv8.3-A, so if we are
3761 generating code for !TARGET_ARMV8_3 we can't use it and must
3762 explicitly authenticate.
3764 3) On an eh_return path we make extra stack adjustments to update the
3765 canonical frame address to be the exception handler's CFA. We want
3766 to authenticate using the CFA of the function which calls eh_return.
3768 if (aarch64_return_address_signing_enabled ()
3769 && (for_sibcall
|| !TARGET_ARMV8_3
|| crtl
->calls_eh_return
))
3771 insn
= emit_insn (gen_autisp ());
3772 add_reg_note (insn
, REG_CFA_TOGGLE_RA_MANGLE
, const0_rtx
);
3773 RTX_FRAME_RELATED_P (insn
) = 1;
3776 /* Stack adjustment for exception handler. */
3777 if (crtl
->calls_eh_return
)
3779 /* We need to unwind the stack by the offset computed by
3780 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
3781 to be SP; letting the CFA move during this adjustment
3782 is just as correct as retaining the CFA from the body
3783 of the function. Therefore, do nothing special. */
3784 emit_insn (gen_add2_insn (stack_pointer_rtx
, EH_RETURN_STACKADJ_RTX
));
3787 emit_use (gen_rtx_REG (DImode
, LR_REGNUM
));
3789 emit_jump_insn (ret_rtx
);
3792 /* Implement EH_RETURN_HANDLER_RTX. EH returns need to either return
3793 normally or return to a previous frame after unwinding.
3795 An EH return uses a single shared return sequence. The epilogue is
3796 exactly like a normal epilogue except that it has an extra input
3797 register (EH_RETURN_STACKADJ_RTX) which contains the stack adjustment
3798 that must be applied after the frame has been destroyed. An extra label
3799 is inserted before the epilogue which initializes this register to zero,
3800 and this is the entry point for a normal return.
3802 An actual EH return updates the return address, initializes the stack
3803 adjustment and jumps directly into the epilogue (bypassing the zeroing
3804 of the adjustment). Since the return address is typically saved on the
3805 stack when a function makes a call, the saved LR must be updated outside
3808 This poses problems as the store is generated well before the epilogue,
3809 so the offset of LR is not known yet. Also optimizations will remove the
3810 store as it appears dead, even after the epilogue is generated (as the
3811 base or offset for loading LR is different in many cases).
3813 To avoid these problems this implementation forces the frame pointer
3814 in eh_return functions so that the location of LR is fixed and known early.
3815 It also marks the store volatile, so no optimization is permitted to
3816 remove the store. */
3818 aarch64_eh_return_handler_rtx (void)
3820 rtx tmp
= gen_frame_mem (Pmode
,
3821 plus_constant (Pmode
, hard_frame_pointer_rtx
, UNITS_PER_WORD
));
3823 /* Mark the store volatile, so no optimization is permitted to remove it. */
3824 MEM_VOLATILE_P (tmp
) = true;
3828 /* Output code to add DELTA to the first argument, and then jump
3829 to FUNCTION. Used for C++ multiple inheritance. */
3831 aarch64_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
3832 HOST_WIDE_INT delta
,
3833 HOST_WIDE_INT vcall_offset
,
3836 /* The this pointer is always in x0. Note that this differs from
3837 Arm where the this pointer maybe bumped to r1 if r0 is required
3838 to return a pointer to an aggregate. On AArch64 a result value
3839 pointer will be in x8. */
3840 int this_regno
= R0_REGNUM
;
3841 rtx this_rtx
, temp0
, temp1
, addr
, funexp
;
3844 reload_completed
= 1;
3845 emit_note (NOTE_INSN_PROLOGUE_END
);
3847 if (vcall_offset
== 0)
3848 aarch64_add_constant (Pmode
, this_regno
, IP1_REGNUM
, delta
);
3851 gcc_assert ((vcall_offset
& (POINTER_BYTES
- 1)) == 0);
3853 this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
3854 temp0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
3855 temp1
= gen_rtx_REG (Pmode
, IP1_REGNUM
);
3860 if (delta
>= -256 && delta
< 256)
3861 addr
= gen_rtx_PRE_MODIFY (Pmode
, this_rtx
,
3862 plus_constant (Pmode
, this_rtx
, delta
));
3864 aarch64_add_constant (Pmode
, this_regno
, IP1_REGNUM
, delta
);
3867 if (Pmode
== ptr_mode
)
3868 aarch64_emit_move (temp0
, gen_rtx_MEM (ptr_mode
, addr
));
3870 aarch64_emit_move (temp0
,
3871 gen_rtx_ZERO_EXTEND (Pmode
,
3872 gen_rtx_MEM (ptr_mode
, addr
)));
3874 if (vcall_offset
>= -256 && vcall_offset
< 4096 * POINTER_BYTES
)
3875 addr
= plus_constant (Pmode
, temp0
, vcall_offset
);
3878 aarch64_internal_mov_immediate (temp1
, GEN_INT (vcall_offset
), true,
3880 addr
= gen_rtx_PLUS (Pmode
, temp0
, temp1
);
3883 if (Pmode
== ptr_mode
)
3884 aarch64_emit_move (temp1
, gen_rtx_MEM (ptr_mode
,addr
));
3886 aarch64_emit_move (temp1
,
3887 gen_rtx_SIGN_EXTEND (Pmode
,
3888 gen_rtx_MEM (ptr_mode
, addr
)));
3890 emit_insn (gen_add2_insn (this_rtx
, temp1
));
3893 /* Generate a tail call to the target function. */
3894 if (!TREE_USED (function
))
3896 assemble_external (function
);
3897 TREE_USED (function
) = 1;
3899 funexp
= XEXP (DECL_RTL (function
), 0);
3900 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
3901 insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
3902 SIBLING_CALL_P (insn
) = 1;
3904 insn
= get_insns ();
3905 shorten_branches (insn
);
3906 final_start_function (insn
, file
, 1);
3907 final (insn
, file
, 1);
3908 final_end_function ();
3910 /* Stop pretending to be a post-reload pass. */
3911 reload_completed
= 0;
3915 aarch64_tls_referenced_p (rtx x
)
3917 if (!TARGET_HAVE_TLS
)
3919 subrtx_iterator::array_type array
;
3920 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
3922 const_rtx x
= *iter
;
3923 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x
) != 0)
3925 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
3926 TLS offsets, not real symbol references. */
3927 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
3928 iter
.skip_subrtxes ();
3934 /* Return true if val can be encoded as a 12-bit unsigned immediate with
3935 a left shift of 0 or 12 bits. */
3937 aarch64_uimm12_shift (HOST_WIDE_INT val
)
3939 return ((val
& (((HOST_WIDE_INT
) 0xfff) << 0)) == val
3940 || (val
& (((HOST_WIDE_INT
) 0xfff) << 12)) == val
3945 /* Return true if val is an immediate that can be loaded into a
3946 register by a MOVZ instruction. */
3948 aarch64_movw_imm (HOST_WIDE_INT val
, machine_mode mode
)
3950 if (GET_MODE_SIZE (mode
) > 4)
3952 if ((val
& (((HOST_WIDE_INT
) 0xffff) << 32)) == val
3953 || (val
& (((HOST_WIDE_INT
) 0xffff) << 48)) == val
)
3958 /* Ignore sign extension. */
3959 val
&= (HOST_WIDE_INT
) 0xffffffff;
3961 return ((val
& (((HOST_WIDE_INT
) 0xffff) << 0)) == val
3962 || (val
& (((HOST_WIDE_INT
) 0xffff) << 16)) == val
);
3965 /* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
3967 static const unsigned HOST_WIDE_INT bitmask_imm_mul
[] =
3969 0x0000000100000001ull
,
3970 0x0001000100010001ull
,
3971 0x0101010101010101ull
,
3972 0x1111111111111111ull
,
3973 0x5555555555555555ull
,
3977 /* Return true if val is a valid bitmask immediate. */
3980 aarch64_bitmask_imm (HOST_WIDE_INT val_in
, machine_mode mode
)
3982 unsigned HOST_WIDE_INT val
, tmp
, mask
, first_one
, next_one
;
3985 /* Check for a single sequence of one bits and return quickly if so.
3986 The special cases of all ones and all zeroes returns false. */
3987 val
= (unsigned HOST_WIDE_INT
) val_in
;
3988 tmp
= val
+ (val
& -val
);
3990 if (tmp
== (tmp
& -tmp
))
3991 return (val
+ 1) > 1;
3993 /* Replicate 32-bit immediates so we can treat them as 64-bit. */
3995 val
= (val
<< 32) | (val
& 0xffffffff);
3997 /* Invert if the immediate doesn't start with a zero bit - this means we
3998 only need to search for sequences of one bits. */
4002 /* Find the first set bit and set tmp to val with the first sequence of one
4003 bits removed. Return success if there is a single sequence of ones. */
4004 first_one
= val
& -val
;
4005 tmp
= val
& (val
+ first_one
);
4010 /* Find the next set bit and compute the difference in bit position. */
4011 next_one
= tmp
& -tmp
;
4012 bits
= clz_hwi (first_one
) - clz_hwi (next_one
);
4015 /* Check the bit position difference is a power of 2, and that the first
4016 sequence of one bits fits within 'bits' bits. */
4017 if ((mask
>> bits
) != 0 || bits
!= (bits
& -bits
))
4020 /* Check the sequence of one bits is repeated 64/bits times. */
4021 return val
== mask
* bitmask_imm_mul
[__builtin_clz (bits
) - 26];
4024 /* Create mask of ones, covering the lowest to highest bits set in VAL_IN.
4025 Assumed precondition: VAL_IN Is not zero. */
4027 unsigned HOST_WIDE_INT
4028 aarch64_and_split_imm1 (HOST_WIDE_INT val_in
)
4030 int lowest_bit_set
= ctz_hwi (val_in
);
4031 int highest_bit_set
= floor_log2 (val_in
);
4032 gcc_assert (val_in
!= 0);
4034 return ((HOST_WIDE_INT_UC (2) << highest_bit_set
) -
4035 (HOST_WIDE_INT_1U
<< lowest_bit_set
));
4038 /* Create constant where bits outside of lowest bit set to highest bit set
4041 unsigned HOST_WIDE_INT
4042 aarch64_and_split_imm2 (HOST_WIDE_INT val_in
)
4044 return val_in
| ~aarch64_and_split_imm1 (val_in
);
4047 /* Return true if VAL_IN is a valid 'and' bitmask immediate. */
4050 aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in
, machine_mode mode
)
4052 if (aarch64_bitmask_imm (val_in
, mode
))
4055 if (aarch64_move_imm (val_in
, mode
))
4058 unsigned HOST_WIDE_INT imm2
= aarch64_and_split_imm2 (val_in
);
4060 return aarch64_bitmask_imm (imm2
, mode
);
4063 /* Return true if val is an immediate that can be loaded into a
4064 register in a single instruction. */
4066 aarch64_move_imm (HOST_WIDE_INT val
, machine_mode mode
)
4068 if (aarch64_movw_imm (val
, mode
) || aarch64_movw_imm (~val
, mode
))
4070 return aarch64_bitmask_imm (val
, mode
);
4074 aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
4078 if (GET_CODE (x
) == HIGH
)
4081 split_const (x
, &base
, &offset
);
4082 if (GET_CODE (base
) == SYMBOL_REF
|| GET_CODE (base
) == LABEL_REF
)
4084 if (aarch64_classify_symbol (base
, offset
)
4085 != SYMBOL_FORCE_TO_MEM
)
4088 /* Avoid generating a 64-bit relocation in ILP32; leave
4089 to aarch64_expand_mov_immediate to handle it properly. */
4090 return mode
!= ptr_mode
;
4093 return aarch64_tls_referenced_p (x
);
4096 /* Implement TARGET_CASE_VALUES_THRESHOLD.
4097 The expansion for a table switch is quite expensive due to the number
4098 of instructions, the table lookup and hard to predict indirect jump.
4099 When optimizing for speed, and -O3 enabled, use the per-core tuning if
4100 set, otherwise use tables for > 16 cases as a tradeoff between size and
4101 performance. When optimizing for size, use the default setting. */
4104 aarch64_case_values_threshold (void)
4106 /* Use the specified limit for the number of cases before using jump
4107 tables at higher optimization levels. */
4109 && selected_cpu
->tune
->max_case_values
!= 0)
4110 return selected_cpu
->tune
->max_case_values
;
4112 return optimize_size
? default_case_values_threshold () : 17;
4115 /* Return true if register REGNO is a valid index register.
4116 STRICT_P is true if REG_OK_STRICT is in effect. */
4119 aarch64_regno_ok_for_index_p (int regno
, bool strict_p
)
4121 if (!HARD_REGISTER_NUM_P (regno
))
4129 regno
= reg_renumber
[regno
];
4131 return GP_REGNUM_P (regno
);
4134 /* Return true if register REGNO is a valid base register for mode MODE.
4135 STRICT_P is true if REG_OK_STRICT is in effect. */
4138 aarch64_regno_ok_for_base_p (int regno
, bool strict_p
)
4140 if (!HARD_REGISTER_NUM_P (regno
))
4148 regno
= reg_renumber
[regno
];
4151 /* The fake registers will be eliminated to either the stack or
4152 hard frame pointer, both of which are usually valid base registers.
4153 Reload deals with the cases where the eliminated form isn't valid. */
4154 return (GP_REGNUM_P (regno
)
4155 || regno
== SP_REGNUM
4156 || regno
== FRAME_POINTER_REGNUM
4157 || regno
== ARG_POINTER_REGNUM
);
4160 /* Return true if X is a valid base register for mode MODE.
4161 STRICT_P is true if REG_OK_STRICT is in effect. */
4164 aarch64_base_register_rtx_p (rtx x
, bool strict_p
)
4166 if (!strict_p
&& GET_CODE (x
) == SUBREG
)
4169 return (REG_P (x
) && aarch64_regno_ok_for_base_p (REGNO (x
), strict_p
));
4172 /* Return true if address offset is a valid index. If it is, fill in INFO
4173 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
4176 aarch64_classify_index (struct aarch64_address_info
*info
, rtx x
,
4177 machine_mode mode
, bool strict_p
)
4179 enum aarch64_address_type type
;
4184 if ((REG_P (x
) || GET_CODE (x
) == SUBREG
)
4185 && GET_MODE (x
) == Pmode
)
4187 type
= ADDRESS_REG_REG
;
4191 /* (sign_extend:DI (reg:SI)) */
4192 else if ((GET_CODE (x
) == SIGN_EXTEND
4193 || GET_CODE (x
) == ZERO_EXTEND
)
4194 && GET_MODE (x
) == DImode
4195 && GET_MODE (XEXP (x
, 0)) == SImode
)
4197 type
= (GET_CODE (x
) == SIGN_EXTEND
)
4198 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
4199 index
= XEXP (x
, 0);
4202 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
4203 else if (GET_CODE (x
) == MULT
4204 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
4205 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
4206 && GET_MODE (XEXP (x
, 0)) == DImode
4207 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
4208 && CONST_INT_P (XEXP (x
, 1)))
4210 type
= (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
4211 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
4212 index
= XEXP (XEXP (x
, 0), 0);
4213 shift
= exact_log2 (INTVAL (XEXP (x
, 1)));
4215 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
4216 else if (GET_CODE (x
) == ASHIFT
4217 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
4218 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
4219 && GET_MODE (XEXP (x
, 0)) == DImode
4220 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
4221 && CONST_INT_P (XEXP (x
, 1)))
4223 type
= (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
4224 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
4225 index
= XEXP (XEXP (x
, 0), 0);
4226 shift
= INTVAL (XEXP (x
, 1));
4228 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
4229 else if ((GET_CODE (x
) == SIGN_EXTRACT
4230 || GET_CODE (x
) == ZERO_EXTRACT
)
4231 && GET_MODE (x
) == DImode
4232 && GET_CODE (XEXP (x
, 0)) == MULT
4233 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
4234 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
4236 type
= (GET_CODE (x
) == SIGN_EXTRACT
)
4237 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
4238 index
= XEXP (XEXP (x
, 0), 0);
4239 shift
= exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)));
4240 if (INTVAL (XEXP (x
, 1)) != 32 + shift
4241 || INTVAL (XEXP (x
, 2)) != 0)
4244 /* (and:DI (mult:DI (reg:DI) (const_int scale))
4245 (const_int 0xffffffff<<shift)) */
4246 else if (GET_CODE (x
) == AND
4247 && GET_MODE (x
) == DImode
4248 && GET_CODE (XEXP (x
, 0)) == MULT
4249 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
4250 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
4251 && CONST_INT_P (XEXP (x
, 1)))
4253 type
= ADDRESS_REG_UXTW
;
4254 index
= XEXP (XEXP (x
, 0), 0);
4255 shift
= exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)));
4256 if (INTVAL (XEXP (x
, 1)) != (HOST_WIDE_INT
)0xffffffff << shift
)
4259 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
4260 else if ((GET_CODE (x
) == SIGN_EXTRACT
4261 || GET_CODE (x
) == ZERO_EXTRACT
)
4262 && GET_MODE (x
) == DImode
4263 && GET_CODE (XEXP (x
, 0)) == ASHIFT
4264 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
4265 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
4267 type
= (GET_CODE (x
) == SIGN_EXTRACT
)
4268 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
4269 index
= XEXP (XEXP (x
, 0), 0);
4270 shift
= INTVAL (XEXP (XEXP (x
, 0), 1));
4271 if (INTVAL (XEXP (x
, 1)) != 32 + shift
4272 || INTVAL (XEXP (x
, 2)) != 0)
4275 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
4276 (const_int 0xffffffff<<shift)) */
4277 else if (GET_CODE (x
) == AND
4278 && GET_MODE (x
) == DImode
4279 && GET_CODE (XEXP (x
, 0)) == ASHIFT
4280 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
4281 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
4282 && CONST_INT_P (XEXP (x
, 1)))
4284 type
= ADDRESS_REG_UXTW
;
4285 index
= XEXP (XEXP (x
, 0), 0);
4286 shift
= INTVAL (XEXP (XEXP (x
, 0), 1));
4287 if (INTVAL (XEXP (x
, 1)) != (HOST_WIDE_INT
)0xffffffff << shift
)
4290 /* (mult:P (reg:P) (const_int scale)) */
4291 else if (GET_CODE (x
) == MULT
4292 && GET_MODE (x
) == Pmode
4293 && GET_MODE (XEXP (x
, 0)) == Pmode
4294 && CONST_INT_P (XEXP (x
, 1)))
4296 type
= ADDRESS_REG_REG
;
4297 index
= XEXP (x
, 0);
4298 shift
= exact_log2 (INTVAL (XEXP (x
, 1)));
4300 /* (ashift:P (reg:P) (const_int shift)) */
4301 else if (GET_CODE (x
) == ASHIFT
4302 && GET_MODE (x
) == Pmode
4303 && GET_MODE (XEXP (x
, 0)) == Pmode
4304 && CONST_INT_P (XEXP (x
, 1)))
4306 type
= ADDRESS_REG_REG
;
4307 index
= XEXP (x
, 0);
4308 shift
= INTVAL (XEXP (x
, 1));
4313 if (GET_CODE (index
) == SUBREG
)
4314 index
= SUBREG_REG (index
);
4317 (shift
> 0 && shift
<= 3
4318 && (1 << shift
) == GET_MODE_SIZE (mode
)))
4320 && aarch64_regno_ok_for_index_p (REGNO (index
), strict_p
))
4323 info
->offset
= index
;
4324 info
->shift
= shift
;
4331 /* Return true if MODE is one of the modes for which we
4332 support LDP/STP operations. */
4335 aarch64_mode_valid_for_sched_fusion_p (machine_mode mode
)
4337 return mode
== SImode
|| mode
== DImode
4338 || mode
== SFmode
|| mode
== DFmode
4339 || (aarch64_vector_mode_supported_p (mode
)
4340 && GET_MODE_SIZE (mode
) == 8);
4343 /* Return true if REGNO is a virtual pointer register, or an eliminable
4344 "soft" frame register. Like REGNO_PTR_FRAME_P except that we don't
4345 include stack_pointer or hard_frame_pointer. */
4347 virt_or_elim_regno_p (unsigned regno
)
4349 return ((regno
>= FIRST_VIRTUAL_REGISTER
4350 && regno
<= LAST_VIRTUAL_POINTER_REGISTER
)
4351 || regno
== FRAME_POINTER_REGNUM
4352 || regno
== ARG_POINTER_REGNUM
);
4355 /* Return true if X is a valid address for machine mode MODE. If it is,
4356 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
4357 effect. OUTER_CODE is PARALLEL for a load/store pair. */
4360 aarch64_classify_address (struct aarch64_address_info
*info
,
4361 rtx x
, machine_mode mode
,
4362 RTX_CODE outer_code
, bool strict_p
)
4364 enum rtx_code code
= GET_CODE (x
);
4367 /* On BE, we use load/store pair for all large int mode load/stores.
4368 TI/TFmode may also use a load/store pair. */
4369 bool load_store_pair_p
= (outer_code
== PARALLEL
4372 || (BYTES_BIG_ENDIAN
4373 && aarch64_vect_struct_mode_p (mode
)));
4375 bool allow_reg_index_p
=
4377 && (GET_MODE_SIZE (mode
) != 16 || aarch64_vector_mode_supported_p (mode
))
4378 && !aarch64_vect_struct_mode_p (mode
);
4380 /* On LE, for AdvSIMD, don't support anything other than POST_INC or
4382 if (aarch64_vect_struct_mode_p (mode
) && !BYTES_BIG_ENDIAN
4383 && (code
!= POST_INC
&& code
!= REG
))
4390 info
->type
= ADDRESS_REG_IMM
;
4392 info
->offset
= const0_rtx
;
4393 return aarch64_base_register_rtx_p (x
, strict_p
);
4401 && virt_or_elim_regno_p (REGNO (op0
))
4402 && CONST_INT_P (op1
))
4404 info
->type
= ADDRESS_REG_IMM
;
4411 if (GET_MODE_SIZE (mode
) != 0
4412 && CONST_INT_P (op1
)
4413 && aarch64_base_register_rtx_p (op0
, strict_p
))
4415 HOST_WIDE_INT offset
= INTVAL (op1
);
4417 info
->type
= ADDRESS_REG_IMM
;
4421 /* TImode and TFmode values are allowed in both pairs of X
4422 registers and individual Q registers. The available
4424 X,X: 7-bit signed scaled offset
4425 Q: 9-bit signed offset
4426 We conservatively require an offset representable in either mode.
4427 When performing the check for pairs of X registers i.e. LDP/STP
4428 pass down DImode since that is the natural size of the LDP/STP
4429 instruction memory accesses. */
4430 if (mode
== TImode
|| mode
== TFmode
)
4431 return (aarch64_offset_7bit_signed_scaled_p (DImode
, offset
)
4432 && (offset_9bit_signed_unscaled_p (mode
, offset
)
4433 || offset_12bit_unsigned_scaled_p (mode
, offset
)));
4435 /* A 7bit offset check because OImode will emit a ldp/stp
4436 instruction (only big endian will get here).
4437 For ldp/stp instructions, the offset is scaled for the size of a
4438 single element of the pair. */
4440 return aarch64_offset_7bit_signed_scaled_p (TImode
, offset
);
4442 /* Three 9/12 bit offsets checks because CImode will emit three
4443 ldr/str instructions (only big endian will get here). */
4445 return (aarch64_offset_7bit_signed_scaled_p (TImode
, offset
)
4446 && (offset_9bit_signed_unscaled_p (V16QImode
, offset
+ 32)
4447 || offset_12bit_unsigned_scaled_p (V16QImode
,
4450 /* Two 7bit offsets checks because XImode will emit two ldp/stp
4451 instructions (only big endian will get here). */
4453 return (aarch64_offset_7bit_signed_scaled_p (TImode
, offset
)
4454 && aarch64_offset_7bit_signed_scaled_p (TImode
,
4457 if (load_store_pair_p
)
4458 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
4459 && aarch64_offset_7bit_signed_scaled_p (mode
, offset
));
4461 return (offset_9bit_signed_unscaled_p (mode
, offset
)
4462 || offset_12bit_unsigned_scaled_p (mode
, offset
));
4465 if (allow_reg_index_p
)
4467 /* Look for base + (scaled/extended) index register. */
4468 if (aarch64_base_register_rtx_p (op0
, strict_p
)
4469 && aarch64_classify_index (info
, op1
, mode
, strict_p
))
4474 if (aarch64_base_register_rtx_p (op1
, strict_p
)
4475 && aarch64_classify_index (info
, op0
, mode
, strict_p
))
4488 info
->type
= ADDRESS_REG_WB
;
4489 info
->base
= XEXP (x
, 0);
4490 info
->offset
= NULL_RTX
;
4491 return aarch64_base_register_rtx_p (info
->base
, strict_p
);
4495 info
->type
= ADDRESS_REG_WB
;
4496 info
->base
= XEXP (x
, 0);
4497 if (GET_CODE (XEXP (x
, 1)) == PLUS
4498 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
4499 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), info
->base
)
4500 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
4502 HOST_WIDE_INT offset
;
4503 info
->offset
= XEXP (XEXP (x
, 1), 1);
4504 offset
= INTVAL (info
->offset
);
4506 /* TImode and TFmode values are allowed in both pairs of X
4507 registers and individual Q registers. The available
4509 X,X: 7-bit signed scaled offset
4510 Q: 9-bit signed offset
4511 We conservatively require an offset representable in either mode.
4513 if (mode
== TImode
|| mode
== TFmode
)
4514 return (aarch64_offset_7bit_signed_scaled_p (mode
, offset
)
4515 && offset_9bit_signed_unscaled_p (mode
, offset
));
4517 if (load_store_pair_p
)
4518 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
4519 && aarch64_offset_7bit_signed_scaled_p (mode
, offset
));
4521 return offset_9bit_signed_unscaled_p (mode
, offset
);
4528 /* load literal: pc-relative constant pool entry. Only supported
4529 for SI mode or larger. */
4530 info
->type
= ADDRESS_SYMBOLIC
;
4532 if (!load_store_pair_p
&& GET_MODE_SIZE (mode
) >= 4)
4536 split_const (x
, &sym
, &addend
);
4537 return ((GET_CODE (sym
) == LABEL_REF
4538 || (GET_CODE (sym
) == SYMBOL_REF
4539 && CONSTANT_POOL_ADDRESS_P (sym
)
4540 && aarch64_pcrelative_literal_loads
)));
4545 info
->type
= ADDRESS_LO_SUM
;
4546 info
->base
= XEXP (x
, 0);
4547 info
->offset
= XEXP (x
, 1);
4548 if (allow_reg_index_p
4549 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
4552 split_const (info
->offset
, &sym
, &offs
);
4553 if (GET_CODE (sym
) == SYMBOL_REF
4554 && (aarch64_classify_symbol (sym
, offs
) == SYMBOL_SMALL_ABSOLUTE
))
4556 /* The symbol and offset must be aligned to the access size. */
4558 unsigned int ref_size
;
4560 if (CONSTANT_POOL_ADDRESS_P (sym
))
4561 align
= GET_MODE_ALIGNMENT (get_pool_mode (sym
));
4562 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym
))
4564 tree exp
= SYMBOL_REF_DECL (sym
);
4565 align
= TYPE_ALIGN (TREE_TYPE (exp
));
4566 align
= CONSTANT_ALIGNMENT (exp
, align
);
4568 else if (SYMBOL_REF_DECL (sym
))
4569 align
= DECL_ALIGN (SYMBOL_REF_DECL (sym
));
4570 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym
)
4571 && SYMBOL_REF_BLOCK (sym
) != NULL
)
4572 align
= SYMBOL_REF_BLOCK (sym
)->alignment
;
4574 align
= BITS_PER_UNIT
;
4576 ref_size
= GET_MODE_SIZE (mode
);
4578 ref_size
= GET_MODE_SIZE (DImode
);
4580 return ((INTVAL (offs
) & (ref_size
- 1)) == 0
4581 && ((align
/ BITS_PER_UNIT
) & (ref_size
- 1)) == 0);
4591 /* Return true if the address X is valid for a PRFM instruction.
4592 STRICT_P is true if we should do strict checking with
4593 aarch64_classify_address. */
4596 aarch64_address_valid_for_prefetch_p (rtx x
, bool strict_p
)
4598 struct aarch64_address_info addr
;
4600 /* PRFM accepts the same addresses as DImode... */
4601 bool res
= aarch64_classify_address (&addr
, x
, DImode
, MEM
, strict_p
);
4605 /* ... except writeback forms. */
4606 return addr
.type
!= ADDRESS_REG_WB
;
4610 aarch64_symbolic_address_p (rtx x
)
4614 split_const (x
, &x
, &offset
);
4615 return GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
;
4618 /* Classify the base of symbolic expression X. */
4620 enum aarch64_symbol_type
4621 aarch64_classify_symbolic_expression (rtx x
)
4625 split_const (x
, &x
, &offset
);
4626 return aarch64_classify_symbol (x
, offset
);
4630 /* Return TRUE if X is a legitimate address for accessing memory in
4633 aarch64_legitimate_address_hook_p (machine_mode mode
, rtx x
, bool strict_p
)
4635 struct aarch64_address_info addr
;
4637 return aarch64_classify_address (&addr
, x
, mode
, MEM
, strict_p
);
4640 /* Return TRUE if X is a legitimate address for accessing memory in
4641 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
4644 aarch64_legitimate_address_p (machine_mode mode
, rtx x
,
4645 RTX_CODE outer_code
, bool strict_p
)
4647 struct aarch64_address_info addr
;
4649 return aarch64_classify_address (&addr
, x
, mode
, outer_code
, strict_p
);
4652 /* Split an out-of-range address displacement into a base and offset.
4653 Use 4KB range for 1- and 2-byte accesses and a 16KB range otherwise
4654 to increase opportunities for sharing the base address of different sizes.
4655 For unaligned accesses and TI/TF mode use the signed 9-bit range. */
4657 aarch64_legitimize_address_displacement (rtx
*disp
, rtx
*off
, machine_mode mode
)
4659 HOST_WIDE_INT offset
= INTVAL (*disp
);
4660 HOST_WIDE_INT base
= offset
& ~(GET_MODE_SIZE (mode
) < 4 ? 0xfff : 0x3ffc);
4662 if (mode
== TImode
|| mode
== TFmode
4663 || (offset
& (GET_MODE_SIZE (mode
) - 1)) != 0)
4664 base
= (offset
+ 0x100) & ~0x1ff;
4666 *off
= GEN_INT (base
);
4667 *disp
= GEN_INT (offset
- base
);
4671 /* Return TRUE if rtx X is immediate constant 0.0 */
4673 aarch64_float_const_zero_rtx_p (rtx x
)
4675 if (GET_MODE (x
) == VOIDmode
)
4678 if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x
)))
4679 return !HONOR_SIGNED_ZEROS (GET_MODE (x
));
4680 return real_equal (CONST_DOUBLE_REAL_VALUE (x
), &dconst0
);
4683 /* Return the fixed registers used for condition codes. */
4686 aarch64_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
4689 *p2
= INVALID_REGNUM
;
4693 /* This function is used by the call expanders of the machine description.
4694 RESULT is the register in which the result is returned. It's NULL for
4695 "call" and "sibcall".
4696 MEM is the location of the function call.
4697 SIBCALL indicates whether this function call is normal call or sibling call.
4698 It will generate different pattern accordingly. */
4701 aarch64_expand_call (rtx result
, rtx mem
, bool sibcall
)
4703 rtx call
, callee
, tmp
;
4707 gcc_assert (MEM_P (mem
));
4708 callee
= XEXP (mem
, 0);
4709 mode
= GET_MODE (callee
);
4710 gcc_assert (mode
== Pmode
);
4712 /* Decide if we should generate indirect calls by loading the
4713 address of the callee into a register before performing
4714 the branch-and-link. */
4715 if (SYMBOL_REF_P (callee
)
4716 ? (aarch64_is_long_call_p (callee
)
4717 || aarch64_is_noplt_call_p (callee
))
4719 XEXP (mem
, 0) = force_reg (mode
, callee
);
4721 call
= gen_rtx_CALL (VOIDmode
, mem
, const0_rtx
);
4723 if (result
!= NULL_RTX
)
4724 call
= gen_rtx_SET (result
, call
);
4729 tmp
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (Pmode
, LR_REGNUM
));
4731 vec
= gen_rtvec (2, call
, tmp
);
4732 call
= gen_rtx_PARALLEL (VOIDmode
, vec
);
4734 aarch64_emit_call_insn (call
);
4737 /* Emit call insn with PAT and do aarch64-specific handling. */
4740 aarch64_emit_call_insn (rtx pat
)
4742 rtx insn
= emit_call_insn (pat
);
4744 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
4745 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP0_REGNUM
));
4746 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP1_REGNUM
));
4750 aarch64_select_cc_mode (RTX_CODE code
, rtx x
, rtx y
)
4752 /* All floating point compares return CCFP if it is an equality
4753 comparison, and CCFPE otherwise. */
4754 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
4781 /* Equality comparisons of short modes against zero can be performed
4782 using the TST instruction with the appropriate bitmask. */
4783 if (y
== const0_rtx
&& REG_P (x
)
4784 && (code
== EQ
|| code
== NE
)
4785 && (GET_MODE (x
) == HImode
|| GET_MODE (x
) == QImode
))
4788 /* Similarly, comparisons of zero_extends from shorter modes can
4789 be performed using an ANDS with an immediate mask. */
4790 if (y
== const0_rtx
&& GET_CODE (x
) == ZERO_EXTEND
4791 && (GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
4792 && (GET_MODE (XEXP (x
, 0)) == HImode
|| GET_MODE (XEXP (x
, 0)) == QImode
)
4793 && (code
== EQ
|| code
== NE
))
4796 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
4798 && (code
== EQ
|| code
== NE
|| code
== LT
|| code
== GE
)
4799 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
|| GET_CODE (x
) == AND
4800 || GET_CODE (x
) == NEG
4801 || (GET_CODE (x
) == ZERO_EXTRACT
&& CONST_INT_P (XEXP (x
, 1))
4802 && CONST_INT_P (XEXP (x
, 2)))))
4805 /* A compare with a shifted operand. Because of canonicalization,
4806 the comparison will have to be swapped when we emit the assembly
4808 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
4809 && (REG_P (y
) || GET_CODE (y
) == SUBREG
|| y
== const0_rtx
)
4810 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
4811 || GET_CODE (x
) == LSHIFTRT
4812 || GET_CODE (x
) == ZERO_EXTEND
|| GET_CODE (x
) == SIGN_EXTEND
))
4815 /* Similarly for a negated operand, but we can only do this for
4817 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
4818 && (REG_P (y
) || GET_CODE (y
) == SUBREG
)
4819 && (code
== EQ
|| code
== NE
)
4820 && GET_CODE (x
) == NEG
)
4823 /* A test for unsigned overflow. */
4824 if ((GET_MODE (x
) == DImode
|| GET_MODE (x
) == TImode
)
4826 && GET_CODE (x
) == PLUS
4827 && GET_CODE (y
) == ZERO_EXTEND
)
4830 /* For everything else, return CCmode. */
4835 aarch64_get_condition_code_1 (enum machine_mode
, enum rtx_code
);
4838 aarch64_get_condition_code (rtx x
)
4840 machine_mode mode
= GET_MODE (XEXP (x
, 0));
4841 enum rtx_code comp_code
= GET_CODE (x
);
4843 if (GET_MODE_CLASS (mode
) != MODE_CC
)
4844 mode
= SELECT_CC_MODE (comp_code
, XEXP (x
, 0), XEXP (x
, 1));
4845 return aarch64_get_condition_code_1 (mode
, comp_code
);
4849 aarch64_get_condition_code_1 (enum machine_mode mode
, enum rtx_code comp_code
)
4857 case GE
: return AARCH64_GE
;
4858 case GT
: return AARCH64_GT
;
4859 case LE
: return AARCH64_LS
;
4860 case LT
: return AARCH64_MI
;
4861 case NE
: return AARCH64_NE
;
4862 case EQ
: return AARCH64_EQ
;
4863 case ORDERED
: return AARCH64_VC
;
4864 case UNORDERED
: return AARCH64_VS
;
4865 case UNLT
: return AARCH64_LT
;
4866 case UNLE
: return AARCH64_LE
;
4867 case UNGT
: return AARCH64_HI
;
4868 case UNGE
: return AARCH64_PL
;
4876 case NE
: return AARCH64_NE
;
4877 case EQ
: return AARCH64_EQ
;
4878 case GE
: return AARCH64_GE
;
4879 case GT
: return AARCH64_GT
;
4880 case LE
: return AARCH64_LE
;
4881 case LT
: return AARCH64_LT
;
4882 case GEU
: return AARCH64_CS
;
4883 case GTU
: return AARCH64_HI
;
4884 case LEU
: return AARCH64_LS
;
4885 case LTU
: return AARCH64_CC
;
4893 case NE
: return AARCH64_NE
;
4894 case EQ
: return AARCH64_EQ
;
4895 case GE
: return AARCH64_LE
;
4896 case GT
: return AARCH64_LT
;
4897 case LE
: return AARCH64_GE
;
4898 case LT
: return AARCH64_GT
;
4899 case GEU
: return AARCH64_LS
;
4900 case GTU
: return AARCH64_CC
;
4901 case LEU
: return AARCH64_CS
;
4902 case LTU
: return AARCH64_HI
;
4910 case NE
: return AARCH64_NE
;
4911 case EQ
: return AARCH64_EQ
;
4912 case GE
: return AARCH64_PL
;
4913 case LT
: return AARCH64_MI
;
4921 case NE
: return AARCH64_NE
;
4922 case EQ
: return AARCH64_EQ
;
4930 case NE
: return AARCH64_CS
;
4931 case EQ
: return AARCH64_CC
;
4944 aarch64_const_vec_all_same_in_range_p (rtx x
,
4945 HOST_WIDE_INT minval
,
4946 HOST_WIDE_INT maxval
)
4948 HOST_WIDE_INT firstval
;
4951 if (GET_CODE (x
) != CONST_VECTOR
4952 || GET_MODE_CLASS (GET_MODE (x
)) != MODE_VECTOR_INT
)
4955 firstval
= INTVAL (CONST_VECTOR_ELT (x
, 0));
4956 if (firstval
< minval
|| firstval
> maxval
)
4959 count
= CONST_VECTOR_NUNITS (x
);
4960 for (i
= 1; i
< count
; i
++)
4961 if (INTVAL (CONST_VECTOR_ELT (x
, i
)) != firstval
)
4968 aarch64_const_vec_all_same_int_p (rtx x
, HOST_WIDE_INT val
)
4970 return aarch64_const_vec_all_same_in_range_p (x
, val
, val
);
/* Bit positions of the N/Z/C/V flags in the immediate operand of CCMP.  */
#define AARCH64_CC_V 1
#define AARCH64_CC_C (1 << 1)
#define AARCH64_CC_Z (1 << 2)
#define AARCH64_CC_N (1 << 3)

/* N Z C V flags for ccmp.  Indexed by AARCH64_COND_CODE.  */
static const int aarch64_nzcv_codes[] =
{
  0,		/* EQ, Z == 1.  */
  AARCH64_CC_Z,	/* NE, Z == 0.  */
  0,		/* CS, C == 1.  */
  AARCH64_CC_C,	/* CC, C == 0.  */
  0,		/* MI, N == 1.  */
  AARCH64_CC_N, /* PL, N == 0.  */
  0,		/* VS, V == 1.  */
  AARCH64_CC_V, /* VC, V == 0.  */
  0,		/* HI, C ==1 && Z == 0.  */
  AARCH64_CC_C,	/* LS, !(C == 1 && Z == 0).  */
  AARCH64_CC_V,	/* GE, N == V.  */
  0,		/* LT, N != V.  */
  AARCH64_CC_Z, /* GT, Z == 0 && N == V.  */
  0,		/* LE, !(Z == 0 && N == V).  */
  0,		/* AL, Any.  */
  0		/* NV, Any.  */
};
5002 aarch64_print_operand (FILE *f
, rtx x
, int code
)
5006 /* An integer or symbol address without a preceding # sign. */
5008 switch (GET_CODE (x
))
5011 fprintf (f
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
5015 output_addr_const (f
, x
);
5019 if (GET_CODE (XEXP (x
, 0)) == PLUS
5020 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
5022 output_addr_const (f
, x
);
5028 output_operand_lossage ("Unsupported operand for code '%c'", code
);
5033 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
5037 if (!CONST_INT_P (x
)
5038 || (n
= exact_log2 (INTVAL (x
) & ~7)) <= 0)
5040 output_operand_lossage ("invalid operand for '%%%c'", code
);
5056 output_operand_lossage ("invalid operand for '%%%c'", code
);
5066 /* Print N such that 2^N == X. */
5067 if (!CONST_INT_P (x
) || (n
= exact_log2 (INTVAL (x
))) < 0)
5069 output_operand_lossage ("invalid operand for '%%%c'", code
);
5073 asm_fprintf (f
, "%d", n
);
5078 /* Print the number of non-zero bits in X (a const_int). */
5079 if (!CONST_INT_P (x
))
5081 output_operand_lossage ("invalid operand for '%%%c'", code
);
5085 asm_fprintf (f
, "%u", popcount_hwi (INTVAL (x
)));
5089 /* Print the higher numbered register of a pair (TImode) of regs. */
5090 if (!REG_P (x
) || !GP_REGNUM_P (REGNO (x
) + 1))
5092 output_operand_lossage ("invalid operand for '%%%c'", code
);
5096 asm_fprintf (f
, "%s", reg_names
[REGNO (x
) + 1]);
5103 /* Print a condition (eq, ne, etc) or its inverse. */
5105 /* CONST_TRUE_RTX means al/nv (al is the default, don't print it). */
5106 if (x
== const_true_rtx
)
5113 if (!COMPARISON_P (x
))
5115 output_operand_lossage ("invalid operand for '%%%c'", code
);
5119 cond_code
= aarch64_get_condition_code (x
);
5120 gcc_assert (cond_code
>= 0);
5122 cond_code
= AARCH64_INVERSE_CONDITION_CODE (cond_code
);
5123 fputs (aarch64_condition_codes
[cond_code
], f
);
5132 /* Print a scalar FP/SIMD register name. */
5133 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
5135 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
5138 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - V0_REGNUM
);
5145 /* Print the first FP/SIMD register name in a list. */
5146 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
5148 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
5151 asm_fprintf (f
, "v%d", REGNO (x
) - V0_REGNUM
+ (code
- 'S'));
5155 /* Print a scalar FP/SIMD register name + 1. */
5156 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
5158 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
5161 asm_fprintf (f
, "q%d", REGNO (x
) - V0_REGNUM
+ 1);
5165 /* Print bottom 16 bits of integer constant in hex. */
5166 if (!CONST_INT_P (x
))
5168 output_operand_lossage ("invalid operand for '%%%c'", code
);
5171 asm_fprintf (f
, "0x%wx", UINTVAL (x
) & 0xffff);
5176 /* Print a general register name or the zero register (32-bit or
5179 || (CONST_DOUBLE_P (x
) && aarch64_float_const_zero_rtx_p (x
)))
5181 asm_fprintf (f
, "%czr", code
);
5185 if (REG_P (x
) && GP_REGNUM_P (REGNO (x
)))
5187 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - R0_REGNUM
);
5191 if (REG_P (x
) && REGNO (x
) == SP_REGNUM
)
5193 asm_fprintf (f
, "%ssp", code
== 'w' ? "w" : "");
5200 /* Print a normal operand, if it's a general register, then we
5204 output_operand_lossage ("missing operand");
5208 switch (GET_CODE (x
))
5211 asm_fprintf (f
, "%s", reg_names
[REGNO (x
)]);
5215 output_address (GET_MODE (x
), XEXP (x
, 0));
5221 output_addr_const (asm_out_file
, x
);
5225 asm_fprintf (f
, "%wd", INTVAL (x
));
5229 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_VECTOR_INT
)
5232 aarch64_const_vec_all_same_in_range_p (x
,
5234 HOST_WIDE_INT_MAX
));
5235 asm_fprintf (f
, "%wd", INTVAL (CONST_VECTOR_ELT (x
, 0)));
5237 else if (aarch64_simd_imm_zero_p (x
, GET_MODE (x
)))
5246 /* Since we define TARGET_SUPPORTS_WIDE_INT we shouldn't ever
5247 be getting CONST_DOUBLEs holding integers. */
5248 gcc_assert (GET_MODE (x
) != VOIDmode
);
5249 if (aarch64_float_const_zero_rtx_p (x
))
5254 else if (aarch64_float_const_representable_p (x
))
5257 char float_buf
[buf_size
] = {'\0'};
5258 real_to_decimal_for_mode (float_buf
,
5259 CONST_DOUBLE_REAL_VALUE (x
),
5262 asm_fprintf (asm_out_file
, "%s", float_buf
);
5266 output_operand_lossage ("invalid constant");
5269 output_operand_lossage ("invalid operand");
5275 if (GET_CODE (x
) == HIGH
)
5278 switch (aarch64_classify_symbolic_expression (x
))
5280 case SYMBOL_SMALL_GOT_4G
:
5281 asm_fprintf (asm_out_file
, ":got:");
5284 case SYMBOL_SMALL_TLSGD
:
5285 asm_fprintf (asm_out_file
, ":tlsgd:");
5288 case SYMBOL_SMALL_TLSDESC
:
5289 asm_fprintf (asm_out_file
, ":tlsdesc:");
5292 case SYMBOL_SMALL_TLSIE
:
5293 asm_fprintf (asm_out_file
, ":gottprel:");
5296 case SYMBOL_TLSLE24
:
5297 asm_fprintf (asm_out_file
, ":tprel:");
5300 case SYMBOL_TINY_GOT
:
5307 output_addr_const (asm_out_file
, x
);
5311 switch (aarch64_classify_symbolic_expression (x
))
5313 case SYMBOL_SMALL_GOT_4G
:
5314 asm_fprintf (asm_out_file
, ":lo12:");
5317 case SYMBOL_SMALL_TLSGD
:
5318 asm_fprintf (asm_out_file
, ":tlsgd_lo12:");
5321 case SYMBOL_SMALL_TLSDESC
:
5322 asm_fprintf (asm_out_file
, ":tlsdesc_lo12:");
5325 case SYMBOL_SMALL_TLSIE
:
5326 asm_fprintf (asm_out_file
, ":gottprel_lo12:");
5329 case SYMBOL_TLSLE12
:
5330 asm_fprintf (asm_out_file
, ":tprel_lo12:");
5333 case SYMBOL_TLSLE24
:
5334 asm_fprintf (asm_out_file
, ":tprel_lo12_nc:");
5337 case SYMBOL_TINY_GOT
:
5338 asm_fprintf (asm_out_file
, ":got:");
5341 case SYMBOL_TINY_TLSIE
:
5342 asm_fprintf (asm_out_file
, ":gottprel:");
5348 output_addr_const (asm_out_file
, x
);
5353 switch (aarch64_classify_symbolic_expression (x
))
5355 case SYMBOL_TLSLE24
:
5356 asm_fprintf (asm_out_file
, ":tprel_hi12:");
5361 output_addr_const (asm_out_file
, x
);
5366 HOST_WIDE_INT cond_code
;
5369 if (!CONST_INT_P (x
))
5371 output_operand_lossage ("invalid operand for '%%%c'", code
);
5375 cond_code
= INTVAL (x
);
5376 gcc_assert (cond_code
>= 0 && cond_code
<= AARCH64_NV
);
5377 asm_fprintf (f
, "%d", aarch64_nzcv_codes
[cond_code
]);
5382 output_operand_lossage ("invalid operand prefix '%%%c'", code
);
5388 aarch64_print_operand_address (FILE *f
, machine_mode mode
, rtx x
)
5390 struct aarch64_address_info addr
;
5392 if (aarch64_classify_address (&addr
, x
, mode
, MEM
, true))
5395 case ADDRESS_REG_IMM
:
5396 if (addr
.offset
== const0_rtx
)
5397 asm_fprintf (f
, "[%s]", reg_names
[REGNO (addr
.base
)]);
5399 asm_fprintf (f
, "[%s, %wd]", reg_names
[REGNO (addr
.base
)],
5400 INTVAL (addr
.offset
));
5403 case ADDRESS_REG_REG
:
5404 if (addr
.shift
== 0)
5405 asm_fprintf (f
, "[%s, %s]", reg_names
[REGNO (addr
.base
)],
5406 reg_names
[REGNO (addr
.offset
)]);
5408 asm_fprintf (f
, "[%s, %s, lsl %u]", reg_names
[REGNO (addr
.base
)],
5409 reg_names
[REGNO (addr
.offset
)], addr
.shift
);
5412 case ADDRESS_REG_UXTW
:
5413 if (addr
.shift
== 0)
5414 asm_fprintf (f
, "[%s, w%d, uxtw]", reg_names
[REGNO (addr
.base
)],
5415 REGNO (addr
.offset
) - R0_REGNUM
);
5417 asm_fprintf (f
, "[%s, w%d, uxtw %u]", reg_names
[REGNO (addr
.base
)],
5418 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
5421 case ADDRESS_REG_SXTW
:
5422 if (addr
.shift
== 0)
5423 asm_fprintf (f
, "[%s, w%d, sxtw]", reg_names
[REGNO (addr
.base
)],
5424 REGNO (addr
.offset
) - R0_REGNUM
);
5426 asm_fprintf (f
, "[%s, w%d, sxtw %u]", reg_names
[REGNO (addr
.base
)],
5427 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
5430 case ADDRESS_REG_WB
:
5431 switch (GET_CODE (x
))
5434 asm_fprintf (f
, "[%s, %d]!", reg_names
[REGNO (addr
.base
)],
5435 GET_MODE_SIZE (mode
));
5438 asm_fprintf (f
, "[%s], %d", reg_names
[REGNO (addr
.base
)],
5439 GET_MODE_SIZE (mode
));
5442 asm_fprintf (f
, "[%s, -%d]!", reg_names
[REGNO (addr
.base
)],
5443 GET_MODE_SIZE (mode
));
5446 asm_fprintf (f
, "[%s], -%d", reg_names
[REGNO (addr
.base
)],
5447 GET_MODE_SIZE (mode
));
5450 asm_fprintf (f
, "[%s, %wd]!", reg_names
[REGNO (addr
.base
)],
5451 INTVAL (addr
.offset
));
5454 asm_fprintf (f
, "[%s], %wd", reg_names
[REGNO (addr
.base
)],
5455 INTVAL (addr
.offset
));
5462 case ADDRESS_LO_SUM
:
5463 asm_fprintf (f
, "[%s, #:lo12:", reg_names
[REGNO (addr
.base
)]);
5464 output_addr_const (f
, addr
.offset
);
5465 asm_fprintf (f
, "]");
5468 case ADDRESS_SYMBOLIC
:
5472 output_addr_const (f
, x
);
5476 aarch64_label_mentioned_p (rtx x
)
5481 if (GET_CODE (x
) == LABEL_REF
)
5484 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
5485 referencing instruction, but they are constant offsets, not
5487 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
5490 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
5491 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
5497 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
5498 if (aarch64_label_mentioned_p (XVECEXP (x
, i
, j
)))
5501 else if (fmt
[i
] == 'e' && aarch64_label_mentioned_p (XEXP (x
, i
)))
5508 /* Implement REGNO_REG_CLASS. */
5511 aarch64_regno_regclass (unsigned regno
)
5513 if (GP_REGNUM_P (regno
))
5514 return GENERAL_REGS
;
5516 if (regno
== SP_REGNUM
)
5519 if (regno
== FRAME_POINTER_REGNUM
5520 || regno
== ARG_POINTER_REGNUM
)
5521 return POINTER_REGS
;
5523 if (FP_REGNUM_P (regno
))
5524 return FP_LO_REGNUM_P (regno
) ? FP_LO_REGS
: FP_REGS
;
5530 aarch64_legitimize_address (rtx x
, rtx
/* orig_x */, machine_mode mode
)
5532 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
5533 where mask is selected by alignment and size of the offset.
5534 We try to pick as large a range for the offset as possible to
5535 maximize the chance of a CSE. However, for aligned addresses
5536 we limit the range to 4k so that structures with different sized
5537 elements are likely to use the same base. We need to be careful
5538 not to split a CONST for some forms of address expression, otherwise
5539 it will generate sub-optimal code. */
5541 if (GET_CODE (x
) == PLUS
&& CONST_INT_P (XEXP (x
, 1)))
5543 rtx base
= XEXP (x
, 0);
5544 rtx offset_rtx
= XEXP (x
, 1);
5545 HOST_WIDE_INT offset
= INTVAL (offset_rtx
);
5547 if (GET_CODE (base
) == PLUS
)
5549 rtx op0
= XEXP (base
, 0);
5550 rtx op1
= XEXP (base
, 1);
5552 /* Force any scaling into a temp for CSE. */
5553 op0
= force_reg (Pmode
, op0
);
5554 op1
= force_reg (Pmode
, op1
);
5556 /* Let the pointer register be in op0. */
5557 if (REG_POINTER (op1
))
5558 std::swap (op0
, op1
);
5560 /* If the pointer is virtual or frame related, then we know that
5561 virtual register instantiation or register elimination is going
5562 to apply a second constant. We want the two constants folded
5563 together easily. Therefore, emit as (OP0 + CONST) + OP1. */
5564 if (virt_or_elim_regno_p (REGNO (op0
)))
5566 base
= expand_binop (Pmode
, add_optab
, op0
, offset_rtx
,
5567 NULL_RTX
, true, OPTAB_DIRECT
);
5568 return gen_rtx_PLUS (Pmode
, base
, op1
);
5571 /* Otherwise, in order to encourage CSE (and thence loop strength
5572 reduce) scaled addresses, emit as (OP0 + OP1) + CONST. */
5573 base
= expand_binop (Pmode
, add_optab
, op0
, op1
,
5574 NULL_RTX
, true, OPTAB_DIRECT
);
5575 x
= gen_rtx_PLUS (Pmode
, base
, offset_rtx
);
5578 /* Does it look like we'll need a 16-byte load/store-pair operation? */
5579 HOST_WIDE_INT base_offset
;
5580 if (GET_MODE_SIZE (mode
) > 16)
5581 base_offset
= (offset
+ 0x400) & ~0x7f0;
5582 /* For offsets aren't a multiple of the access size, the limit is
5584 else if (offset
& (GET_MODE_SIZE (mode
) - 1))
5586 base_offset
= (offset
+ 0x100) & ~0x1ff;
5588 /* BLKmode typically uses LDP of X-registers. */
5589 if (mode
== BLKmode
)
5590 base_offset
= (offset
+ 512) & ~0x3ff;
5592 /* Small negative offsets are supported. */
5593 else if (IN_RANGE (offset
, -256, 0))
5595 else if (mode
== TImode
|| mode
== TFmode
)
5596 base_offset
= (offset
+ 0x100) & ~0x1ff;
5597 /* Use 12-bit offset by access size. */
5599 base_offset
= offset
& (~0xfff * GET_MODE_SIZE (mode
));
5601 if (base_offset
!= 0)
5603 base
= plus_constant (Pmode
, base
, base_offset
);
5604 base
= force_operand (base
, NULL_RTX
);
5605 return plus_constant (Pmode
, base
, offset
- base_offset
);
5612 /* Return the reload icode required for a constant pool in mode. */
5613 static enum insn_code
5614 aarch64_constant_pool_reload_icode (machine_mode mode
)
5619 return CODE_FOR_aarch64_reload_movcpsfdi
;
5622 return CODE_FOR_aarch64_reload_movcpdfdi
;
5625 return CODE_FOR_aarch64_reload_movcptfdi
;
5628 return CODE_FOR_aarch64_reload_movcpv8qidi
;
5631 return CODE_FOR_aarch64_reload_movcpv16qidi
;
5634 return CODE_FOR_aarch64_reload_movcpv4hidi
;
5637 return CODE_FOR_aarch64_reload_movcpv8hidi
;
5640 return CODE_FOR_aarch64_reload_movcpv2sidi
;
5643 return CODE_FOR_aarch64_reload_movcpv4sidi
;
5646 return CODE_FOR_aarch64_reload_movcpv2didi
;
5649 return CODE_FOR_aarch64_reload_movcpv2dfdi
;
5658 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED
, rtx x
,
5661 secondary_reload_info
*sri
)
5664 /* If we have to disable direct literal pool loads and stores because the
5665 function is too big, then we need a scratch register. */
5666 if (MEM_P (x
) && GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
)
5667 && (SCALAR_FLOAT_MODE_P (GET_MODE (x
))
5668 || targetm
.vector_mode_supported_p (GET_MODE (x
)))
5669 && !aarch64_pcrelative_literal_loads
)
5671 sri
->icode
= aarch64_constant_pool_reload_icode (mode
);
5675 /* Without the TARGET_SIMD instructions we cannot move a Q register
5676 to a Q register directly. We need a scratch. */
5677 if (REG_P (x
) && (mode
== TFmode
|| mode
== TImode
) && mode
== GET_MODE (x
)
5678 && FP_REGNUM_P (REGNO (x
)) && !TARGET_SIMD
5679 && reg_class_subset_p (rclass
, FP_REGS
))
5682 sri
->icode
= CODE_FOR_aarch64_reload_movtf
;
5683 else if (mode
== TImode
)
5684 sri
->icode
= CODE_FOR_aarch64_reload_movti
;
5688 /* A TFmode or TImode memory access should be handled via an FP_REGS
5689 because AArch64 has richer addressing modes for LDR/STR instructions
5690 than LDP/STP instructions. */
5691 if (TARGET_FLOAT
&& rclass
== GENERAL_REGS
5692 && GET_MODE_SIZE (mode
) == 16 && MEM_P (x
))
5695 if (rclass
== FP_REGS
&& (mode
== TImode
|| mode
== TFmode
) && CONSTANT_P(x
))
5696 return GENERAL_REGS
;
5702 aarch64_can_eliminate (const int from
, const int to
)
5704 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
5705 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
5707 if (frame_pointer_needed
)
5709 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5711 if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
5713 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
5714 && !cfun
->calls_alloca
)
5716 if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5723 /* If we decided that we didn't need a leaf frame pointer but then used
5724 LR in the function, then we'll want a frame pointer after all, so
5725 prevent this elimination to ensure a frame pointer is used. */
5726 if (to
== STACK_POINTER_REGNUM
5727 && flag_omit_leaf_frame_pointer
5728 && df_regs_ever_live_p (LR_REGNUM
))
5736 aarch64_initial_elimination_offset (unsigned from
, unsigned to
)
5738 aarch64_layout_frame ();
5740 if (to
== HARD_FRAME_POINTER_REGNUM
)
5742 if (from
== ARG_POINTER_REGNUM
)
5743 return cfun
->machine
->frame
.hard_fp_offset
;
5745 if (from
== FRAME_POINTER_REGNUM
)
5746 return cfun
->machine
->frame
.hard_fp_offset
5747 - cfun
->machine
->frame
.locals_offset
;
5750 if (to
== STACK_POINTER_REGNUM
)
5752 if (from
== FRAME_POINTER_REGNUM
)
5753 return cfun
->machine
->frame
.frame_size
5754 - cfun
->machine
->frame
.locals_offset
;
5757 return cfun
->machine
->frame
.frame_size
;
5760 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
5764 aarch64_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
5768 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
5773 aarch64_asm_trampoline_template (FILE *f
)
5777 asm_fprintf (f
, "\tldr\tw%d, .+16\n", IP1_REGNUM
- R0_REGNUM
);
5778 asm_fprintf (f
, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM
- R0_REGNUM
);
5782 asm_fprintf (f
, "\tldr\t%s, .+16\n", reg_names
[IP1_REGNUM
]);
5783 asm_fprintf (f
, "\tldr\t%s, .+20\n", reg_names
[STATIC_CHAIN_REGNUM
]);
5785 asm_fprintf (f
, "\tbr\t%s\n", reg_names
[IP1_REGNUM
]);
5786 assemble_aligned_integer (4, const0_rtx
);
5787 assemble_aligned_integer (POINTER_BYTES
, const0_rtx
);
5788 assemble_aligned_integer (POINTER_BYTES
, const0_rtx
);
5792 aarch64_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
5794 rtx fnaddr
, mem
, a_tramp
;
5795 const int tramp_code_sz
= 16;
5797 /* Don't need to copy the trailing D-words, we fill those in below. */
5798 emit_block_move (m_tramp
, assemble_trampoline_template (),
5799 GEN_INT (tramp_code_sz
), BLOCK_OP_NORMAL
);
5800 mem
= adjust_address (m_tramp
, ptr_mode
, tramp_code_sz
);
5801 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
5802 if (GET_MODE (fnaddr
) != ptr_mode
)
5803 fnaddr
= convert_memory_address (ptr_mode
, fnaddr
);
5804 emit_move_insn (mem
, fnaddr
);
5806 mem
= adjust_address (m_tramp
, ptr_mode
, tramp_code_sz
+ POINTER_BYTES
);
5807 emit_move_insn (mem
, chain_value
);
5809 /* XXX We should really define a "clear_cache" pattern and use
5810 gen_clear_cache(). */
5811 a_tramp
= XEXP (m_tramp
, 0);
5812 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
5813 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, ptr_mode
,
5814 plus_constant (ptr_mode
, a_tramp
, TRAMPOLINE_SIZE
),
5818 static unsigned char
5819 aarch64_class_max_nregs (reg_class_t regclass
, machine_mode mode
)
5823 case CALLER_SAVE_REGS
:
5830 aarch64_vector_mode_p (mode
)
5831 ? (GET_MODE_SIZE (mode
) + UNITS_PER_VREG
- 1) / UNITS_PER_VREG
5832 : (GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
5846 aarch64_preferred_reload_class (rtx x
, reg_class_t regclass
)
5848 if (regclass
== POINTER_REGS
)
5849 return GENERAL_REGS
;
5851 if (regclass
== STACK_REG
)
5854 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x
)), POINTER_REGS
))
5860 /* If it's an integer immediate that MOVI can't handle, then
5861 FP_REGS is not an option, so we return NO_REGS instead. */
5862 if (CONST_INT_P (x
) && reg_class_subset_p (regclass
, FP_REGS
)
5863 && !aarch64_simd_imm_scalar_p (x
, GET_MODE (x
)))
5866 /* Register eliminiation can result in a request for
5867 SP+constant->FP_REGS. We cannot support such operations which
5868 use SP as source and an FP_REG as destination, so reject out
5870 if (! reg_class_subset_p (regclass
, GENERAL_REGS
) && GET_CODE (x
) == PLUS
)
5872 rtx lhs
= XEXP (x
, 0);
5874 /* Look through a possible SUBREG introduced by ILP32. */
5875 if (GET_CODE (lhs
) == SUBREG
)
5876 lhs
= SUBREG_REG (lhs
);
5878 gcc_assert (REG_P (lhs
));
5879 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs
)),
5888 aarch64_asm_output_labelref (FILE* f
, const char *name
)
5890 asm_fprintf (f
, "%U%s", name
);
5894 aarch64_elf_asm_constructor (rtx symbol
, int priority
)
5896 if (priority
== DEFAULT_INIT_PRIORITY
)
5897 default_ctor_section_asm_out_constructor (symbol
, priority
);
5901 /* While priority is known to be in range [0, 65535], so 18 bytes
5902 would be enough, the compiler might not know that. To avoid
5903 -Wformat-truncation false positive, use a larger size. */
5905 snprintf (buf
, sizeof (buf
), ".init_array.%.5u", priority
);
5906 s
= get_section (buf
, SECTION_WRITE
, NULL
);
5907 switch_to_section (s
);
5908 assemble_align (POINTER_SIZE
);
5909 assemble_aligned_integer (POINTER_BYTES
, symbol
);
5914 aarch64_elf_asm_destructor (rtx symbol
, int priority
)
5916 if (priority
== DEFAULT_INIT_PRIORITY
)
5917 default_dtor_section_asm_out_destructor (symbol
, priority
);
5921 /* While priority is known to be in range [0, 65535], so 18 bytes
5922 would be enough, the compiler might not know that. To avoid
5923 -Wformat-truncation false positive, use a larger size. */
5925 snprintf (buf
, sizeof (buf
), ".fini_array.%.5u", priority
);
5926 s
= get_section (buf
, SECTION_WRITE
, NULL
);
5927 switch_to_section (s
);
5928 assemble_align (POINTER_SIZE
);
5929 assemble_aligned_integer (POINTER_BYTES
, symbol
);
5934 aarch64_output_casesi (rtx
*operands
)
5938 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
5940 static const char *const patterns
[4][2] =
5943 "ldrb\t%w3, [%0,%w1,uxtw]",
5944 "add\t%3, %4, %w3, sxtb #2"
5947 "ldrh\t%w3, [%0,%w1,uxtw #1]",
5948 "add\t%3, %4, %w3, sxth #2"
5951 "ldr\t%w3, [%0,%w1,uxtw #2]",
5952 "add\t%3, %4, %w3, sxtw #2"
5954 /* We assume that DImode is only generated when not optimizing and
5955 that we don't really need 64-bit address offsets. That would
5956 imply an object file with 8GB of code in a single function! */
5958 "ldr\t%w3, [%0,%w1,uxtw #2]",
5959 "add\t%3, %4, %w3, sxtw #2"
5963 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
5965 index
= exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec
)));
5967 gcc_assert (index
>= 0 && index
<= 3);
5969 /* Need to implement table size reduction, by chaning the code below. */
5970 output_asm_insn (patterns
[index
][0], operands
);
5971 ASM_GENERATE_INTERNAL_LABEL (label
, "Lrtx", CODE_LABEL_NUMBER (operands
[2]));
5972 snprintf (buf
, sizeof (buf
),
5973 "adr\t%%4, %s", targetm
.strip_name_encoding (label
));
5974 output_asm_insn (buf
, operands
);
5975 output_asm_insn (patterns
[index
][1], operands
);
5976 output_asm_insn ("br\t%3", operands
);
5977 assemble_label (asm_out_file
, label
);
5982 /* Return size in bits of an arithmetic operand which is shifted/scaled and
5983 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
5987 aarch64_uxt_size (int shift
, HOST_WIDE_INT mask
)
5989 if (shift
>= 0 && shift
<= 3)
5992 for (size
= 8; size
<= 32; size
*= 2)
5994 HOST_WIDE_INT bits
= ((HOST_WIDE_INT
)1U << size
) - 1;
5995 if (mask
== bits
<< shift
)
6002 /* Constant pools are per function only when PC relative
6003 literal loads are true or we are in the large memory
6007 aarch64_can_use_per_function_literal_pools_p (void)
6009 return (aarch64_pcrelative_literal_loads
6010 || aarch64_cmodel
== AARCH64_CMODEL_LARGE
);
6014 aarch64_use_blocks_for_constant_p (machine_mode
, const_rtx
)
6016 /* Fixme:: In an ideal world this would work similar
6017 to the logic in aarch64_select_rtx_section but this
6018 breaks bootstrap in gcc go. For now we workaround
6019 this by returning false here. */
6023 /* Select appropriate section for constants depending
6024 on where we place literal pools. */
6027 aarch64_select_rtx_section (machine_mode mode
,
6029 unsigned HOST_WIDE_INT align
)
6031 if (aarch64_can_use_per_function_literal_pools_p ())
6032 return function_section (current_function_decl
);
6034 return default_elf_select_rtx_section (mode
, x
, align
);
6037 /* Implement ASM_OUTPUT_POOL_EPILOGUE. */
6039 aarch64_asm_output_pool_epilogue (FILE *f
, const char *, tree
,
6040 HOST_WIDE_INT offset
)
6042 /* When using per-function literal pools, we must ensure that any code
6043 section is aligned to the minimal instruction length, lest we get
6044 errors from the assembler re "unaligned instructions". */
6045 if ((offset
& 3) && aarch64_can_use_per_function_literal_pools_p ())
6046 ASM_OUTPUT_ALIGN (f
, 2);
6051 /* Helper function for rtx cost calculation. Strip a shift expression
6052 from X. Returns the inner operand if successful, or the original
6053 expression on failure. */
6055 aarch64_strip_shift (rtx x
)
6059 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
6060 we can convert both to ROR during final output. */
6061 if ((GET_CODE (op
) == ASHIFT
6062 || GET_CODE (op
) == ASHIFTRT
6063 || GET_CODE (op
) == LSHIFTRT
6064 || GET_CODE (op
) == ROTATERT
6065 || GET_CODE (op
) == ROTATE
)
6066 && CONST_INT_P (XEXP (op
, 1)))
6067 return XEXP (op
, 0);
6069 if (GET_CODE (op
) == MULT
6070 && CONST_INT_P (XEXP (op
, 1))
6071 && ((unsigned) exact_log2 (INTVAL (XEXP (op
, 1)))) < 64)
6072 return XEXP (op
, 0);
6077 /* Helper function for rtx cost calculation. Strip an extend
6078 expression from X. Returns the inner operand if successful, or the
6079 original expression on failure. We deal with a number of possible
6080 canonicalization variations here. */
6082 aarch64_strip_extend (rtx x
)
6086 /* Zero and sign extraction of a widened value. */
6087 if ((GET_CODE (op
) == ZERO_EXTRACT
|| GET_CODE (op
) == SIGN_EXTRACT
)
6088 && XEXP (op
, 2) == const0_rtx
6089 && GET_CODE (XEXP (op
, 0)) == MULT
6090 && aarch64_is_extend_from_extract (GET_MODE (op
), XEXP (XEXP (op
, 0), 1),
6092 return XEXP (XEXP (op
, 0), 0);
6094 /* It can also be represented (for zero-extend) as an AND with an
6096 if (GET_CODE (op
) == AND
6097 && GET_CODE (XEXP (op
, 0)) == MULT
6098 && CONST_INT_P (XEXP (XEXP (op
, 0), 1))
6099 && CONST_INT_P (XEXP (op
, 1))
6100 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op
, 0), 1))),
6101 INTVAL (XEXP (op
, 1))) != 0)
6102 return XEXP (XEXP (op
, 0), 0);
6104 /* Now handle extended register, as this may also have an optional
6105 left shift by 1..4. */
6106 if (GET_CODE (op
) == ASHIFT
6107 && CONST_INT_P (XEXP (op
, 1))
6108 && ((unsigned HOST_WIDE_INT
) INTVAL (XEXP (op
, 1))) <= 4)
6111 if (GET_CODE (op
) == ZERO_EXTEND
6112 || GET_CODE (op
) == SIGN_EXTEND
)
6121 /* Return true iff CODE is a shift supported in combination
6122 with arithmetic instructions. */
6125 aarch64_shift_p (enum rtx_code code
)
6127 return code
== ASHIFT
|| code
== ASHIFTRT
|| code
== LSHIFTRT
;
6130 /* Helper function for rtx cost calculation. Calculate the cost of
6131 a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
6132 Return the calculated cost of the expression, recursing manually in to
6133 operands where needed. */
6136 aarch64_rtx_mult_cost (rtx x
, enum rtx_code code
, int outer
, bool speed
)
6139 const struct cpu_cost_table
*extra_cost
6140 = aarch64_tune_params
.insn_extra_cost
;
6142 bool compound_p
= (outer
== PLUS
|| outer
== MINUS
);
6143 machine_mode mode
= GET_MODE (x
);
6145 gcc_checking_assert (code
== MULT
);
6150 if (VECTOR_MODE_P (mode
))
6151 mode
= GET_MODE_INNER (mode
);
6153 /* Integer multiply/fma. */
6154 if (GET_MODE_CLASS (mode
) == MODE_INT
)
6156 /* The multiply will be canonicalized as a shift, cost it as such. */
6157 if (aarch64_shift_p (GET_CODE (x
))
6158 || (CONST_INT_P (op1
)
6159 && exact_log2 (INTVAL (op1
)) > 0))
6161 bool is_extend
= GET_CODE (op0
) == ZERO_EXTEND
6162 || GET_CODE (op0
) == SIGN_EXTEND
;
6168 /* ARITH + shift-by-register. */
6169 cost
+= extra_cost
->alu
.arith_shift_reg
;
6171 /* ARITH + extended register. We don't have a cost field
6172 for ARITH+EXTEND+SHIFT, so use extend_arith here. */
6173 cost
+= extra_cost
->alu
.extend_arith
;
6175 /* ARITH + shift-by-immediate. */
6176 cost
+= extra_cost
->alu
.arith_shift
;
6179 /* LSL (immediate). */
6180 cost
+= extra_cost
->alu
.shift
;
6183 /* Strip extends as we will have costed them in the case above. */
6185 op0
= aarch64_strip_extend (op0
);
6187 cost
+= rtx_cost (op0
, VOIDmode
, code
, 0, speed
);
6192 /* MNEG or [US]MNEGL. Extract the NEG operand and indicate that it's a
6193 compound and let the below cases handle it. After all, MNEG is a
6194 special-case alias of MSUB. */
6195 if (GET_CODE (op0
) == NEG
)
6197 op0
= XEXP (op0
, 0);
6201 /* Integer multiplies or FMAs have zero/sign extending variants. */
6202 if ((GET_CODE (op0
) == ZERO_EXTEND
6203 && GET_CODE (op1
) == ZERO_EXTEND
)
6204 || (GET_CODE (op0
) == SIGN_EXTEND
6205 && GET_CODE (op1
) == SIGN_EXTEND
))
6207 cost
+= rtx_cost (XEXP (op0
, 0), VOIDmode
, MULT
, 0, speed
);
6208 cost
+= rtx_cost (XEXP (op1
, 0), VOIDmode
, MULT
, 1, speed
);
6213 /* SMADDL/UMADDL/UMSUBL/SMSUBL. */
6214 cost
+= extra_cost
->mult
[0].extend_add
;
6216 /* MUL/SMULL/UMULL. */
6217 cost
+= extra_cost
->mult
[0].extend
;
6223 /* This is either an integer multiply or a MADD. In both cases
6224 we want to recurse and cost the operands. */
6225 cost
+= rtx_cost (op0
, mode
, MULT
, 0, speed
);
6226 cost
+= rtx_cost (op1
, mode
, MULT
, 1, speed
);
6232 cost
+= extra_cost
->mult
[mode
== DImode
].add
;
6235 cost
+= extra_cost
->mult
[mode
== DImode
].simple
;
6244 /* Floating-point FMA/FMUL can also support negations of the
6245 operands, unless the rounding mode is upward or downward in
6246 which case FNMUL is different than FMUL with operand negation. */
6247 bool neg0
= GET_CODE (op0
) == NEG
;
6248 bool neg1
= GET_CODE (op1
) == NEG
;
6249 if (compound_p
|| !flag_rounding_math
|| (neg0
&& neg1
))
6252 op0
= XEXP (op0
, 0);
6254 op1
= XEXP (op1
, 0);
6258 /* FMADD/FNMADD/FNMSUB/FMSUB. */
6259 cost
+= extra_cost
->fp
[mode
== DFmode
].fma
;
6262 cost
+= extra_cost
->fp
[mode
== DFmode
].mult
;
6265 cost
+= rtx_cost (op0
, mode
, MULT
, 0, speed
);
6266 cost
+= rtx_cost (op1
, mode
, MULT
, 1, speed
);
6272 aarch64_address_cost (rtx x
,
6274 addr_space_t as ATTRIBUTE_UNUSED
,
6277 enum rtx_code c
= GET_CODE (x
);
6278 const struct cpu_addrcost_table
*addr_cost
= aarch64_tune_params
.addr_cost
;
6279 struct aarch64_address_info info
;
6283 if (!aarch64_classify_address (&info
, x
, mode
, c
, false))
6285 if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
)
6287 /* This is a CONST or SYMBOL ref which will be split
6288 in a different way depending on the code model in use.
6289 Cost it through the generic infrastructure. */
6290 int cost_symbol_ref
= rtx_cost (x
, Pmode
, MEM
, 1, speed
);
6291 /* Divide through by the cost of one instruction to
6292 bring it to the same units as the address costs. */
6293 cost_symbol_ref
/= COSTS_N_INSNS (1);
6294 /* The cost is then the cost of preparing the address,
6295 followed by an immediate (possibly 0) offset. */
6296 return cost_symbol_ref
+ addr_cost
->imm_offset
;
6300 /* This is most likely a jump table from a case
6302 return addr_cost
->register_offset
;
6308 case ADDRESS_LO_SUM
:
6309 case ADDRESS_SYMBOLIC
:
6310 case ADDRESS_REG_IMM
:
6311 cost
+= addr_cost
->imm_offset
;
6314 case ADDRESS_REG_WB
:
6315 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== PRE_MODIFY
)
6316 cost
+= addr_cost
->pre_modify
;
6317 else if (c
== POST_INC
|| c
== POST_DEC
|| c
== POST_MODIFY
)
6318 cost
+= addr_cost
->post_modify
;
6324 case ADDRESS_REG_REG
:
6325 cost
+= addr_cost
->register_offset
;
6328 case ADDRESS_REG_SXTW
:
6329 cost
+= addr_cost
->register_sextend
;
6332 case ADDRESS_REG_UXTW
:
6333 cost
+= addr_cost
->register_zextend
;
6343 /* For the sake of calculating the cost of the shifted register
6344 component, we can treat same sized modes in the same way. */
6345 switch (GET_MODE_BITSIZE (mode
))
6348 cost
+= addr_cost
->addr_scale_costs
.hi
;
6352 cost
+= addr_cost
->addr_scale_costs
.si
;
6356 cost
+= addr_cost
->addr_scale_costs
.di
;
6359 /* We can't tell, or this is a 128-bit vector. */
6361 cost
+= addr_cost
->addr_scale_costs
.ti
;
6369 /* Return the cost of a branch. If SPEED_P is true then the compiler is
6370 optimizing for speed. If PREDICTABLE_P is true then the branch is predicted
6374 aarch64_branch_cost (bool speed_p
, bool predictable_p
)
6376 /* When optimizing for speed, use the cost of unpredictable branches. */
6377 const struct cpu_branch_cost
*branch_costs
=
6378 aarch64_tune_params
.branch_costs
;
6380 if (!speed_p
|| predictable_p
)
6381 return branch_costs
->predictable
;
6383 return branch_costs
->unpredictable
;
6386 /* Return true if the RTX X in mode MODE is a zero or sign extract
6387 usable in an ADD or SUB (extended register) instruction. */
6389 aarch64_rtx_arith_op_extract_p (rtx x
, machine_mode mode
)
6391 /* Catch add with a sign extract.
6392 This is add_<optab><mode>_multp2. */
6393 if (GET_CODE (x
) == SIGN_EXTRACT
6394 || GET_CODE (x
) == ZERO_EXTRACT
)
6396 rtx op0
= XEXP (x
, 0);
6397 rtx op1
= XEXP (x
, 1);
6398 rtx op2
= XEXP (x
, 2);
6400 if (GET_CODE (op0
) == MULT
6401 && CONST_INT_P (op1
)
6402 && op2
== const0_rtx
6403 && CONST_INT_P (XEXP (op0
, 1))
6404 && aarch64_is_extend_from_extract (mode
,
6411 /* The simple case <ARITH>, XD, XN, XM, [us]xt.
6413 else if (GET_CODE (x
) == SIGN_EXTEND
6414 || GET_CODE (x
) == ZERO_EXTEND
)
6415 return REG_P (XEXP (x
, 0));
6421 aarch64_frint_unspec_p (unsigned int u
)
6439 /* Return true iff X is an rtx that will match an extr instruction
6440 i.e. as described in the *extr<mode>5_insn family of patterns.
6441 OP0 and OP1 will be set to the operands of the shifts involved
6442 on success and will be NULL_RTX otherwise. */
6445 aarch64_extr_rtx_p (rtx x
, rtx
*res_op0
, rtx
*res_op1
)
6448 machine_mode mode
= GET_MODE (x
);
6450 *res_op0
= NULL_RTX
;
6451 *res_op1
= NULL_RTX
;
6453 if (GET_CODE (x
) != IOR
)
6459 if ((GET_CODE (op0
) == ASHIFT
&& GET_CODE (op1
) == LSHIFTRT
)
6460 || (GET_CODE (op1
) == ASHIFT
&& GET_CODE (op0
) == LSHIFTRT
))
6462 /* Canonicalise locally to ashift in op0, lshiftrt in op1. */
6463 if (GET_CODE (op1
) == ASHIFT
)
6464 std::swap (op0
, op1
);
6466 if (!CONST_INT_P (XEXP (op0
, 1)) || !CONST_INT_P (XEXP (op1
, 1)))
6469 unsigned HOST_WIDE_INT shft_amnt_0
= UINTVAL (XEXP (op0
, 1));
6470 unsigned HOST_WIDE_INT shft_amnt_1
= UINTVAL (XEXP (op1
, 1));
6472 if (shft_amnt_0
< GET_MODE_BITSIZE (mode
)
6473 && shft_amnt_0
+ shft_amnt_1
== GET_MODE_BITSIZE (mode
))
6475 *res_op0
= XEXP (op0
, 0);
6476 *res_op1
= XEXP (op1
, 0);
6484 /* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
6485 storing it in *COST. Result is true if the total cost of the operation
6486 has now been calculated. */
6488 aarch64_if_then_else_costs (rtx op0
, rtx op1
, rtx op2
, int *cost
, bool speed
)
6492 enum rtx_code cmpcode
;
6494 if (COMPARISON_P (op0
))
6496 inner
= XEXP (op0
, 0);
6497 comparator
= XEXP (op0
, 1);
6498 cmpcode
= GET_CODE (op0
);
6503 comparator
= const0_rtx
;
6507 if (GET_CODE (op1
) == PC
|| GET_CODE (op2
) == PC
)
6509 /* Conditional branch. */
6510 if (GET_MODE_CLASS (GET_MODE (inner
)) == MODE_CC
)
6514 if (cmpcode
== NE
|| cmpcode
== EQ
)
6516 if (comparator
== const0_rtx
)
6518 /* TBZ/TBNZ/CBZ/CBNZ. */
6519 if (GET_CODE (inner
) == ZERO_EXTRACT
)
6521 *cost
+= rtx_cost (XEXP (inner
, 0), VOIDmode
,
6522 ZERO_EXTRACT
, 0, speed
);
6525 *cost
+= rtx_cost (inner
, VOIDmode
, cmpcode
, 0, speed
);
6530 else if (cmpcode
== LT
|| cmpcode
== GE
)
6533 if (comparator
== const0_rtx
)
6538 else if (GET_MODE_CLASS (GET_MODE (inner
)) == MODE_CC
)
6541 if (GET_CODE (op1
) == COMPARE
)
6543 /* Increase cost of CCMP reg, 0, imm, CC to prefer CMP reg, 0. */
6544 if (XEXP (op1
, 1) == const0_rtx
)
6548 machine_mode mode
= GET_MODE (XEXP (op1
, 0));
6549 const struct cpu_cost_table
*extra_cost
6550 = aarch64_tune_params
.insn_extra_cost
;
6552 if (GET_MODE_CLASS (mode
) == MODE_INT
)
6553 *cost
+= extra_cost
->alu
.arith
;
6555 *cost
+= extra_cost
->fp
[mode
== DFmode
].compare
;
6560 /* It's a conditional operation based on the status flags,
6561 so it must be some flavor of CSEL. */
6563 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
6564 if (GET_CODE (op1
) == NEG
6565 || GET_CODE (op1
) == NOT
6566 || (GET_CODE (op1
) == PLUS
&& XEXP (op1
, 1) == const1_rtx
))
6567 op1
= XEXP (op1
, 0);
6568 else if (GET_CODE (op1
) == ZERO_EXTEND
&& GET_CODE (op2
) == ZERO_EXTEND
)
6570 /* CSEL with zero-extension (*cmovdi_insn_uxtw). */
6571 op1
= XEXP (op1
, 0);
6572 op2
= XEXP (op2
, 0);
6575 *cost
+= rtx_cost (op1
, VOIDmode
, IF_THEN_ELSE
, 1, speed
);
6576 *cost
+= rtx_cost (op2
, VOIDmode
, IF_THEN_ELSE
, 2, speed
);
6580 /* We don't know what this is, cost all operands. */
6584 /* Check whether X is a bitfield operation of the form shift + extend that
6585 maps down to a UBFIZ/SBFIZ/UBFX/SBFX instruction. If so, return the
6586 operand to which the bitfield operation is applied. Otherwise return
6590 aarch64_extend_bitfield_pattern_p (rtx x
)
6592 rtx_code outer_code
= GET_CODE (x
);
6593 machine_mode outer_mode
= GET_MODE (x
);
6595 if (outer_code
!= ZERO_EXTEND
&& outer_code
!= SIGN_EXTEND
6596 && outer_mode
!= SImode
&& outer_mode
!= DImode
)
6599 rtx inner
= XEXP (x
, 0);
6600 rtx_code inner_code
= GET_CODE (inner
);
6601 machine_mode inner_mode
= GET_MODE (inner
);
6607 if (CONST_INT_P (XEXP (inner
, 1))
6608 && (inner_mode
== QImode
|| inner_mode
== HImode
))
6609 op
= XEXP (inner
, 0);
6612 if (outer_code
== ZERO_EXTEND
&& CONST_INT_P (XEXP (inner
, 1))
6613 && (inner_mode
== QImode
|| inner_mode
== HImode
))
6614 op
= XEXP (inner
, 0);
6617 if (outer_code
== SIGN_EXTEND
&& CONST_INT_P (XEXP (inner
, 1))
6618 && (inner_mode
== QImode
|| inner_mode
== HImode
))
6619 op
= XEXP (inner
, 0);
6628 /* Return true if the mask and a shift amount from an RTX of the form
6629 (x << SHFT_AMNT) & MASK are valid to combine into a UBFIZ instruction of
6630 mode MODE. See the *andim_ashift<mode>_bfiz pattern. */
6633 aarch64_mask_and_shift_for_ubfiz_p (machine_mode mode
, rtx mask
, rtx shft_amnt
)
6635 return CONST_INT_P (mask
) && CONST_INT_P (shft_amnt
)
6636 && INTVAL (shft_amnt
) < GET_MODE_BITSIZE (mode
)
6637 && exact_log2 ((INTVAL (mask
) >> INTVAL (shft_amnt
)) + 1) >= 0
6638 && (INTVAL (mask
) & ((1 << INTVAL (shft_amnt
)) - 1)) == 0;
6641 /* Calculate the cost of calculating X, storing it in *COST. Result
6642 is true if the total cost of the operation has now been calculated. */
6644 aarch64_rtx_costs (rtx x
, machine_mode mode
, int outer ATTRIBUTE_UNUSED
,
6645 int param ATTRIBUTE_UNUSED
, int *cost
, bool speed
)
6648 const struct cpu_cost_table
*extra_cost
6649 = aarch64_tune_params
.insn_extra_cost
;
6650 int code
= GET_CODE (x
);
6652 /* By default, assume that everything has equivalent cost to the
6653 cheapest instruction. Any additional costs are applied as a delta
6654 above this default. */
6655 *cost
= COSTS_N_INSNS (1);
6660 /* The cost depends entirely on the operands to SET. */
6665 switch (GET_CODE (op0
))
6670 rtx address
= XEXP (op0
, 0);
6671 if (VECTOR_MODE_P (mode
))
6672 *cost
+= extra_cost
->ldst
.storev
;
6673 else if (GET_MODE_CLASS (mode
) == MODE_INT
)
6674 *cost
+= extra_cost
->ldst
.store
;
6675 else if (mode
== SFmode
)
6676 *cost
+= extra_cost
->ldst
.storef
;
6677 else if (mode
== DFmode
)
6678 *cost
+= extra_cost
->ldst
.stored
;
6681 COSTS_N_INSNS (aarch64_address_cost (address
, mode
,
6685 *cost
+= rtx_cost (op1
, mode
, SET
, 1, speed
);
6689 if (! REG_P (SUBREG_REG (op0
)))
6690 *cost
+= rtx_cost (SUBREG_REG (op0
), VOIDmode
, SET
, 0, speed
);
6694 /* The cost is one per vector-register copied. */
6695 if (VECTOR_MODE_P (GET_MODE (op0
)) && REG_P (op1
))
6697 int n_minus_1
= (GET_MODE_SIZE (GET_MODE (op0
)) - 1)
6698 / GET_MODE_SIZE (V4SImode
);
6699 *cost
= COSTS_N_INSNS (n_minus_1
+ 1);
6701 /* const0_rtx is in general free, but we will use an
6702 instruction to set a register to 0. */
6703 else if (REG_P (op1
) || op1
== const0_rtx
)
6705 /* The cost is 1 per register copied. */
6706 int n_minus_1
= (GET_MODE_SIZE (GET_MODE (op0
)) - 1)
6708 *cost
= COSTS_N_INSNS (n_minus_1
+ 1);
6711 /* Cost is just the cost of the RHS of the set. */
6712 *cost
+= rtx_cost (op1
, mode
, SET
, 1, speed
);
6717 /* Bit-field insertion. Strip any redundant widening of
6718 the RHS to meet the width of the target. */
6719 if (GET_CODE (op1
) == SUBREG
)
6720 op1
= SUBREG_REG (op1
);
6721 if ((GET_CODE (op1
) == ZERO_EXTEND
6722 || GET_CODE (op1
) == SIGN_EXTEND
)
6723 && CONST_INT_P (XEXP (op0
, 1))
6724 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1
, 0)))
6725 >= INTVAL (XEXP (op0
, 1))))
6726 op1
= XEXP (op1
, 0);
6728 if (CONST_INT_P (op1
))
6730 /* MOV immediate is assumed to always be cheap. */
6731 *cost
= COSTS_N_INSNS (1);
6737 *cost
+= extra_cost
->alu
.bfi
;
6738 *cost
+= rtx_cost (op1
, VOIDmode
, (enum rtx_code
) code
, 1, speed
);
6744 /* We can't make sense of this, assume default cost. */
6745 *cost
= COSTS_N_INSNS (1);
6751 /* If an instruction can incorporate a constant within the
6752 instruction, the instruction's expression avoids calling
6753 rtx_cost() on the constant. If rtx_cost() is called on a
6754 constant, then it is usually because the constant must be
6755 moved into a register by one or more instructions.
6757 The exception is constant 0, which can be expressed
6758 as XZR/WZR and is therefore free. The exception to this is
6759 if we have (set (reg) (const0_rtx)) in which case we must cost
6760 the move. However, we can catch that when we cost the SET, so
6761 we don't need to consider that here. */
6762 if (x
== const0_rtx
)
6766 /* To an approximation, building any other constant is
6767 proportionally expensive to the number of instructions
6768 required to build that constant. This is true whether we
6769 are compiling for SPEED or otherwise. */
6770 *cost
= COSTS_N_INSNS (aarch64_internal_mov_immediate
6771 (NULL_RTX
, x
, false, mode
));
6778 /* mov[df,sf]_aarch64. */
6779 if (aarch64_float_const_representable_p (x
))
6780 /* FMOV (scalar immediate). */
6781 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
6782 else if (!aarch64_float_const_zero_rtx_p (x
))
6784 /* This will be a load from memory. */
6786 *cost
+= extra_cost
->ldst
.loadd
;
6788 *cost
+= extra_cost
->ldst
.loadf
;
6791 /* Otherwise this is +0.0. We get this using MOVI d0, #0
6792 or MOV v0.s[0], wzr - neither of which are modeled by the
6793 cost tables. Just use the default cost. */
6803 /* For loads we want the base cost of a load, plus an
6804 approximation for the additional cost of the addressing
6806 rtx address
= XEXP (x
, 0);
6807 if (VECTOR_MODE_P (mode
))
6808 *cost
+= extra_cost
->ldst
.loadv
;
6809 else if (GET_MODE_CLASS (mode
) == MODE_INT
)
6810 *cost
+= extra_cost
->ldst
.load
;
6811 else if (mode
== SFmode
)
6812 *cost
+= extra_cost
->ldst
.loadf
;
6813 else if (mode
== DFmode
)
6814 *cost
+= extra_cost
->ldst
.loadd
;
6817 COSTS_N_INSNS (aarch64_address_cost (address
, mode
,
6826 if (VECTOR_MODE_P (mode
))
6831 *cost
+= extra_cost
->vect
.alu
;
6836 if (GET_MODE_CLASS (mode
) == MODE_INT
)
6838 if (GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMPARE
6839 || GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMM_COMPARE
)
6842 *cost
+= rtx_cost (XEXP (op0
, 0), VOIDmode
, NEG
, 0, speed
);
6846 /* Cost this as SUB wzr, X. */
6847 op0
= CONST0_RTX (mode
);
6852 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6854 /* Support (neg(fma...)) as a single instruction only if
6855 sign of zeros is unimportant. This matches the decision
6856 making in aarch64.md. */
6857 if (GET_CODE (op0
) == FMA
&& !HONOR_SIGNED_ZEROS (GET_MODE (op0
)))
6860 *cost
= rtx_cost (op0
, mode
, NEG
, 0, speed
);
6863 if (GET_CODE (op0
) == MULT
)
6866 *cost
= rtx_cost (op0
, mode
, NEG
, 0, speed
);
6871 *cost
+= extra_cost
->fp
[mode
== DFmode
].neg
;
6881 if (VECTOR_MODE_P (mode
))
6882 *cost
+= extra_cost
->vect
.alu
;
6884 *cost
+= extra_cost
->alu
.clz
;
6893 if (op1
== const0_rtx
6894 && GET_CODE (op0
) == AND
)
6897 mode
= GET_MODE (op0
);
6901 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
)
6903 /* TODO: A write to the CC flags possibly costs extra, this
6904 needs encoding in the cost tables. */
6906 mode
= GET_MODE (op0
);
6908 if (GET_CODE (op0
) == AND
)
6914 if (GET_CODE (op0
) == PLUS
)
6916 /* ADDS (and CMN alias). */
6921 if (GET_CODE (op0
) == MINUS
)
6928 if (GET_CODE (op0
) == ZERO_EXTRACT
&& op1
== const0_rtx
6929 && GET_MODE (x
) == CC_NZmode
&& CONST_INT_P (XEXP (op0
, 1))
6930 && CONST_INT_P (XEXP (op0
, 2)))
6932 /* COMPARE of ZERO_EXTRACT form of TST-immediate.
6933 Handle it here directly rather than going to cost_logic
6934 since we know the immediate generated for the TST is valid
6935 so we can avoid creating an intermediate rtx for it only
6936 for costing purposes. */
6938 *cost
+= extra_cost
->alu
.logical
;
6940 *cost
+= rtx_cost (XEXP (op0
, 0), GET_MODE (op0
),
6941 ZERO_EXTRACT
, 0, speed
);
6945 if (GET_CODE (op1
) == NEG
)
6949 *cost
+= extra_cost
->alu
.arith
;
6951 *cost
+= rtx_cost (op0
, mode
, COMPARE
, 0, speed
);
6952 *cost
+= rtx_cost (XEXP (op1
, 0), mode
, NEG
, 1, speed
);
6958 Compare can freely swap the order of operands, and
6959 canonicalization puts the more complex operation first.
6960 But the integer MINUS logic expects the shift/extend
6961 operation in op1. */
6963 || (GET_CODE (op0
) == SUBREG
&& REG_P (SUBREG_REG (op0
)))))
6971 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
6975 *cost
+= extra_cost
->fp
[mode
== DFmode
].compare
;
6977 if (CONST_DOUBLE_P (op1
) && aarch64_float_const_zero_rtx_p (op1
))
6979 *cost
+= rtx_cost (op0
, VOIDmode
, COMPARE
, 0, speed
);
6980 /* FCMP supports constant 0.0 for no extra cost. */
6986 if (VECTOR_MODE_P (mode
))
6988 /* Vector compare. */
6990 *cost
+= extra_cost
->vect
.alu
;
6992 if (aarch64_float_const_zero_rtx_p (op1
))
6994 /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
7008 *cost
+= rtx_cost (op0
, mode
, MINUS
, 0, speed
);
7010 /* Detect valid immediates. */
7011 if ((GET_MODE_CLASS (mode
) == MODE_INT
7012 || (GET_MODE_CLASS (mode
) == MODE_CC
7013 && GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
))
7014 && CONST_INT_P (op1
)
7015 && aarch64_uimm12_shift (INTVAL (op1
)))
7018 /* SUB(S) (immediate). */
7019 *cost
+= extra_cost
->alu
.arith
;
7023 /* Look for SUB (extended register). */
7024 if (aarch64_rtx_arith_op_extract_p (op1
, mode
))
7027 *cost
+= extra_cost
->alu
.extend_arith
;
7029 op1
= aarch64_strip_extend (op1
);
7030 *cost
+= rtx_cost (op1
, VOIDmode
,
7031 (enum rtx_code
) GET_CODE (op1
), 0, speed
);
7035 rtx new_op1
= aarch64_strip_extend (op1
);
7037 /* Cost this as an FMA-alike operation. */
7038 if ((GET_CODE (new_op1
) == MULT
7039 || aarch64_shift_p (GET_CODE (new_op1
)))
7042 *cost
+= aarch64_rtx_mult_cost (new_op1
, MULT
,
7043 (enum rtx_code
) code
,
7048 *cost
+= rtx_cost (new_op1
, VOIDmode
, MINUS
, 1, speed
);
7052 if (VECTOR_MODE_P (mode
))
7055 *cost
+= extra_cost
->vect
.alu
;
7057 else if (GET_MODE_CLASS (mode
) == MODE_INT
)
7060 *cost
+= extra_cost
->alu
.arith
;
7062 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7065 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
7079 if (GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMPARE
7080 || GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMM_COMPARE
)
7083 *cost
+= rtx_cost (XEXP (op0
, 0), mode
, PLUS
, 0, speed
);
7084 *cost
+= rtx_cost (op1
, mode
, PLUS
, 1, speed
);
7088 if (GET_MODE_CLASS (mode
) == MODE_INT
7089 && CONST_INT_P (op1
)
7090 && aarch64_uimm12_shift (INTVAL (op1
)))
7092 *cost
+= rtx_cost (op0
, mode
, PLUS
, 0, speed
);
7095 /* ADD (immediate). */
7096 *cost
+= extra_cost
->alu
.arith
;
7100 *cost
+= rtx_cost (op1
, mode
, PLUS
, 1, speed
);
7102 /* Look for ADD (extended register). */
7103 if (aarch64_rtx_arith_op_extract_p (op0
, mode
))
7106 *cost
+= extra_cost
->alu
.extend_arith
;
7108 op0
= aarch64_strip_extend (op0
);
7109 *cost
+= rtx_cost (op0
, VOIDmode
,
7110 (enum rtx_code
) GET_CODE (op0
), 0, speed
);
7114 /* Strip any extend, leave shifts behind as we will
7115 cost them through mult_cost. */
7116 new_op0
= aarch64_strip_extend (op0
);
7118 if (GET_CODE (new_op0
) == MULT
7119 || aarch64_shift_p (GET_CODE (new_op0
)))
7121 *cost
+= aarch64_rtx_mult_cost (new_op0
, MULT
, PLUS
,
7126 *cost
+= rtx_cost (new_op0
, VOIDmode
, PLUS
, 0, speed
);
7130 if (VECTOR_MODE_P (mode
))
7133 *cost
+= extra_cost
->vect
.alu
;
7135 else if (GET_MODE_CLASS (mode
) == MODE_INT
)
7138 *cost
+= extra_cost
->alu
.arith
;
7140 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7143 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
7150 *cost
= COSTS_N_INSNS (1);
7154 if (VECTOR_MODE_P (mode
))
7155 *cost
+= extra_cost
->vect
.alu
;
7157 *cost
+= extra_cost
->alu
.rev
;
7162 if (aarch_rev16_p (x
))
7164 *cost
= COSTS_N_INSNS (1);
7168 if (VECTOR_MODE_P (mode
))
7169 *cost
+= extra_cost
->vect
.alu
;
7171 *cost
+= extra_cost
->alu
.rev
;
7176 if (aarch64_extr_rtx_p (x
, &op0
, &op1
))
7178 *cost
+= rtx_cost (op0
, mode
, IOR
, 0, speed
);
7179 *cost
+= rtx_cost (op1
, mode
, IOR
, 1, speed
);
7181 *cost
+= extra_cost
->alu
.shift
;
7192 if (VECTOR_MODE_P (mode
))
7195 *cost
+= extra_cost
->vect
.alu
;
7200 && GET_CODE (op0
) == MULT
7201 && CONST_INT_P (XEXP (op0
, 1))
7202 && CONST_INT_P (op1
)
7203 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0
, 1))),
7206 /* This is a UBFM/SBFM. */
7207 *cost
+= rtx_cost (XEXP (op0
, 0), mode
, ZERO_EXTRACT
, 0, speed
);
7209 *cost
+= extra_cost
->alu
.bfx
;
7213 if (GET_MODE_CLASS (mode
) == MODE_INT
)
7215 if (CONST_INT_P (op1
))
7217 /* We have a mask + shift version of a UBFIZ
7218 i.e. the *andim_ashift<mode>_bfiz pattern. */
7219 if (GET_CODE (op0
) == ASHIFT
7220 && aarch64_mask_and_shift_for_ubfiz_p (mode
, op1
,
7223 *cost
+= rtx_cost (XEXP (op0
, 0), mode
,
7224 (enum rtx_code
) code
, 0, speed
);
7226 *cost
+= extra_cost
->alu
.bfx
;
7230 else if (aarch64_bitmask_imm (INTVAL (op1
), mode
))
7232 /* We possibly get the immediate for free, this is not
7234 *cost
+= rtx_cost (op0
, mode
, (enum rtx_code
) code
, 0, speed
);
7236 *cost
+= extra_cost
->alu
.logical
;
7245 /* Handle ORN, EON, or BIC. */
7246 if (GET_CODE (op0
) == NOT
)
7247 op0
= XEXP (op0
, 0);
7249 new_op0
= aarch64_strip_shift (op0
);
7251 /* If we had a shift on op0 then this is a logical-shift-
7252 by-register/immediate operation. Otherwise, this is just
7253 a logical operation. */
7258 /* Shift by immediate. */
7259 if (CONST_INT_P (XEXP (op0
, 1)))
7260 *cost
+= extra_cost
->alu
.log_shift
;
7262 *cost
+= extra_cost
->alu
.log_shift_reg
;
7265 *cost
+= extra_cost
->alu
.logical
;
7268 /* In both cases we want to cost both operands. */
7269 *cost
+= rtx_cost (new_op0
, mode
, (enum rtx_code
) code
, 0, speed
);
7270 *cost
+= rtx_cost (op1
, mode
, (enum rtx_code
) code
, 1, speed
);
7279 op0
= aarch64_strip_shift (x
);
7281 if (VECTOR_MODE_P (mode
))
7284 *cost
+= extra_cost
->vect
.alu
;
7288 /* MVN-shifted-reg. */
7291 *cost
+= rtx_cost (op0
, mode
, (enum rtx_code
) code
, 0, speed
);
7294 *cost
+= extra_cost
->alu
.log_shift
;
7298 /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
7299 Handle the second form here taking care that 'a' in the above can
7301 else if (GET_CODE (op0
) == XOR
)
7303 rtx newop0
= XEXP (op0
, 0);
7304 rtx newop1
= XEXP (op0
, 1);
7305 rtx op0_stripped
= aarch64_strip_shift (newop0
);
7307 *cost
+= rtx_cost (newop1
, mode
, (enum rtx_code
) code
, 1, speed
);
7308 *cost
+= rtx_cost (op0_stripped
, mode
, XOR
, 0, speed
);
7312 if (op0_stripped
!= newop0
)
7313 *cost
+= extra_cost
->alu
.log_shift
;
7315 *cost
+= extra_cost
->alu
.logical
;
7322 *cost
+= extra_cost
->alu
.logical
;
7329 /* If a value is written in SI mode, then zero extended to DI
7330 mode, the operation will in general be free as a write to
7331 a 'w' register implicitly zeroes the upper bits of an 'x'
7332 register. However, if this is
7334 (set (reg) (zero_extend (reg)))
7336 we must cost the explicit register move. */
7338 && GET_MODE (op0
) == SImode
7341 int op_cost
= rtx_cost (op0
, VOIDmode
, ZERO_EXTEND
, 0, speed
);
7343 /* If OP_COST is non-zero, then the cost of the zero extend
7344 is effectively the cost of the inner operation. Otherwise
7345 we have a MOV instruction and we take the cost from the MOV
7346 itself. This is true independently of whether we are
7347 optimizing for space or time. */
7353 else if (MEM_P (op0
))
7355 /* All loads can zero extend to any size for free. */
7356 *cost
= rtx_cost (op0
, VOIDmode
, ZERO_EXTEND
, param
, speed
);
7360 op0
= aarch64_extend_bitfield_pattern_p (x
);
7363 *cost
+= rtx_cost (op0
, mode
, ZERO_EXTEND
, 0, speed
);
7365 *cost
+= extra_cost
->alu
.bfx
;
7371 if (VECTOR_MODE_P (mode
))
7374 *cost
+= extra_cost
->vect
.alu
;
7378 /* We generate an AND instead of UXTB/UXTH. */
7379 *cost
+= extra_cost
->alu
.logical
;
7385 if (MEM_P (XEXP (x
, 0)))
7390 rtx address
= XEXP (XEXP (x
, 0), 0);
7391 *cost
+= extra_cost
->ldst
.load_sign_extend
;
7394 COSTS_N_INSNS (aarch64_address_cost (address
, mode
,
7400 op0
= aarch64_extend_bitfield_pattern_p (x
);
7403 *cost
+= rtx_cost (op0
, mode
, SIGN_EXTEND
, 0, speed
);
7405 *cost
+= extra_cost
->alu
.bfx
;
7411 if (VECTOR_MODE_P (mode
))
7412 *cost
+= extra_cost
->vect
.alu
;
7414 *cost
+= extra_cost
->alu
.extend
;
7422 if (CONST_INT_P (op1
))
7426 if (VECTOR_MODE_P (mode
))
7428 /* Vector shift (immediate). */
7429 *cost
+= extra_cost
->vect
.alu
;
7433 /* LSL (immediate), UBMF, UBFIZ and friends. These are all
7435 *cost
+= extra_cost
->alu
.shift
;
7439 /* We can incorporate zero/sign extend for free. */
7440 if (GET_CODE (op0
) == ZERO_EXTEND
7441 || GET_CODE (op0
) == SIGN_EXTEND
)
7442 op0
= XEXP (op0
, 0);
7444 *cost
+= rtx_cost (op0
, VOIDmode
, ASHIFT
, 0, speed
);
7451 if (VECTOR_MODE_P (mode
))
7453 /* Vector shift (register). */
7454 *cost
+= extra_cost
->vect
.alu
;
7459 *cost
+= extra_cost
->alu
.shift_reg
;
7462 return false; /* All arguments need to be in registers. */
7472 if (CONST_INT_P (op1
))
7474 /* ASR (immediate) and friends. */
7477 if (VECTOR_MODE_P (mode
))
7478 *cost
+= extra_cost
->vect
.alu
;
7480 *cost
+= extra_cost
->alu
.shift
;
7483 *cost
+= rtx_cost (op0
, mode
, (enum rtx_code
) code
, 0, speed
);
7489 /* ASR (register) and friends. */
7492 if (VECTOR_MODE_P (mode
))
7493 *cost
+= extra_cost
->vect
.alu
;
7495 *cost
+= extra_cost
->alu
.shift_reg
;
7497 return false; /* All arguments need to be in registers. */
7502 if (aarch64_cmodel
== AARCH64_CMODEL_LARGE
7503 || aarch64_cmodel
== AARCH64_CMODEL_SMALL_SPIC
)
7507 *cost
+= extra_cost
->ldst
.load
;
7509 else if (aarch64_cmodel
== AARCH64_CMODEL_SMALL
7510 || aarch64_cmodel
== AARCH64_CMODEL_SMALL_PIC
)
7512 /* ADRP, followed by ADD. */
7513 *cost
+= COSTS_N_INSNS (1);
7515 *cost
+= 2 * extra_cost
->alu
.arith
;
7517 else if (aarch64_cmodel
== AARCH64_CMODEL_TINY
7518 || aarch64_cmodel
== AARCH64_CMODEL_TINY_PIC
)
7522 *cost
+= extra_cost
->alu
.arith
;
7527 /* One extra load instruction, after accessing the GOT. */
7528 *cost
+= COSTS_N_INSNS (1);
7530 *cost
+= extra_cost
->ldst
.load
;
7536 /* ADRP/ADD (immediate). */
7538 *cost
+= extra_cost
->alu
.arith
;
7546 if (VECTOR_MODE_P (mode
))
7547 *cost
+= extra_cost
->vect
.alu
;
7549 *cost
+= extra_cost
->alu
.bfx
;
7552 /* We can trust that the immediates used will be correct (there
7553 are no by-register forms), so we need only cost op0. */
7554 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, (enum rtx_code
) code
, 0, speed
);
7558 *cost
+= aarch64_rtx_mult_cost (x
, MULT
, 0, speed
);
7559 /* aarch64_rtx_mult_cost always handles recursion to its
7564 /* We can expand signed mod by power of 2 using a NEGS, two parallel
7565 ANDs and a CSNEG. Assume here that CSNEG is the same as the cost of
7566 an unconditional negate. This case should only ever be reached through
7567 the set_smod_pow2_cheap check in expmed.c. */
7568 if (CONST_INT_P (XEXP (x
, 1))
7569 && exact_log2 (INTVAL (XEXP (x
, 1))) > 0
7570 && (mode
== SImode
|| mode
== DImode
))
7572 /* We expand to 4 instructions. Reset the baseline. */
7573 *cost
= COSTS_N_INSNS (4);
7576 *cost
+= 2 * extra_cost
->alu
.logical
7577 + 2 * extra_cost
->alu
.arith
;
7586 /* Slighly prefer UMOD over SMOD. */
7587 if (VECTOR_MODE_P (mode
))
7588 *cost
+= extra_cost
->vect
.alu
;
7589 else if (GET_MODE_CLASS (mode
) == MODE_INT
)
7590 *cost
+= (extra_cost
->mult
[mode
== DImode
].add
7591 + extra_cost
->mult
[mode
== DImode
].idiv
7592 + (code
== MOD
? 1 : 0));
7594 return false; /* All arguments need to be in registers. */
7601 if (VECTOR_MODE_P (mode
))
7602 *cost
+= extra_cost
->vect
.alu
;
7603 else if (GET_MODE_CLASS (mode
) == MODE_INT
)
7604 /* There is no integer SQRT, so only DIV and UDIV can get
7606 *cost
+= (extra_cost
->mult
[mode
== DImode
].idiv
7607 /* Slighly prefer UDIV over SDIV. */
7608 + (code
== DIV
? 1 : 0));
7610 *cost
+= extra_cost
->fp
[mode
== DFmode
].div
;
7612 return false; /* All arguments need to be in registers. */
7615 return aarch64_if_then_else_costs (XEXP (x
, 0), XEXP (x
, 1),
7616 XEXP (x
, 2), cost
, speed
);
7629 return false; /* All arguments must be in registers. */
7638 if (VECTOR_MODE_P (mode
))
7639 *cost
+= extra_cost
->vect
.alu
;
7641 *cost
+= extra_cost
->fp
[mode
== DFmode
].fma
;
7644 /* FMSUB, FNMADD, and FNMSUB are free. */
7645 if (GET_CODE (op0
) == NEG
)
7646 op0
= XEXP (op0
, 0);
7648 if (GET_CODE (op2
) == NEG
)
7649 op2
= XEXP (op2
, 0);
7651 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
7652 and the by-element operand as operand 0. */
7653 if (GET_CODE (op1
) == NEG
)
7654 op1
= XEXP (op1
, 0);
7656 /* Catch vector-by-element operations. The by-element operand can
7657 either be (vec_duplicate (vec_select (x))) or just
7658 (vec_select (x)), depending on whether we are multiplying by
7659 a vector or a scalar.
7661 Canonicalization is not very good in these cases, FMA4 will put the
7662 by-element operand as operand 0, FNMA4 will have it as operand 1. */
7663 if (GET_CODE (op0
) == VEC_DUPLICATE
)
7664 op0
= XEXP (op0
, 0);
7665 else if (GET_CODE (op1
) == VEC_DUPLICATE
)
7666 op1
= XEXP (op1
, 0);
7668 if (GET_CODE (op0
) == VEC_SELECT
)
7669 op0
= XEXP (op0
, 0);
7670 else if (GET_CODE (op1
) == VEC_SELECT
)
7671 op1
= XEXP (op1
, 0);
7673 /* If the remaining parameters are not registers,
7674 get the cost to put them into registers. */
7675 *cost
+= rtx_cost (op0
, mode
, FMA
, 0, speed
);
7676 *cost
+= rtx_cost (op1
, mode
, FMA
, 1, speed
);
7677 *cost
+= rtx_cost (op2
, mode
, FMA
, 2, speed
);
7681 case UNSIGNED_FLOAT
:
7683 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
7689 if (VECTOR_MODE_P (mode
))
7691 /*Vector truncate. */
7692 *cost
+= extra_cost
->vect
.alu
;
7695 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
7699 case FLOAT_TRUNCATE
:
7702 if (VECTOR_MODE_P (mode
))
7704 /*Vector conversion. */
7705 *cost
+= extra_cost
->vect
.alu
;
7708 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
7715 /* Strip the rounding part. They will all be implemented
7716 by the fcvt* family of instructions anyway. */
7717 if (GET_CODE (x
) == UNSPEC
)
7719 unsigned int uns_code
= XINT (x
, 1);
7721 if (uns_code
== UNSPEC_FRINTA
7722 || uns_code
== UNSPEC_FRINTM
7723 || uns_code
== UNSPEC_FRINTN
7724 || uns_code
== UNSPEC_FRINTP
7725 || uns_code
== UNSPEC_FRINTZ
)
7726 x
= XVECEXP (x
, 0, 0);
7731 if (VECTOR_MODE_P (mode
))
7732 *cost
+= extra_cost
->vect
.alu
;
7734 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].toint
;
7737 /* We can combine fmul by a power of 2 followed by a fcvt into a single
7738 fixed-point fcvt. */
7739 if (GET_CODE (x
) == MULT
7740 && ((VECTOR_MODE_P (mode
)
7741 && aarch64_vec_fpconst_pow_of_2 (XEXP (x
, 1)) > 0)
7742 || aarch64_fpconst_pow_of_2 (XEXP (x
, 1)) > 0))
7744 *cost
+= rtx_cost (XEXP (x
, 0), VOIDmode
, (rtx_code
) code
,
7749 *cost
+= rtx_cost (x
, VOIDmode
, (enum rtx_code
) code
, 0, speed
);
7753 if (VECTOR_MODE_P (mode
))
7757 *cost
+= extra_cost
->vect
.alu
;
7759 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7763 /* FABD, which is analogous to FADD. */
7764 if (GET_CODE (op0
) == MINUS
)
7766 *cost
+= rtx_cost (XEXP (op0
, 0), mode
, MINUS
, 0, speed
);
7767 *cost
+= rtx_cost (XEXP (op0
, 1), mode
, MINUS
, 1, speed
);
7769 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
7773 /* Simple FABS is analogous to FNEG. */
7775 *cost
+= extra_cost
->fp
[mode
== DFmode
].neg
;
7779 /* Integer ABS will either be split to
7780 two arithmetic instructions, or will be an ABS
7781 (scalar), which we don't model. */
7782 *cost
= COSTS_N_INSNS (2);
7784 *cost
+= 2 * extra_cost
->alu
.arith
;
7792 if (VECTOR_MODE_P (mode
))
7793 *cost
+= extra_cost
->vect
.alu
;
7796 /* FMAXNM/FMINNM/FMAX/FMIN.
7797 TODO: This may not be accurate for all implementations, but
7798 we do not model this in the cost tables. */
7799 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
7805 /* The floating point round to integer frint* instructions. */
7806 if (aarch64_frint_unspec_p (XINT (x
, 1)))
7809 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
7814 if (XINT (x
, 1) == UNSPEC_RBIT
)
7817 *cost
+= extra_cost
->alu
.rev
;
7825 /* Decompose <su>muldi3_highpart. */
7826 if (/* (truncate:DI */
7829 && GET_MODE (XEXP (x
, 0)) == TImode
7830 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
7832 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7833 /* (ANY_EXTEND:TI (reg:DI))
7834 (ANY_EXTEND:TI (reg:DI))) */
7835 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
7836 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == ZERO_EXTEND
)
7837 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
7838 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
))
7839 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 0), 0)) == DImode
7840 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 1), 0)) == DImode
7841 /* (const_int 64) */
7842 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
7843 && UINTVAL (XEXP (XEXP (x
, 0), 1)) == 64)
7847 *cost
+= extra_cost
->mult
[mode
== DImode
].extend
;
7848 *cost
+= rtx_cost (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 0), 0),
7849 mode
, MULT
, 0, speed
);
7850 *cost
+= rtx_cost (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 1), 0),
7851 mode
, MULT
, 1, speed
);
7861 && flag_aarch64_verbose_cost
)
7863 "\nFailed to cost RTX. Assuming default cost.\n");
7868 /* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
7869 calculated for X. This cost is stored in *COST. Returns true
7870 if the total cost of X was calculated. */
/* NOTE(review): extraction gaps in this span — the return type (presumably
   static bool), braces, the `return result;' and the first clause of the
   dump guard are missing from the visible text.  Kept token-identical.  */
7872 aarch64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer,
7873 int param, int *cost, bool speed)
/* Compute the cost first; dumping below is diagnostics only.  */
7875 bool result = aarch64_rtx_costs (x, mode, outer, param, cost, speed);
/* Second half of a condition; presumably guarded by `if (dump_file' — confirm.  */
7878 && flag_aarch64_verbose_cost)
7880 print_rtl_single (dump_file, x);
7881 fprintf (dump_file, "\n%s cost: %d (%s)\n",
7882 speed ? "Hot" : "Cold",
/* "final" when aarch64_rtx_costs costed all of X, "partial" otherwise.  */
7883 *cost, result ? "final" : "partial");
/* Cost of moving a value of MODE between register classes FROM_I and TO_I.
   Presumably implements the TARGET_REGISTER_MOVE_COST hook — confirm against
   the file's TARGET_* initializers.  NOTE(review): extraction gaps — braces
   and a few statements (e.g. the body of the `to' normalization at 7899) are
   missing from the visible text.  */
7890 aarch64_register_move_cost (machine_mode mode,
7891 reg_class_t from_i, reg_class_t to_i)
7893 enum reg_class from = (enum reg_class) from_i;
7894 enum reg_class to = (enum reg_class) to_i;
7895 const struct cpu_regmove_cost *regmove_cost
7896 = aarch64_tune_params.regmove_cost;
7898 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
/* NOTE(review): the statement normalizing `to' (presumably
   `to = GENERAL_REGS;') falls in a gap here — confirm.  */
7899 if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
7902 if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
7903 from = GENERAL_REGS;
7905 /* Moving between GPR and stack cost is the same as GP2GP. */
7906 if ((from == GENERAL_REGS && to == STACK_REG)
7907 || (to == GENERAL_REGS && from == STACK_REG))
7908 return regmove_cost->GP2GP;
7910 /* To/From the stack register, we move via the gprs. */
/* Recursive decomposition: stack -> GPR plus GPR -> destination.  */
7911 if (to == STACK_REG || from == STACK_REG)
7912 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
7913 + aarch64_register_move_cost (mode, GENERAL_REGS, to)
;
7915 if (GET_MODE_SIZE (mode) == 16)
7917 /* 128-bit operations on general registers require 2 instructions. */
7918 if (from == GENERAL_REGS && to == GENERAL_REGS)
7919 return regmove_cost->GP2GP * 2;
7920 else if (from == GENERAL_REGS)
7921 return regmove_cost->GP2FP * 2;
7922 else if (to == GENERAL_REGS)
7923 return regmove_cost->FP2GP * 2;
7925 /* When AdvSIMD instructions are disabled it is not possible to move
7926 a 128-bit value directly between Q registers. This is handled in
7927 secondary reload. A general register is used as a scratch to move
7928 the upper DI value and the lower DI value is moved directly,
7929 hence the cost is the sum of three moves. */
/* NOTE(review): the guard for this branch (presumably a !TARGET_SIMD
   check around line 7930) is missing from the visible text.  */
7931 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
7933 return regmove_cost->FP2FP;
/* Non-128-bit case: pick the table entry by source/destination class.  */
7936 if (from == GENERAL_REGS && to == GENERAL_REGS)
7937 return regmove_cost->GP2GP;
7938 else if (from == GENERAL_REGS)
7939 return regmove_cost->GP2FP;
7940 else if (to == GENERAL_REGS)
7941 return regmove_cost->FP2GP;
7943 return regmove_cost->FP2FP;
7947 aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED
,
7948 reg_class_t rclass ATTRIBUTE_UNUSED
,
7949 bool in ATTRIBUTE_UNUSED
)
7951 return aarch64_tune_params
.memmov_cost
;
7954 /* Return true if it is safe and beneficial to use the approximate rsqrt optabs
7955 to optimize 1.0/sqrt. */
7958 use_rsqrt_p (machine_mode mode
)
7960 return (!flag_trapping_math
7961 && flag_unsafe_math_optimizations
7962 && ((aarch64_tune_params
.approx_modes
->recip_sqrt
7963 & AARCH64_APPROX_MODE (mode
))
7964 || flag_mrecip_low_precision_sqrt
));
7967 /* Function to decide when to use the approximate reciprocal square root
/* NOTE(review): extraction gaps — the rest of this comment, the return type
   (presumably static tree) and the `return NULL_TREE;' branch for the
   !use_rsqrt_p case are missing from the visible text.  */
7971 aarch64_builtin_reciprocal (tree fndecl)
7973 machine_mode mode = TYPE_MODE (TREE_TYPE (fndecl));
/* Bail out when the approximation is not profitable/safe for this mode.  */
7975 if (!use_rsqrt_p (mode))
/* Hand back the builtin that computes the approximate rsqrt.  */
7977 return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl));
7980 typedef rtx (*rsqrte_type
) (rtx
, rtx
);
7982 /* Select reciprocal square root initial estimate insn depending on machine
7986 get_rsqrte_type (machine_mode mode
)
7990 case DFmode
: return gen_aarch64_rsqrtedf
;
7991 case SFmode
: return gen_aarch64_rsqrtesf
;
7992 case V2DFmode
: return gen_aarch64_rsqrtev2df
;
7993 case V2SFmode
: return gen_aarch64_rsqrtev2sf
;
7994 case V4SFmode
: return gen_aarch64_rsqrtev4sf
;
7995 default: gcc_unreachable ();
7999 typedef rtx (*rsqrts_type
) (rtx
, rtx
, rtx
);
8001 /* Select reciprocal square root series step insn depending on machine mode. */
8004 get_rsqrts_type (machine_mode mode
)
8008 case DFmode
: return gen_aarch64_rsqrtsdf
;
8009 case SFmode
: return gen_aarch64_rsqrtssf
;
8010 case V2DFmode
: return gen_aarch64_rsqrtsv2df
;
8011 case V2SFmode
: return gen_aarch64_rsqrtsv2sf
;
8012 case V4SFmode
: return gen_aarch64_rsqrtsv4sf
;
8013 default: gcc_unreachable ();
8017 /* Emit instruction sequence to compute either the approximate square root
8018 or its approximate reciprocal, depending on the flag RECP, and return
8019 whether the sequence was emitted or not. */
/* NOTE(review): extraction gaps — braces, several `return false;'/`return
   true;' statements, the declaration whose initializer appears at 8033
   (presumably an integer mask mode `mmsk'), and the recp-specific guards are
   missing from the visible text.  Kept token-identical.  */
8022 aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp)
8024 machine_mode mode = GET_MODE (dst);
/* Half-precision is not handled by this expansion.  */
8026 if (GET_MODE_INNER (mode) == HFmode)
/* Initializer for the integer mode used for masking — its declaration
   line is in a gap; presumably `machine_mode mmsk = ...' — confirm.  */
8033 = mode_for_vector (int_mode_for_mode (GET_MODE_INNER (mode)),
8034 GET_MODE_NUNITS (mode));
/* Square-root path: enabled by -mlow-precision-sqrt or the tuning table.  */
8037 if (!(flag_mlow_precision_sqrt
8038 || (aarch64_tune_params.approx_modes->sqrt
8039 & AARCH64_APPROX_MODE (mode))))
/* The approximation is invalid under these math/size constraints.  */
8042 if (flag_finite_math_only
8043 || flag_trapping_math
8044 || !flag_unsafe_math_optimizations
8045 || optimize_function_for_size_p (cfun))
8049 /* Caller assumes we cannot fail. */
8050 gcc_assert (use_rsqrt_p (mode));
8053 rtx xmsk = gen_reg_rtx (mmsk);
8055 /* When calculating the approximate square root, compare the
8056 argument with 0.0 and create a mask. */
8057 emit_insn (gen_rtx_SET (xmsk,
8059 gen_rtx_EQ (mmsk, src,
8060 CONST0_RTX (mode)))));
8062 /* Estimate the approximate reciprocal square root. */
8063 rtx xdst = gen_reg_rtx (mode);
8064 emit_insn ((*get_rsqrte_type (mode)) (xdst, src));
8066 /* Iterate over the series twice for SF and thrice for DF. */
8067 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
8069 /* Optionally iterate over the series once less for faster performance
8070 while sacrificing the accuracy. */
8071 if ((recp && flag_mrecip_low_precision_sqrt)
8072 || (!recp && flag_mlow_precision_sqrt))
8075 /* Iterate over the series to calculate the approximate reciprocal square
8077 rtx x1 = gen_reg_rtx (mode);
/* Newton-Raphson refinement loop: x1 = step (src, xdst * xdst),
   xdst = xdst * x1 each iteration.  */
8078 while (iterations--)
8080 rtx x2 = gen_reg_rtx (mode);
8081 emit_set_insn (x2, gen_rtx_MULT (mode, xdst, xdst));
8083 emit_insn ((*get_rsqrts_type (mode)) (x1, src, x2));
8086 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, x1));
8091 /* Qualify the approximate reciprocal square root when the argument is
8092 0.0 by squashing the intermediary result to 0.0. */
8093 rtx xtmp = gen_reg_rtx (mmsk);
8094 emit_set_insn (xtmp, gen_rtx_AND (mmsk, gen_rtx_NOT (mmsk, xmsk),
8095 gen_rtx_SUBREG (mmsk, xdst, 0)));
8096 emit_move_insn (xdst, gen_rtx_SUBREG (mode, xtmp, 0));
8098 /* Calculate the approximate square root. */
8099 emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, src));
8102 /* Finalize the approximation. */
8103 emit_set_insn (dst, gen_rtx_MULT (mode, xdst, x1));
8108 typedef rtx (*recpe_type
) (rtx
, rtx
);
8110 /* Select reciprocal initial estimate insn depending on machine mode. */
8113 get_recpe_type (machine_mode mode
)
8117 case SFmode
: return (gen_aarch64_frecpesf
);
8118 case V2SFmode
: return (gen_aarch64_frecpev2sf
);
8119 case V4SFmode
: return (gen_aarch64_frecpev4sf
);
8120 case DFmode
: return (gen_aarch64_frecpedf
);
8121 case V2DFmode
: return (gen_aarch64_frecpev2df
);
8122 default: gcc_unreachable ();
8126 typedef rtx (*recps_type
) (rtx
, rtx
, rtx
);
8128 /* Select reciprocal series step insn depending on machine mode. */
8131 get_recps_type (machine_mode mode
)
8135 case SFmode
: return (gen_aarch64_frecpssf
);
8136 case V2SFmode
: return (gen_aarch64_frecpsv2sf
);
8137 case V4SFmode
: return (gen_aarch64_frecpsv4sf
);
8138 case DFmode
: return (gen_aarch64_frecpsdf
);
8139 case V2DFmode
: return (gen_aarch64_frecpsv2df
);
8140 default: gcc_unreachable ();
8144 /* Emit the instruction sequence to compute the approximation for the division
8145 of NUM by DEN in QUO and return whether the sequence was emitted or not. */
/* NOTE(review): extraction gaps — return type (presumably bool), braces and
   the `return false;'/`return true;' statements are missing from the
   visible text.  Kept token-identical.  */
8148 aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
8150 machine_mode mode = GET_MODE (quo);
/* Half-precision is not handled by this expansion.  */
8152 if (GET_MODE_INNER (mode) == HFmode)
8155 bool use_approx_division_p = (flag_mlow_precision_div
8156 || (aarch64_tune_params.approx_modes->division
8157 & AARCH64_APPROX_MODE (mode)));
/* The approximation is only valid under unsafe finite math, when not
   optimizing for size, and when explicitly enabled.  */
8159 if (!flag_finite_math_only
8160 || flag_trapping_math
8161 || !flag_unsafe_math_optimizations
8162 || optimize_function_for_size_p (cfun)
8163 || !use_approx_division_p)
8166 /* Estimate the approximate reciprocal. */
8167 rtx xrcp = gen_reg_rtx (mode);
8168 emit_insn ((*get_recpe_type (mode)) (xrcp, den));
8170 /* Iterate over the series twice for SF and thrice for DF. */
8171 int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
8173 /* Optionally iterate over the series once less for faster performance,
8174 while sacrificing the accuracy. */
8175 if (flag_mlow_precision_div)
8178 /* Iterate over the series to calculate the approximate reciprocal. */
8179 rtx xtmp = gen_reg_rtx (mode);
/* Newton-Raphson refinement: xtmp = step (xrcp, den); xrcp *= xtmp.  */
8180 while (iterations--)
8182 emit_insn ((*get_recps_type (mode)) (xtmp, xrcp, den));
8185 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xtmp));
8188 if (num != CONST1_RTX (mode))
8190 /* As the approximate reciprocal of DEN is already calculated, only
8191 calculate the approximate division when NUM is not 1.0. */
8192 rtx xnum = force_reg (mode, num);
8193 emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xnum));
8196 /* Finalize the approximation. */
8197 emit_set_insn (quo, gen_rtx_MULT (mode, xrcp, xtmp));
8201 /* Return the number of instructions that can be issued per cycle. */
8203 aarch64_sched_issue_rate (void)
8205 return aarch64_tune_params
.issue_rate
;
8209 aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
8211 int issue_rate
= aarch64_sched_issue_rate ();
8213 return issue_rate
> 1 && !sched_fusion
? issue_rate
: 0;
8217 /* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD as
8218 autopref_multipass_dfa_lookahead_guard from haifa-sched.c. It only
8219 has an effect if PARAM_SCHED_AUTOPREF_QUEUE_DEPTH > 0. */
/* NOTE(review): extraction gaps — the return type, braces and the second
   parameter declaration (presumably `int ready_index') are missing from the
   visible text; `ready_index' is used below.  */
8222 aarch64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn,
/* Pure delegation to the generic autoprefetcher guard.  */
8225 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
8229 /* Vectorizer cost model target hooks. */
8231 /* Implement targetm.vectorize.builtin_vectorization_cost. */
/* NOTE(review): extraction gaps — the return type, the `vectype' parameter
   declaration, the declarations of `fp'/`elements', braces and several
   `case' labels of the switch are missing from the visible text.  Kept
   token-identical.  */
8233 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
8235 int misalign ATTRIBUTE_UNUSED)
/* All costs come from the per-core tuning tables.  */
8238 const cpu_vector_cost *costs = aarch64_tune_params.vec_costs;
/* `fp' selects the FP vs integer column of the cost table below.  */
8241 if (vectype != NULL)
8242 fp = FLOAT_TYPE_P (vectype);
/* Dispatch on the kind of statement being costed; each visible return
   corresponds to a vect_cost_for_stmt case whose label is in a gap.  */
8244 switch (type_of_cost)
8247 return fp ? costs->scalar_fp_stmt_cost : costs->scalar_int_stmt_cost;
8250 return costs->scalar_load_cost;
8253 return costs->scalar_store_cost;
8256 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8259 return costs->vec_align_load_cost;
8262 return costs->vec_store_cost;
8265 return costs->vec_to_scalar_cost;
8268 return costs->scalar_to_vec_cost;
8270 case unaligned_load:
8271 return costs->vec_unalign_load_cost;
8273 case unaligned_store:
8274 return costs->vec_unalign_store_cost;
8276 case cond_branch_taken:
8277 return costs->cond_taken_branch_cost;
8279 case cond_branch_not_taken:
8280 return costs->cond_not_taken_branch_cost;
8283 return costs->vec_permute_cost;
8285 case vec_promote_demote:
8286 return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
8289 elements = TYPE_VECTOR_SUBPARTS (vectype);
8290 return elements / 2 + 1;
8297 /* Implement targetm.vectorize.add_stmt_cost. */
/* NOTE(review): extraction gaps — the return type, braces, the `return
   retval;' and the declaration whose initializer appears at 8310
   (presumably `int stmt_cost =') are missing from the visible text.  */
8299 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
8300 struct _stmt_vec_info *stmt_info, int misalign,
8301 enum vect_cost_model_location where)
/* DATA is the vectorizer's accumulator array, indexed by location.  */
8303 unsigned *cost = (unsigned *) data;
8304 unsigned retval = 0;
8306 if (flag_vect_cost_model)
8308 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
/* Per-statement cost from the tuning tables; assignment target is in a gap.  */
8310 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
8312 /* Statements in an inner loop relative to the loop being
8313 vectorized are weighted more heavily. The value here is
8314 arbitrary and could potentially be improved with analysis. */
8315 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
8316 count *= 50; /* FIXME */
8318 retval = (unsigned) (count * stmt_cost);
8319 cost[where] += retval;
8325 static void initialize_aarch64_code_model (struct gcc_options
*);
8327 /* Parse the TO_PARSE string and put the architecture struct that it
8328 selects into RES and the architectural features into ISA_FLAGS.
8329 Return an aarch64_parse_opt_result describing the parse result.
8330 If there is an error parsing, RES and ISA_FLAGS are left unchanged. */
/* NOTE(review): extraction gaps — braces, the declarations of `ext'/`len',
   the handling that sets `len' from the '+' position, and the assignment of
   `*res' on a match are missing from the visible text.  */
8332 static enum aarch64_parse_opt_result
8333 aarch64_parse_arch (const char *to_parse, const struct processor **res,
8334 unsigned long *isa_flags)
8337 const struct processor *arch;
/* Mutable copy of TO_PARSE so it can be split at '+'.  */
8338 char *str = (char *) alloca (strlen (to_parse) + 1);
8341 strcpy (str, to_parse);
/* Anything after '+' is a feature-modifier list.  */
8343 ext = strchr (str, '+');
/* Reached when the architecture name part is empty — guard is in a gap.  */
8351 return AARCH64_PARSE_MISSING_ARG;
8354 /* Loop through the list of supported ARCHes to find a match. */
8355 for (arch = all_architectures; arch->name != NULL; arch++)
8357 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
8359 unsigned long isa_temp = arch->flags;
8363 /* TO_PARSE string contains at least one extension. */
8364 enum aarch64_parse_opt_result ext_res
8365 = aarch64_parse_extension (ext, &isa_temp);
8367 if (ext_res != AARCH64_PARSE_OK)
8370 /* Extension parsing was successful. Confirm the result
8371 arch and ISA flags. */
8373 *isa_flags = isa_temp;
8374 return AARCH64_PARSE_OK;
8378 /* ARCH name not found in list. */
8379 return AARCH64_PARSE_INVALID_ARG;
8382 /* Parse the TO_PARSE string and put the result tuning in RES and the
8383 architecture flags in ISA_FLAGS. Return an aarch64_parse_opt_result
8384 describing the parse result. If there is an error parsing, RES and
8385 ISA_FLAGS are left unchanged. */
/* NOTE(review): extraction gaps — braces, the `ext'/`len' declarations, the
   '+'-splitting logic and the assignment of `*res' on a match are missing
   from the visible text.  Structure parallels aarch64_parse_arch above.  */
8387 static enum aarch64_parse_opt_result
8388 aarch64_parse_cpu (const char *to_parse, const struct processor **res,
8389 unsigned long *isa_flags)
8392 const struct processor *cpu;
/* Mutable copy of TO_PARSE so it can be split at '+'.  */
8393 char *str = (char *) alloca (strlen (to_parse) + 1);
8396 strcpy (str, to_parse);
/* Anything after '+' is a feature-modifier list.  */
8398 ext = strchr (str, '+');
/* Reached when the cpu name part is empty — guard is in a gap.  */
8406 return AARCH64_PARSE_MISSING_ARG;
8409 /* Loop through the list of supported CPUs to find a match. */
8410 for (cpu = all_cores; cpu->name != NULL; cpu++)
8412 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
8414 unsigned long isa_temp = cpu->flags;
8419 /* TO_PARSE string contains at least one extension. */
8420 enum aarch64_parse_opt_result ext_res
8421 = aarch64_parse_extension (ext, &isa_temp);
8423 if (ext_res != AARCH64_PARSE_OK)
8426 /* Extension parsing was successful. Confirm the result
8427 cpu and ISA flags. */
8429 *isa_flags = isa_temp;
8430 return AARCH64_PARSE_OK;
8434 /* CPU name not found in list. */
8435 return AARCH64_PARSE_INVALID_ARG;
8438 /* Parse the TO_PARSE string and put the cpu it selects into RES.
8439 Return an aarch64_parse_opt_result describing the parse result.
8440 If the parsing fails the RES does not change. */
/* NOTE(review): extraction gaps — braces and the assignment of `*res' on a
   match (presumably `*res = cpu;') are missing from the visible text.  */
8442 static enum aarch64_parse_opt_result
8443 aarch64_parse_tune (const char *to_parse, const struct processor **res)
8445 const struct processor *cpu;
/* Mutable copy of TO_PARSE; no '+' extensions are accepted for -mtune.  */
8446 char *str = (char *) alloca (strlen (to_parse) + 1);
8448 strcpy (str, to_parse);
8450 /* Loop through the list of supported CPUs to find a match. */
8451 for (cpu = all_cores; cpu->name != NULL; cpu++)
8453 if (strcmp (cpu->name, str) == 0)
8456 return AARCH64_PARSE_OK;
8460 /* CPU name not found in list. */
8461 return AARCH64_PARSE_INVALID_ARG;
8464 /* Parse TOKEN, which has length LENGTH to see if it is an option
8465 described in FLAG. If it is, return the index bit for that fusion type.
8466 If not, error (printing OPTION_NAME) and return zero. */
/* NOTE(review): extraction gaps — the return type, the `length' parameter
   declaration, braces, the `return flag->flag;'-style match result and the
   final `return 0;' are missing from the visible text.  */
8469 aarch64_parse_one_option_token (const char *token,
8471 const struct aarch64_flag_desc *flag,
8472 const char *option_name)
/* Linear scan of the NULL-terminated flag table.  */
8474 for (; flag->name != NULL; flag++)
/* Exact-length match so "foo" does not match token "foobar".  */
8476 if (length == strlen (flag->name)
8477 && !strncmp (flag->name, token, length))
/* Fall through to here when no table entry matched.  */
8481 error ("unknown flag passed in -moverride=%s (%s)", option_name, token);
8485 /* Parse OPTION which is a comma-separated list of flags to enable.
8486 FLAGS gives the list of flags we understand, INITIAL_STATE gives any
8487 default state we inherit from the CPU tuning structures. OPTION_NAME
8488 gives the top-level option we are parsing in the -moverride string,
8489 for use in error messages. */
/* NOTE(review): extraction gaps — the return type, braces, the trailing
   arguments of both aarch64_parse_one_option_token calls, the "none"
   reset handling and the final `return found_flags;' are missing from the
   visible text.  The separator is '.', despite "comma" in the comments.  */
8492 aarch64_parse_boolean_options (const char *option,
8493 const struct aarch64_flag_desc *flags,
8494 unsigned int initial_state,
8495 const char *option_name)
8497 const char separator = '.';
8498 const char* specs = option;
8499 const char* ntoken = option;
8500 unsigned int found_flags = initial_state;
/* Walk each '.'-separated token up to (but not including) the last.  */
8502 while ((ntoken = strchr (specs, separator)))
8504 size_t token_length = ntoken - specs;
8505 unsigned token_ops = aarch64_parse_one_option_token (specs,
8509 /* If we find "none" (or, for simplicity's sake, an error) anywhere
8510 in the token stream, reset the supported operations. So:
8512 adrp+add.cmp+branch.none.adrp+add
8514 would have the result of turning on only adrp+add fusion. */
8518 found_flags |= token_ops;
8522 /* We ended with a comma, print something. */
8525 error ("%s string ill-formed\n", option_name);
8529 /* We still have one more token to parse. */
8530 size_t token_length = strlen (specs);
8531 unsigned token_ops = aarch64_parse_one_option_token (specs,
8538 found_flags |= token_ops;
8542 /* Support for overriding instruction fusion. */
/* Parses FUSE_STRING (the value of the -moverride "fuse=" option) into
   TUNE->fusible_ops.  NOTE(review): the return type, braces and the last
   arguments of the call (presumably the current fusible_ops as initial
   state plus the option name) are in extraction gaps.  */
8545 aarch64_parse_fuse_string (const char *fuse_string,
8546 struct tune_params *tune)
8548 tune->fusible_ops = aarch64_parse_boolean_options (fuse_string,
8549 aarch64_fusible_pairs,
8554 /* Support for overriding other tuning flags. */
/* Parses TUNE_STRING (the value of the -moverride "tune=" option) into
   TUNE->extra_tuning_flags, keeping the existing flags as the initial
   state.  NOTE(review): the return type, braces and the final argument
   (presumably the option name string) are in extraction gaps.  */
8557 aarch64_parse_tune_string (const char *tune_string,
8558 struct tune_params *tune)
8560 tune->extra_tuning_flags
8561 = aarch64_parse_boolean_options (tune_string,
8562 aarch64_tuning_flags,
8563 tune->extra_tuning_flags,
8567 /* Parse TOKEN, which has length LENGTH to see if it is a tuning option
8568 we understand. If it is, extract the option string and handoff to
8569 the appropriate function. */
/* NOTE(review): extraction gaps — the return type, the `length' parameter
   declaration, braces, the NULL check on `option_part', the `option_part++'
   skip of '=' and the `return;' after dispatch are missing from the
   visible text.  */
8572 aarch64_parse_one_override_token (const char* token,
8574 struct tune_params *tune)
/* Table of "name=..." handlers, terminated by a NULL name.  */
8576 const struct aarch64_tuning_override_function *fn
8577 = aarch64_tuning_override_functions;
/* Split "name=value" at the '='.  */
8579 const char *option_part = strchr (token, '=');
8582 error ("tuning string missing in option (%s)", token);
8586 /* Get the length of the option name. */
8587 length = option_part - token;
8588 /* Skip the '=' to get to the option string. */
/* Dispatch to the handler whose name prefixes the token.  */
8591 for (; fn->name != NULL; fn++)
8593 if (!strncmp (fn->name, token, length))
8595 fn->parse_override (option_part, tune);
/* No handler matched.  */
8600 error ("unknown tuning option (%s)",token);
8604 /* A checking mechanism for the implementation of the tls size. */
/* Clamp the -mtls-size value to what the selected code model can address.
   NOTE(review): extraction gaps — the return type, braces, the `break;'
   statements between cases and any default case are missing from the
   visible text, so whether the cases fall through cannot be confirmed
   here.  */
8607 initialize_aarch64_tls_size (struct gcc_options *opts)
/* 0 means "not set on the command line": use the 24-bit default.  */
8609 if (aarch64_tls_size == 0)
8610 aarch64_tls_size = 24;
8612 switch (opts->x_aarch64_cmodel_var)
8614 case AARCH64_CMODEL_TINY:
8615 /* Both the default and maximum TLS size allowed under tiny is 1M which
8616 needs two instructions to address, so we clamp the size to 24. */
8617 if (aarch64_tls_size > 24)
8618 aarch64_tls_size = 24;
8620 case AARCH64_CMODEL_SMALL:
8621 /* The maximum TLS size allowed under small is 4G. */
8622 if (aarch64_tls_size > 32)
8623 aarch64_tls_size = 32;
8625 case AARCH64_CMODEL_LARGE:
8626 /* The maximum TLS size allowed under large is 16E.
8627 FIXME: 16E should be 64bit, we only support 48bit offset now. */
8628 if (aarch64_tls_size > 48)
8629 aarch64_tls_size = 48;
8638 /* Parse STRING looking for options in the format:
8639 string :: option:string
8640 option :: name=substring
8642 substring :: defined by option. */
/* NOTE(review): extraction gaps — the return type, braces, the NUL
   termination of each ':'-token, the advance of `string' past the
   separator, and the free of `string_root' are missing from the visible
   text.  */
8645 aarch64_parse_override_string (const char* input_string,
8646 struct tune_params* tune)
8648 const char separator = ':';
/* Work on a heap copy because tokens are terminated in place.  */
8649 size_t string_length = strlen (input_string) + 1;
8650 char *string_root = (char *) xmalloc (sizeof (*string_root) * string_length);
8651 char *string = string_root;
8652 strncpy (string, input_string, string_length);
/* Ensure NUL termination even if the strncpy copy filled the buffer.  */
8653 string[string_length - 1] = '\0';
8655 char* ntoken = string;
/* Handle every token up to the last ':'-separated one.  */
8657 while ((ntoken = strchr (string, separator)))
8659 size_t token_length = ntoken - string;
8660 /* Make this substring look like a string. */
8662 aarch64_parse_one_override_token (string, token_length, tune);
8666 /* One last option to parse. */
8667 aarch64_parse_one_override_token (string, strlen (string), tune);
/* Re-derive frame-pointer, alignment and literal-load decisions from OPTS.
   NOTE(review): extraction gaps — the return type (presumably static void)
   and braces are missing from the visible text.  */
8673 aarch64_override_options_after_change_1 (struct gcc_options *opts)
8675 /* The logic here is that if we are disabling all frame pointer generation
8676 then we do not need to disable leaf frame pointer generation as a
8677 separate operation. But if we are *only* disabling leaf frame pointer
8678 generation then we set flag_omit_frame_pointer to true, but in
8679 aarch64_frame_pointer_required we return false only for leaf functions.
8681 PR 70044: We have to be careful about being called multiple times for the
8682 same function. Once we have decided to set flag_omit_frame_pointer just
8683 so that we can omit leaf frame pointers, we must then not interpret a
8684 second call as meaning that all frame pointer generation should be
8685 omitted. We do this by setting flag_omit_frame_pointer to a special,
/* 2 is the sentinel "set only for leaf frame pointers" value.  */
8687 if (opts->x_flag_omit_frame_pointer == 2)
8688 opts->x_flag_omit_frame_pointer = 0;
8690 if (opts->x_flag_omit_frame_pointer)
8691 opts->x_flag_omit_leaf_frame_pointer = false;
8692 else if (opts->x_flag_omit_leaf_frame_pointer)
8693 opts->x_flag_omit_frame_pointer = 2;
8695 /* If not optimizing for size, set the default
8696 alignment to what the target wants. */
8697 if (!opts->x_optimize_size)
/* <= 0 means the user did not specify an alignment.  */
8699 if (opts->x_align_loops <= 0)
8700 opts->x_align_loops = aarch64_tune_params.loop_align;
8701 if (opts->x_align_jumps <= 0)
8702 opts->x_align_jumps = aarch64_tune_params.jump_align;
8703 if (opts->x_align_functions <= 0)
8704 opts->x_align_functions = aarch64_tune_params.function_align;
8707 /* We default to no pc-relative literal loads. */
8709 aarch64_pcrelative_literal_loads = false;
8711 /* If -mpc-relative-literal-loads is set on the command line, this
8712 implies that the user asked for PC relative literal loads. */
8713 if (opts->x_pcrelative_literal_loads == 1)
8714 aarch64_pcrelative_literal_loads = true;
8716 /* This is PR70113. When building the Linux kernel with
8717 CONFIG_ARM64_ERRATUM_843419, support for relocations
8718 R_AARCH64_ADR_PREL_PG_HI21 and R_AARCH64_ADR_PREL_PG_HI21_NC is
8719 removed from the kernel to avoid loading objects with possibly
8720 offending sequences. Without -mpc-relative-literal-loads we would
8721 generate such relocations, preventing the kernel build from
/* 2 presumably means "not set on the command line" — confirm against
   the option definition in aarch64.opt.  */
8723 if (opts->x_pcrelative_literal_loads == 2
8724 && TARGET_FIX_ERR_A53_843419)
8725 aarch64_pcrelative_literal_loads = true;
8727 /* In the tiny memory model it makes no sense to disallow PC relative
8728 literal pool loads. */
8729 if (aarch64_cmodel == AARCH64_CMODEL_TINY
8730 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
8731 aarch64_pcrelative_literal_loads = true;
8733 /* When enabling the lower precision Newton series for the square root, also
8734 enable it for the reciprocal square root, since the latter is an
8735 intermediary step for the former. */
8736 if (flag_mlow_precision_sqrt)
8737 flag_mrecip_low_precision_sqrt = true;
8740 /* 'Unpack' up the internal tuning structs and update the options
8741 in OPTS. The caller must have set up selected_tune and selected_arch
8742 as all the other target-specific codegen decisions are
8743 derived from them. */
/* NOTE(review): extraction gaps — the return type (presumably static void),
   braces, the WEAK-model queue-depth assignment, the `break;' statements,
   and the guard (presumably a non-NULL check on
   aarch64_tune_params.prefetch) are missing from the visible text.  */
8746 aarch64_override_options_internal (struct gcc_options *opts)
8748 aarch64_tune_flags = selected_tune->flags;
8749 aarch64_tune = selected_tune->sched_core;
8750 /* Make a copy of the tuning parameters attached to the core, which
8751 we may later overwrite. */
8752 aarch64_tune_params = *(selected_tune->tune);
8753 aarch64_architecture_version = selected_arch->architecture_version;
/* -moverride edits the local copy made above, never the shared tables.  */
8755 if (opts->x_aarch64_override_tune_string)
8756 aarch64_parse_override_string (opts->x_aarch64_override_tune_string,
8757 &aarch64_tune_params);
8759 /* This target defaults to strict volatile bitfields. */
8760 if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
8761 opts->x_flag_strict_volatile_bitfields = 1;
8763 initialize_aarch64_code_model (opts);
8764 initialize_aarch64_tls_size (opts);
/* Translate the tuning autoprefetcher model into a scheduler queue depth.  */
8766 int queue_depth = 0;
8767 switch (aarch64_tune_params.autoprefetcher_model)
8769 case tune_params::AUTOPREFETCHER_OFF:
8772 case tune_params::AUTOPREFETCHER_WEAK:
8775 case tune_params::AUTOPREFETCHER_STRONG:
8776 queue_depth = max_insn_queue_index + 1;
8782 /* We don't mind passing in global_options_set here as we don't use
8783 the *options_set structs anyway. */
8784 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
8786 opts->x_param_values,
8787 global_options_set.x_param_values);
8789 /* Set up parameters to be used in prefetching algorithm. Do not
8790 override the defaults unless we are tuning for a core we have
8791 researched values for. */
8792 if (aarch64_tune_params.prefetch->num_slots > 0)
8793 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
8794 aarch64_tune_params.prefetch->num_slots,
8795 opts->x_param_values,
8796 global_options_set.x_param_values);
8797 if (aarch64_tune_params.prefetch->l1_cache_size >= 0)
8798 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
8799 aarch64_tune_params.prefetch->l1_cache_size,
8800 opts->x_param_values,
8801 global_options_set.x_param_values);
8802 if (aarch64_tune_params.prefetch->l1_cache_line_size >= 0)
8803 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
8804 aarch64_tune_params.prefetch->l1_cache_line_size,
8805 opts->x_param_values,
8806 global_options_set.x_param_values);
8807 if (aarch64_tune_params.prefetch->l2_cache_size >= 0)
8808 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
8809 aarch64_tune_params.prefetch->l2_cache_size,
8810 opts->x_param_values,
8811 global_options_set.x_param_values);
8813 /* Enable sw prefetching at specified optimization level for
8814 CPUS that have prefetch. Lower optimization level threshold by 1
8815 when profiling is enabled. */
/* < 0 means -fprefetch-loop-arrays was not set explicitly.  */
8816 if (opts->x_flag_prefetch_loop_arrays < 0
8817 && !opts->x_optimize_size
8818 && aarch64_tune_params.prefetch->default_opt_level >= 0
8819 && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
8820 opts->x_flag_prefetch_loop_arrays = 1;
8822 aarch64_override_options_after_change_1 (opts);
8825 /* Print a hint with a suggestion for a core or architecture name that
8826 most closely resembles what the user passed in STR. ARCH is true if
8827 the user is asking for an architecture name. ARCH is false if the user
8828 is asking for a core name. */
/* NOTE(review): extraction gaps — the return type (presumably static void),
   braces and the declaration of `s' (the formatted candidate list filled by
   candidates_list_and_hint) are missing from the visible text.  */
8831 aarch64_print_hint_for_core_or_arch (const char *str, bool arch)
/* Collect every known name from the relevant table.  */
8833 auto_vec<const char *> candidates;
8834 const struct processor *entry = arch ? all_architectures : all_cores;
8835 for (; entry->name != NULL; entry++)
8836 candidates.safe_push (entry->name);
/* `hint' is the closest match to STR; `s' receives the full list.  */
8838 const char *hint = candidates_list_and_hint (str, s, candidates);
8840 inform (input_location, "valid arguments are: %s;"
8841 " did you mean %qs?", s, hint);
8845 /* Print a hint with a suggestion for a core name that most closely resembles
8846 what the user passed in STR. */
8849 aarch64_print_hint_for_core (const char *str
)
8851 aarch64_print_hint_for_core_or_arch (str
, false);
8854 /* Print a hint with a suggestion for an architecture name that most closely
8855 resembles what the user passed in STR. */
8858 aarch64_print_hint_for_arch (const char *str
)
8860 aarch64_print_hint_for_core_or_arch (str
, true);
8863 /* Validate a command-line -mcpu option. Parse the cpu and extensions (if any)
8864 specified in STR and throw errors if appropriate. Put the results if
8865 they are valid in RES and ISA_FLAGS. Return whether the option is
/* NOTE(review): extraction gaps — the end of this comment, the return type
   (presumably static bool), braces, the `return true;' on success, the
   switch header, `break;' statements, any default case and the final
   `return false;' are missing from the visible text.  */
8869 aarch64_validate_mcpu (const char *str, const struct processor **res,
8870 unsigned long *isa_flags)
8872 enum aarch64_parse_opt_result parse_res
8873 = aarch64_parse_cpu (str, res, isa_flags);
8875 if (parse_res == AARCH64_PARSE_OK)
/* Otherwise report a diagnostic matched to the specific parse failure.  */
8880 case AARCH64_PARSE_MISSING_ARG:
8881 error ("missing cpu name in %<-mcpu=%s%>", str);
8883 case AARCH64_PARSE_INVALID_ARG:
8884 error ("unknown value %qs for -mcpu", str);
8885 aarch64_print_hint_for_core (str);
8887 case AARCH64_PARSE_INVALID_FEATURE:
8888 error ("invalid feature modifier in %<-mcpu=%s%>", str);
8897 /* Validate a command-line -march option. Parse the arch and extensions
8898 (if any) specified in STR and throw errors if appropriate. Put the
8899 results, if they are valid, in RES and ISA_FLAGS. Return whether the
8903 aarch64_validate_march (const char *str
, const struct processor
**res
,
8904 unsigned long *isa_flags
)
8906 enum aarch64_parse_opt_result parse_res
8907 = aarch64_parse_arch (str
, res
, isa_flags
);
8909 if (parse_res
== AARCH64_PARSE_OK
)
8914 case AARCH64_PARSE_MISSING_ARG
:
8915 error ("missing arch name in %<-march=%s%>", str
);
8917 case AARCH64_PARSE_INVALID_ARG
:
8918 error ("unknown value %qs for -march", str
);
8919 aarch64_print_hint_for_arch (str
);
8921 case AARCH64_PARSE_INVALID_FEATURE
:
8922 error ("invalid feature modifier in %<-march=%s%>", str
);
8931 /* Validate a command-line -mtune option. Parse the cpu
8932 specified in STR and throw errors if appropriate. Put the
8933 result, if it is valid, in RES. Return whether the option is
8937 aarch64_validate_mtune (const char *str
, const struct processor
**res
)
8939 enum aarch64_parse_opt_result parse_res
8940 = aarch64_parse_tune (str
, res
);
8942 if (parse_res
== AARCH64_PARSE_OK
)
8947 case AARCH64_PARSE_MISSING_ARG
:
8948 error ("missing cpu name in %<-mtune=%s%>", str
);
8950 case AARCH64_PARSE_INVALID_ARG
:
8951 error ("unknown value %qs for -mtune", str
);
8952 aarch64_print_hint_for_core (str
);
8960 /* Return the CPU corresponding to the enum CPU.
8961 If it doesn't specify a cpu, return the default. */
8963 static const struct processor
*
8964 aarch64_get_tune_cpu (enum aarch64_processor cpu
)
8966 if (cpu
!= aarch64_none
)
8967 return &all_cores
[cpu
];
8969 /* The & 0x3f is to extract the bottom 6 bits that encode the
8970 default cpu as selected by the --with-cpu GCC configure option
8972 ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
8973 flags mechanism should be reworked to make it more sane. */
8974 return &all_cores
[TARGET_CPU_DEFAULT
& 0x3f];
8977 /* Return the architecture corresponding to the enum ARCH.
8978 If it doesn't specify a valid architecture, return the default. */
8980 static const struct processor
*
8981 aarch64_get_arch (enum aarch64_arch arch
)
8983 if (arch
!= aarch64_no_arch
)
8984 return &all_architectures
[arch
];
8986 const struct processor
*cpu
= &all_cores
[TARGET_CPU_DEFAULT
& 0x3f];
8988 return &all_architectures
[cpu
->arch
];
8991 /* Implement TARGET_OPTION_OVERRIDE. This is called once in the beginning
8992 and is used to parse the -m{cpu,tune,arch} strings and setup the initial
8993 tuning structs. In particular it must set selected_tune and
8994 aarch64_isa_flags that define the available ISA features and tuning
8995 decisions. It must also set selected_arch as this will be used to
8996 output the .arch asm tags for each function. */
8999 aarch64_override_options (void)
9001 unsigned long cpu_isa
= 0;
9002 unsigned long arch_isa
= 0;
9003 aarch64_isa_flags
= 0;
9005 bool valid_cpu
= true;
9006 bool valid_tune
= true;
9007 bool valid_arch
= true;
9009 selected_cpu
= NULL
;
9010 selected_arch
= NULL
;
9011 selected_tune
= NULL
;
9013 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
9014 If either of -march or -mtune is given, they override their
9015 respective component of -mcpu. */
9016 if (aarch64_cpu_string
)
9017 valid_cpu
= aarch64_validate_mcpu (aarch64_cpu_string
, &selected_cpu
,
9020 if (aarch64_arch_string
)
9021 valid_arch
= aarch64_validate_march (aarch64_arch_string
, &selected_arch
,
9024 if (aarch64_tune_string
)
9025 valid_tune
= aarch64_validate_mtune (aarch64_tune_string
, &selected_tune
);
9027 /* If the user did not specify a processor, choose the default
9028 one for them. This will be the CPU set during configuration using
9029 --with-cpu, otherwise it is "generic". */
9034 selected_cpu
= &all_cores
[selected_arch
->ident
];
9035 aarch64_isa_flags
= arch_isa
;
9036 explicit_arch
= selected_arch
->arch
;
9040 /* Get default configure-time CPU. */
9041 selected_cpu
= aarch64_get_tune_cpu (aarch64_none
);
9042 aarch64_isa_flags
= TARGET_CPU_DEFAULT
>> 6;
9046 explicit_tune_core
= selected_tune
->ident
;
9048 /* If both -mcpu and -march are specified check that they are architecturally
9049 compatible, warn if they're not and prefer the -march ISA flags. */
9050 else if (selected_arch
)
9052 if (selected_arch
->arch
!= selected_cpu
->arch
)
9054 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
9055 all_architectures
[selected_cpu
->arch
].name
,
9056 selected_arch
->name
);
9058 aarch64_isa_flags
= arch_isa
;
9059 explicit_arch
= selected_arch
->arch
;
9060 explicit_tune_core
= selected_tune
? selected_tune
->ident
9061 : selected_cpu
->ident
;
9065 /* -mcpu but no -march. */
9066 aarch64_isa_flags
= cpu_isa
;
9067 explicit_tune_core
= selected_tune
? selected_tune
->ident
9068 : selected_cpu
->ident
;
9069 gcc_assert (selected_cpu
);
9070 selected_arch
= &all_architectures
[selected_cpu
->arch
];
9071 explicit_arch
= selected_arch
->arch
;
9074 /* Set the arch as well as we will need it when outputing
9075 the .arch directive in assembly. */
9078 gcc_assert (selected_cpu
);
9079 selected_arch
= &all_architectures
[selected_cpu
->arch
];
9083 selected_tune
= selected_cpu
;
9085 #ifndef HAVE_AS_MABI_OPTION
9086 /* The compiler may have been configured with 2.23.* binutils, which does
9087 not have support for ILP32. */
9089 error ("Assembler does not support -mabi=ilp32");
9092 if (aarch64_ra_sign_scope
!= AARCH64_FUNCTION_NONE
&& TARGET_ILP32
)
9093 sorry ("Return address signing is only supported for -mabi=lp64");
9095 /* Make sure we properly set up the explicit options. */
9096 if ((aarch64_cpu_string
&& valid_cpu
)
9097 || (aarch64_tune_string
&& valid_tune
))
9098 gcc_assert (explicit_tune_core
!= aarch64_none
);
9100 if ((aarch64_cpu_string
&& valid_cpu
)
9101 || (aarch64_arch_string
&& valid_arch
))
9102 gcc_assert (explicit_arch
!= aarch64_no_arch
);
9104 aarch64_override_options_internal (&global_options
);
9106 /* Save these options as the default ones in case we push and pop them later
9107 while processing functions with potential target attributes. */
9108 target_option_default_node
= target_option_current_node
9109 = build_target_option_node (&global_options
);
9112 /* Implement targetm.override_options_after_change. */
9115 aarch64_override_options_after_change (void)
9117 aarch64_override_options_after_change_1 (&global_options
);
9120 static struct machine_function
*
9121 aarch64_init_machine_status (void)
9123 struct machine_function
*machine
;
9124 machine
= ggc_cleared_alloc
<machine_function
> ();
9129 aarch64_init_expanders (void)
9131 init_machine_status
= aarch64_init_machine_status
;
9134 /* A checking mechanism for the implementation of the various code models. */
9136 initialize_aarch64_code_model (struct gcc_options
*opts
)
9138 if (opts
->x_flag_pic
)
9140 switch (opts
->x_aarch64_cmodel_var
)
9142 case AARCH64_CMODEL_TINY
:
9143 aarch64_cmodel
= AARCH64_CMODEL_TINY_PIC
;
9145 case AARCH64_CMODEL_SMALL
:
9146 #ifdef HAVE_AS_SMALL_PIC_RELOCS
9147 aarch64_cmodel
= (flag_pic
== 2
9148 ? AARCH64_CMODEL_SMALL_PIC
9149 : AARCH64_CMODEL_SMALL_SPIC
);
9151 aarch64_cmodel
= AARCH64_CMODEL_SMALL_PIC
;
9154 case AARCH64_CMODEL_LARGE
:
9155 sorry ("code model %qs with -f%s", "large",
9156 opts
->x_flag_pic
> 1 ? "PIC" : "pic");
9163 aarch64_cmodel
= opts
->x_aarch64_cmodel_var
;
9166 /* Implement TARGET_OPTION_SAVE. */
9169 aarch64_option_save (struct cl_target_option
*ptr
, struct gcc_options
*opts
)
9171 ptr
->x_aarch64_override_tune_string
= opts
->x_aarch64_override_tune_string
;
9174 /* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions
9175 using the information saved in PTR. */
9178 aarch64_option_restore (struct gcc_options
*opts
, struct cl_target_option
*ptr
)
9180 opts
->x_explicit_tune_core
= ptr
->x_explicit_tune_core
;
9181 selected_tune
= aarch64_get_tune_cpu (ptr
->x_explicit_tune_core
);
9182 opts
->x_explicit_arch
= ptr
->x_explicit_arch
;
9183 selected_arch
= aarch64_get_arch (ptr
->x_explicit_arch
);
9184 opts
->x_aarch64_override_tune_string
= ptr
->x_aarch64_override_tune_string
;
9186 aarch64_override_options_internal (opts
);
9189 /* Implement TARGET_OPTION_PRINT. */
9192 aarch64_option_print (FILE *file
, int indent
, struct cl_target_option
*ptr
)
9194 const struct processor
*cpu
9195 = aarch64_get_tune_cpu (ptr
->x_explicit_tune_core
);
9196 unsigned long isa_flags
= ptr
->x_aarch64_isa_flags
;
9197 const struct processor
*arch
= aarch64_get_arch (ptr
->x_explicit_arch
);
9198 std::string extension
9199 = aarch64_get_extension_string_for_isa_flags (isa_flags
, arch
->flags
);
9201 fprintf (file
, "%*sselected tune = %s\n", indent
, "", cpu
->name
);
9202 fprintf (file
, "%*sselected arch = %s%s\n", indent
, "",
9203 arch
->name
, extension
.c_str ());
9206 static GTY(()) tree aarch64_previous_fndecl
;
9209 aarch64_reset_previous_fndecl (void)
9211 aarch64_previous_fndecl
= NULL
;
9214 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE.
9215 Used by aarch64_set_current_function and aarch64_pragma_target_parse to
9216 make sure optab availability predicates are recomputed when necessary. */
9219 aarch64_save_restore_target_globals (tree new_tree
)
9221 if (TREE_TARGET_GLOBALS (new_tree
))
9222 restore_target_globals (TREE_TARGET_GLOBALS (new_tree
));
9223 else if (new_tree
== target_option_default_node
)
9224 restore_target_globals (&default_target_globals
);
9226 TREE_TARGET_GLOBALS (new_tree
) = save_target_globals_default_opts ();
9229 /* Implement TARGET_SET_CURRENT_FUNCTION. Unpack the codegen decisions
9230 like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
9231 of the function, if such exists. This function may be called multiple
9232 times on a single function so use aarch64_previous_fndecl to avoid
9233 setting up identical state. */
9236 aarch64_set_current_function (tree fndecl
)
9238 if (!fndecl
|| fndecl
== aarch64_previous_fndecl
)
9241 tree old_tree
= (aarch64_previous_fndecl
9242 ? DECL_FUNCTION_SPECIFIC_TARGET (aarch64_previous_fndecl
)
9245 tree new_tree
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
9247 /* If current function has no attributes but the previous one did,
9248 use the default node. */
9249 if (!new_tree
&& old_tree
)
9250 new_tree
= target_option_default_node
;
9252 /* If nothing to do, return. #pragma GCC reset or #pragma GCC pop to
9253 the default have been handled by aarch64_save_restore_target_globals from
9254 aarch64_pragma_target_parse. */
9255 if (old_tree
== new_tree
)
9258 aarch64_previous_fndecl
= fndecl
;
9260 /* First set the target options. */
9261 cl_target_option_restore (&global_options
, TREE_TARGET_OPTION (new_tree
));
9263 aarch64_save_restore_target_globals (new_tree
);
/* Enum describing the various ways we can handle attributes.
   In many cases we can reuse the generic option handling machinery.  */

enum aarch64_attr_opt_type
{
  aarch64_attr_mask,	/* Attribute should set a bit in target_flags.  */
  aarch64_attr_bool,	/* Attribute sets or unsets a boolean variable.  */
  aarch64_attr_enum,	/* Attribute sets an enum variable.  */
  aarch64_attr_custom	/* Attribute requires a custom handling function.  */
};
9277 /* All the information needed to handle a target attribute.
9278 NAME is the name of the attribute.
9279 ATTR_TYPE specifies the type of behavior of the attribute as described
9280 in the definition of enum aarch64_attr_opt_type.
9281 ALLOW_NEG is true if the attribute supports a "no-" form.
9282 HANDLER is the function that takes the attribute string and whether
9283 it is a pragma or attribute and handles the option. It is needed only
9284 when the ATTR_TYPE is aarch64_attr_custom.
9285 OPT_NUM is the enum specifying the option that the attribute modifies.
9286 This is needed for attributes that mirror the behavior of a command-line
9287 option, that is it has ATTR_TYPE aarch64_attr_mask, aarch64_attr_bool or
9288 aarch64_attr_enum. */
9290 struct aarch64_attribute_info
9293 enum aarch64_attr_opt_type attr_type
;
9295 bool (*handler
) (const char *, const char *);
9296 enum opt_code opt_num
;
9299 /* Handle the ARCH_STR argument to the arch= target attribute.
9300 PRAGMA_OR_ATTR is used in potential error messages. */
9303 aarch64_handle_attr_arch (const char *str
, const char *pragma_or_attr
)
9305 const struct processor
*tmp_arch
= NULL
;
9306 enum aarch64_parse_opt_result parse_res
9307 = aarch64_parse_arch (str
, &tmp_arch
, &aarch64_isa_flags
);
9309 if (parse_res
== AARCH64_PARSE_OK
)
9311 gcc_assert (tmp_arch
);
9312 selected_arch
= tmp_arch
;
9313 explicit_arch
= selected_arch
->arch
;
9319 case AARCH64_PARSE_MISSING_ARG
:
9320 error ("missing architecture name in 'arch' target %s", pragma_or_attr
);
9322 case AARCH64_PARSE_INVALID_ARG
:
9323 error ("unknown value %qs for 'arch' target %s", str
, pragma_or_attr
);
9324 aarch64_print_hint_for_arch (str
);
9326 case AARCH64_PARSE_INVALID_FEATURE
:
9327 error ("invalid feature modifier %qs for 'arch' target %s",
9328 str
, pragma_or_attr
);
9337 /* Handle the argument CPU_STR to the cpu= target attribute.
9338 PRAGMA_OR_ATTR is used in potential error messages. */
9341 aarch64_handle_attr_cpu (const char *str
, const char *pragma_or_attr
)
9343 const struct processor
*tmp_cpu
= NULL
;
9344 enum aarch64_parse_opt_result parse_res
9345 = aarch64_parse_cpu (str
, &tmp_cpu
, &aarch64_isa_flags
);
9347 if (parse_res
== AARCH64_PARSE_OK
)
9349 gcc_assert (tmp_cpu
);
9350 selected_tune
= tmp_cpu
;
9351 explicit_tune_core
= selected_tune
->ident
;
9353 selected_arch
= &all_architectures
[tmp_cpu
->arch
];
9354 explicit_arch
= selected_arch
->arch
;
9360 case AARCH64_PARSE_MISSING_ARG
:
9361 error ("missing cpu name in 'cpu' target %s", pragma_or_attr
);
9363 case AARCH64_PARSE_INVALID_ARG
:
9364 error ("unknown value %qs for 'cpu' target %s", str
, pragma_or_attr
);
9365 aarch64_print_hint_for_core (str
);
9367 case AARCH64_PARSE_INVALID_FEATURE
:
9368 error ("invalid feature modifier %qs for 'cpu' target %s",
9369 str
, pragma_or_attr
);
9378 /* Handle the argument STR to the tune= target attribute.
9379 PRAGMA_OR_ATTR is used in potential error messages. */
9382 aarch64_handle_attr_tune (const char *str
, const char *pragma_or_attr
)
9384 const struct processor
*tmp_tune
= NULL
;
9385 enum aarch64_parse_opt_result parse_res
9386 = aarch64_parse_tune (str
, &tmp_tune
);
9388 if (parse_res
== AARCH64_PARSE_OK
)
9390 gcc_assert (tmp_tune
);
9391 selected_tune
= tmp_tune
;
9392 explicit_tune_core
= selected_tune
->ident
;
9398 case AARCH64_PARSE_INVALID_ARG
:
9399 error ("unknown value %qs for 'tune' target %s", str
, pragma_or_attr
);
9400 aarch64_print_hint_for_core (str
);
9409 /* Parse an architecture extensions target attribute string specified in STR.
9410 For example "+fp+nosimd". Show any errors if needed. Return TRUE
9411 if successful. Update aarch64_isa_flags to reflect the ISA features
9413 PRAGMA_OR_ATTR is used in potential error messages. */
9416 aarch64_handle_attr_isa_flags (char *str
, const char *pragma_or_attr
)
9418 enum aarch64_parse_opt_result parse_res
;
9419 unsigned long isa_flags
= aarch64_isa_flags
;
9421 /* We allow "+nothing" in the beginning to clear out all architectural
9422 features if the user wants to handpick specific features. */
9423 if (strncmp ("+nothing", str
, 8) == 0)
9429 parse_res
= aarch64_parse_extension (str
, &isa_flags
);
9431 if (parse_res
== AARCH64_PARSE_OK
)
9433 aarch64_isa_flags
= isa_flags
;
9439 case AARCH64_PARSE_MISSING_ARG
:
9440 error ("missing feature modifier in target %s %qs",
9441 pragma_or_attr
, str
);
9444 case AARCH64_PARSE_INVALID_FEATURE
:
9445 error ("invalid feature modifier in target %s %qs",
9446 pragma_or_attr
, str
);
9456 /* The target attributes that we support. On top of these we also support just
9457 ISA extensions, like __attribute__ ((target ("+crc"))), but that case is
9458 handled explicitly in aarch64_process_one_target_attr. */
9460 static const struct aarch64_attribute_info aarch64_attributes
[] =
9462 { "general-regs-only", aarch64_attr_mask
, false, NULL
,
9463 OPT_mgeneral_regs_only
},
9464 { "fix-cortex-a53-835769", aarch64_attr_bool
, true, NULL
,
9465 OPT_mfix_cortex_a53_835769
},
9466 { "fix-cortex-a53-843419", aarch64_attr_bool
, true, NULL
,
9467 OPT_mfix_cortex_a53_843419
},
9468 { "cmodel", aarch64_attr_enum
, false, NULL
, OPT_mcmodel_
},
9469 { "strict-align", aarch64_attr_mask
, false, NULL
, OPT_mstrict_align
},
9470 { "omit-leaf-frame-pointer", aarch64_attr_bool
, true, NULL
,
9471 OPT_momit_leaf_frame_pointer
},
9472 { "tls-dialect", aarch64_attr_enum
, false, NULL
, OPT_mtls_dialect_
},
9473 { "arch", aarch64_attr_custom
, false, aarch64_handle_attr_arch
,
9475 { "cpu", aarch64_attr_custom
, false, aarch64_handle_attr_cpu
, OPT_mcpu_
},
9476 { "tune", aarch64_attr_custom
, false, aarch64_handle_attr_tune
,
9478 { "sign-return-address", aarch64_attr_enum
, false, NULL
,
9479 OPT_msign_return_address_
},
9480 { NULL
, aarch64_attr_custom
, false, NULL
, OPT____
}
9483 /* Parse ARG_STR which contains the definition of one target attribute.
9484 Show appropriate errors if any or return true if the attribute is valid.
9485 PRAGMA_OR_ATTR holds the string to use in error messages about whether
9486 we're processing a target attribute or pragma. */
9489 aarch64_process_one_target_attr (char *arg_str
, const char* pragma_or_attr
)
9491 bool invert
= false;
9493 size_t len
= strlen (arg_str
);
9497 error ("malformed target %s", pragma_or_attr
);
9501 char *str_to_check
= (char *) alloca (len
+ 1);
9502 strcpy (str_to_check
, arg_str
);
9504 /* Skip leading whitespace. */
9505 while (*str_to_check
== ' ' || *str_to_check
== '\t')
9508 /* We have something like __attribute__ ((target ("+fp+nosimd"))).
9509 It is easier to detect and handle it explicitly here rather than going
9510 through the machinery for the rest of the target attributes in this
9512 if (*str_to_check
== '+')
9513 return aarch64_handle_attr_isa_flags (str_to_check
, pragma_or_attr
);
9515 if (len
> 3 && strncmp (str_to_check
, "no-", 3) == 0)
9520 char *arg
= strchr (str_to_check
, '=');
9522 /* If we found opt=foo then terminate STR_TO_CHECK at the '='
9523 and point ARG to "foo". */
9529 const struct aarch64_attribute_info
*p_attr
;
9531 for (p_attr
= aarch64_attributes
; p_attr
->name
; p_attr
++)
9533 /* If the names don't match up, or the user has given an argument
9534 to an attribute that doesn't accept one, or didn't give an argument
9535 to an attribute that expects one, fail to match. */
9536 if (strcmp (str_to_check
, p_attr
->name
) != 0)
9540 bool attr_need_arg_p
= p_attr
->attr_type
== aarch64_attr_custom
9541 || p_attr
->attr_type
== aarch64_attr_enum
;
9543 if (attr_need_arg_p
^ (arg
!= NULL
))
9545 error ("target %s %qs does not accept an argument",
9546 pragma_or_attr
, str_to_check
);
9550 /* If the name matches but the attribute does not allow "no-" versions
9551 then we can't match. */
9552 if (invert
&& !p_attr
->allow_neg
)
9554 error ("target %s %qs does not allow a negated form",
9555 pragma_or_attr
, str_to_check
);
9559 switch (p_attr
->attr_type
)
9561 /* Has a custom handler registered.
9562 For example, cpu=, arch=, tune=. */
9563 case aarch64_attr_custom
:
9564 gcc_assert (p_attr
->handler
);
9565 if (!p_attr
->handler (arg
, pragma_or_attr
))
9569 /* Either set or unset a boolean option. */
9570 case aarch64_attr_bool
:
9572 struct cl_decoded_option decoded
;
9574 generate_option (p_attr
->opt_num
, NULL
, !invert
,
9575 CL_TARGET
, &decoded
);
9576 aarch64_handle_option (&global_options
, &global_options_set
,
9577 &decoded
, input_location
);
9580 /* Set or unset a bit in the target_flags. aarch64_handle_option
9581 should know what mask to apply given the option number. */
9582 case aarch64_attr_mask
:
9584 struct cl_decoded_option decoded
;
9585 /* We only need to specify the option number.
9586 aarch64_handle_option will know which mask to apply. */
9587 decoded
.opt_index
= p_attr
->opt_num
;
9588 decoded
.value
= !invert
;
9589 aarch64_handle_option (&global_options
, &global_options_set
,
9590 &decoded
, input_location
);
9593 /* Use the option setting machinery to set an option to an enum. */
9594 case aarch64_attr_enum
:
9599 valid
= opt_enum_arg_to_value (p_attr
->opt_num
, arg
,
9603 set_option (&global_options
, NULL
, p_attr
->opt_num
, value
,
9604 NULL
, DK_UNSPECIFIED
, input_location
,
9609 error ("target %s %s=%s is not valid",
9610 pragma_or_attr
, str_to_check
, arg
);
9619 /* If we reached here we either have found an attribute and validated
9620 it or didn't match any. If we matched an attribute but its arguments
9621 were malformed we will have returned false already. */
/* Count how many times the character C appears in
   NULL-terminated string STR.  */

static unsigned int
num_occurences_in_str (char c, char *str)
{
  unsigned int count = 0;

  for (; *str != '\0'; str++)
    if (*str == c)
      count++;

  return count;
}
9643 /* Parse the tree in ARGS that contains the target attribute information
9644 and update the global target options space. PRAGMA_OR_ATTR is a string
9645 to be used in error messages, specifying whether this is processing
9646 a target attribute or a target pragma. */
9649 aarch64_process_target_attr (tree args
, const char* pragma_or_attr
)
9651 if (TREE_CODE (args
) == TREE_LIST
)
9655 tree head
= TREE_VALUE (args
);
9658 if (!aarch64_process_target_attr (head
, pragma_or_attr
))
9661 args
= TREE_CHAIN (args
);
9667 if (TREE_CODE (args
) != STRING_CST
)
9669 error ("attribute %<target%> argument not a string");
9673 size_t len
= strlen (TREE_STRING_POINTER (args
));
9674 char *str_to_check
= (char *) alloca (len
+ 1);
9675 strcpy (str_to_check
, TREE_STRING_POINTER (args
));
9679 error ("malformed target %s value", pragma_or_attr
);
9683 /* Used to catch empty spaces between commas i.e.
9684 attribute ((target ("attr1,,attr2"))). */
9685 unsigned int num_commas
= num_occurences_in_str (',', str_to_check
);
9687 /* Handle multiple target attributes separated by ','. */
9688 char *token
= strtok (str_to_check
, ",");
9690 unsigned int num_attrs
= 0;
9694 if (!aarch64_process_one_target_attr (token
, pragma_or_attr
))
9696 error ("target %s %qs is invalid", pragma_or_attr
, token
);
9700 token
= strtok (NULL
, ",");
9703 if (num_attrs
!= num_commas
+ 1)
9705 error ("malformed target %s list %qs",
9706 pragma_or_attr
, TREE_STRING_POINTER (args
));
9713 /* Implement TARGET_OPTION_VALID_ATTRIBUTE_P. This is used to
9714 process attribute ((target ("..."))). */
9717 aarch64_option_valid_attribute_p (tree fndecl
, tree
, tree args
, int)
9719 struct cl_target_option cur_target
;
9722 tree new_target
, new_optimize
;
9723 tree existing_target
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
9725 /* If what we're processing is the current pragma string then the
9726 target option node is already stored in target_option_current_node
9727 by aarch64_pragma_target_parse in aarch64-c.c. Use that to avoid
9728 having to re-parse the string. This is especially useful to keep
9729 arm_neon.h compile times down since that header contains a lot
9730 of intrinsics enclosed in pragmas. */
9731 if (!existing_target
&& args
== current_target_pragma
)
9733 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = target_option_current_node
;
9736 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
9738 old_optimize
= build_optimization_node (&global_options
);
9739 func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
9741 /* If the function changed the optimization levels as well as setting
9742 target options, start with the optimizations specified. */
9743 if (func_optimize
&& func_optimize
!= old_optimize
)
9744 cl_optimization_restore (&global_options
,
9745 TREE_OPTIMIZATION (func_optimize
));
9747 /* Save the current target options to restore at the end. */
9748 cl_target_option_save (&cur_target
, &global_options
);
9750 /* If fndecl already has some target attributes applied to it, unpack
9751 them so that we add this attribute on top of them, rather than
9752 overwriting them. */
9753 if (existing_target
)
9755 struct cl_target_option
*existing_options
9756 = TREE_TARGET_OPTION (existing_target
);
9758 if (existing_options
)
9759 cl_target_option_restore (&global_options
, existing_options
);
9762 cl_target_option_restore (&global_options
,
9763 TREE_TARGET_OPTION (target_option_current_node
));
9766 ret
= aarch64_process_target_attr (args
, "attribute");
9768 /* Set up any additional state. */
9771 aarch64_override_options_internal (&global_options
);
9772 /* Initialize SIMD builtins if we haven't already.
9773 Set current_target_pragma to NULL for the duration so that
9774 the builtin initialization code doesn't try to tag the functions
9775 being built with the attributes specified by any current pragma, thus
9776 going into an infinite recursion. */
9779 tree saved_current_target_pragma
= current_target_pragma
;
9780 current_target_pragma
= NULL
;
9781 aarch64_init_simd_builtins ();
9782 current_target_pragma
= saved_current_target_pragma
;
9784 new_target
= build_target_option_node (&global_options
);
9789 new_optimize
= build_optimization_node (&global_options
);
9793 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
9795 if (old_optimize
!= new_optimize
)
9796 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
9799 cl_target_option_restore (&global_options
, &cur_target
);
9801 if (old_optimize
!= new_optimize
)
9802 cl_optimization_restore (&global_options
,
9803 TREE_OPTIMIZATION (old_optimize
));
/* Helper for aarch64_can_inline_p.  In the case where CALLER and CALLEE are
   tri-bool options (yes, no, don't care) and the default value is
   DEF, determine whether to reject inlining.  */

static bool
aarch64_tribools_ok_for_inlining_p (int caller, int callee,
				     int dont_care, int def)
{
  /* Inlining is always allowed when either side doesn't care.  */
  if (callee == dont_care || caller == dont_care)
    return true;

  /* Otherwise, allow inlining if either the callee and caller values
     agree, or if the callee is using the default value.  */
  return callee == caller || callee == def;
}
9828 /* Implement TARGET_CAN_INLINE_P. Decide whether it is valid
9829 to inline CALLEE into CALLER based on target-specific info.
9830 Make sure that the caller and callee have compatible architectural
9831 features. Then go through the other possible target attributes
9832 and see if they can block inlining. Try not to reject always_inline
9833 callees unless they are incompatible architecturally. */
9836 aarch64_can_inline_p (tree caller
, tree callee
)
9838 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
9839 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
9841 /* If callee has no option attributes, then it is ok to inline. */
9845 struct cl_target_option
*caller_opts
9846 = TREE_TARGET_OPTION (caller_tree
? caller_tree
9847 : target_option_default_node
);
9849 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
9852 /* Callee's ISA flags should be a subset of the caller's. */
9853 if ((caller_opts
->x_aarch64_isa_flags
& callee_opts
->x_aarch64_isa_flags
)
9854 != callee_opts
->x_aarch64_isa_flags
)
9857 /* Allow non-strict aligned functions inlining into strict
9859 if ((TARGET_STRICT_ALIGN_P (caller_opts
->x_target_flags
)
9860 != TARGET_STRICT_ALIGN_P (callee_opts
->x_target_flags
))
9861 && !(!TARGET_STRICT_ALIGN_P (callee_opts
->x_target_flags
)
9862 && TARGET_STRICT_ALIGN_P (caller_opts
->x_target_flags
)))
9865 bool always_inline
= lookup_attribute ("always_inline",
9866 DECL_ATTRIBUTES (callee
));
9868 /* If the architectural features match up and the callee is always_inline
9869 then the other attributes don't matter. */
9873 if (caller_opts
->x_aarch64_cmodel_var
9874 != callee_opts
->x_aarch64_cmodel_var
)
9877 if (caller_opts
->x_aarch64_tls_dialect
9878 != callee_opts
->x_aarch64_tls_dialect
)
9881 /* Honour explicit requests to workaround errata. */
9882 if (!aarch64_tribools_ok_for_inlining_p (
9883 caller_opts
->x_aarch64_fix_a53_err835769
,
9884 callee_opts
->x_aarch64_fix_a53_err835769
,
9885 2, TARGET_FIX_ERR_A53_835769_DEFAULT
))
9888 if (!aarch64_tribools_ok_for_inlining_p (
9889 caller_opts
->x_aarch64_fix_a53_err843419
,
9890 callee_opts
->x_aarch64_fix_a53_err843419
,
9891 2, TARGET_FIX_ERR_A53_843419
))
9894 /* If the user explicitly specified -momit-leaf-frame-pointer for the
9895 caller and calle and they don't match up, reject inlining. */
9896 if (!aarch64_tribools_ok_for_inlining_p (
9897 caller_opts
->x_flag_omit_leaf_frame_pointer
,
9898 callee_opts
->x_flag_omit_leaf_frame_pointer
,
9902 /* If the callee has specific tuning overrides, respect them. */
9903 if (callee_opts
->x_aarch64_override_tune_string
!= NULL
9904 && caller_opts
->x_aarch64_override_tune_string
== NULL
)
9907 /* If the user specified tuning override strings for the
9908 caller and callee and they don't match up, reject inlining.
9909 We just do a string compare here, we don't analyze the meaning
9910 of the string, as it would be too costly for little gain. */
9911 if (callee_opts
->x_aarch64_override_tune_string
9912 && caller_opts
->x_aarch64_override_tune_string
9913 && (strcmp (callee_opts
->x_aarch64_override_tune_string
,
9914 caller_opts
->x_aarch64_override_tune_string
) != 0))
9920 /* Return true if SYMBOL_REF X binds locally. */
9923 aarch64_symbol_binds_local_p (const_rtx x
)
9925 return (SYMBOL_REF_DECL (x
)
9926 ? targetm
.binds_local_p (SYMBOL_REF_DECL (x
))
9927 : SYMBOL_REF_LOCAL_P (x
));
9930 /* Return true if SYMBOL_REF X is thread local */
9932 aarch64_tls_symbol_p (rtx x
)
9934 if (! TARGET_HAVE_TLS
)
9937 if (GET_CODE (x
) != SYMBOL_REF
)
9940 return SYMBOL_REF_TLS_MODEL (x
) != 0;
9943 /* Classify a TLS symbol into one of the TLS kinds. */
9944 enum aarch64_symbol_type
9945 aarch64_classify_tls_symbol (rtx x
)
9947 enum tls_model tls_kind
= tls_symbolic_operand_type (x
);
9951 case TLS_MODEL_GLOBAL_DYNAMIC
:
9952 case TLS_MODEL_LOCAL_DYNAMIC
:
9953 return TARGET_TLS_DESC
? SYMBOL_SMALL_TLSDESC
: SYMBOL_SMALL_TLSGD
;
9955 case TLS_MODEL_INITIAL_EXEC
:
9956 switch (aarch64_cmodel
)
9958 case AARCH64_CMODEL_TINY
:
9959 case AARCH64_CMODEL_TINY_PIC
:
9960 return SYMBOL_TINY_TLSIE
;
9962 return SYMBOL_SMALL_TLSIE
;
9965 case TLS_MODEL_LOCAL_EXEC
:
9966 if (aarch64_tls_size
== 12)
9967 return SYMBOL_TLSLE12
;
9968 else if (aarch64_tls_size
== 24)
9969 return SYMBOL_TLSLE24
;
9970 else if (aarch64_tls_size
== 32)
9971 return SYMBOL_TLSLE32
;
9972 else if (aarch64_tls_size
== 48)
9973 return SYMBOL_TLSLE48
;
9977 case TLS_MODEL_EMULATED
:
9978 case TLS_MODEL_NONE
:
9979 return SYMBOL_FORCE_TO_MEM
;
9986 /* Return the method that should be used to access SYMBOL_REF or
9989 enum aarch64_symbol_type
9990 aarch64_classify_symbol (rtx x
, rtx offset
)
9992 if (GET_CODE (x
) == LABEL_REF
)
9994 switch (aarch64_cmodel
)
9996 case AARCH64_CMODEL_LARGE
:
9997 return SYMBOL_FORCE_TO_MEM
;
9999 case AARCH64_CMODEL_TINY_PIC
:
10000 case AARCH64_CMODEL_TINY
:
10001 return SYMBOL_TINY_ABSOLUTE
;
10003 case AARCH64_CMODEL_SMALL_SPIC
:
10004 case AARCH64_CMODEL_SMALL_PIC
:
10005 case AARCH64_CMODEL_SMALL
:
10006 return SYMBOL_SMALL_ABSOLUTE
;
10009 gcc_unreachable ();
10013 if (GET_CODE (x
) == SYMBOL_REF
)
10015 if (aarch64_tls_symbol_p (x
))
10016 return aarch64_classify_tls_symbol (x
);
10018 switch (aarch64_cmodel
)
10020 case AARCH64_CMODEL_TINY
:
10021 /* When we retrieve symbol + offset address, we have to make sure
10022 the offset does not cause overflow of the final address. But
10023 we have no way of knowing the address of symbol at compile time
10024 so we can't accurately say if the distance between the PC and
10025 symbol + offset is outside the addressible range of +/-1M in the
10026 TINY code model. So we rely on images not being greater than
10027 1M and cap the offset at 1M and anything beyond 1M will have to
10028 be loaded using an alternative mechanism. Furthermore if the
10029 symbol is a weak reference to something that isn't known to
10030 resolve to a symbol in this module, then force to memory. */
10031 if ((SYMBOL_REF_WEAK (x
)
10032 && !aarch64_symbol_binds_local_p (x
))
10033 || INTVAL (offset
) < -1048575 || INTVAL (offset
) > 1048575)
10034 return SYMBOL_FORCE_TO_MEM
;
10035 return SYMBOL_TINY_ABSOLUTE
;
10037 case AARCH64_CMODEL_SMALL
:
10038 /* Same reasoning as the tiny code model, but the offset cap here is
10040 if ((SYMBOL_REF_WEAK (x
)
10041 && !aarch64_symbol_binds_local_p (x
))
10042 || !IN_RANGE (INTVAL (offset
), HOST_WIDE_INT_C (-4294967263),
10043 HOST_WIDE_INT_C (4294967264)))
10044 return SYMBOL_FORCE_TO_MEM
;
10045 return SYMBOL_SMALL_ABSOLUTE
;
10047 case AARCH64_CMODEL_TINY_PIC
:
10048 if (!aarch64_symbol_binds_local_p (x
))
10049 return SYMBOL_TINY_GOT
;
10050 return SYMBOL_TINY_ABSOLUTE
;
10052 case AARCH64_CMODEL_SMALL_SPIC
:
10053 case AARCH64_CMODEL_SMALL_PIC
:
10054 if (!aarch64_symbol_binds_local_p (x
))
10055 return (aarch64_cmodel
== AARCH64_CMODEL_SMALL_SPIC
10056 ? SYMBOL_SMALL_GOT_28K
: SYMBOL_SMALL_GOT_4G
);
10057 return SYMBOL_SMALL_ABSOLUTE
;
10059 case AARCH64_CMODEL_LARGE
:
10060 /* This is alright even in PIC code as the constant
10061 pool reference is always PC relative and within
10062 the same translation unit. */
10063 if (CONSTANT_POOL_ADDRESS_P (x
))
10064 return SYMBOL_SMALL_ABSOLUTE
;
10066 return SYMBOL_FORCE_TO_MEM
;
10069 gcc_unreachable ();
10073 /* By default push everything into the constant pool. */
10074 return SYMBOL_FORCE_TO_MEM
;
10078 aarch64_constant_address_p (rtx x
)
10080 return (CONSTANT_P (x
) && memory_address_p (DImode
, x
));
10084 aarch64_legitimate_pic_operand_p (rtx x
)
10086 if (GET_CODE (x
) == SYMBOL_REF
10087 || (GET_CODE (x
) == CONST
10088 && GET_CODE (XEXP (x
, 0)) == PLUS
10089 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
10095 /* Return true if X holds either a quarter-precision or
10096 floating-point +0.0 constant. */
10098 aarch64_valid_floating_const (machine_mode mode
, rtx x
)
10100 if (!CONST_DOUBLE_P (x
))
10103 if (aarch64_float_const_zero_rtx_p (x
))
10106 /* We only handle moving 0.0 to a TFmode register. */
10107 if (!(mode
== SFmode
|| mode
== DFmode
))
10110 return aarch64_float_const_representable_p (x
);
10114 aarch64_legitimate_constant_p (machine_mode mode
, rtx x
)
10116 /* Do not allow vector struct mode constants. We could support
10117 0 and -1 easily, but they need support in aarch64-simd.md. */
10118 if (TARGET_SIMD
&& aarch64_vect_struct_mode_p (mode
))
10121 /* This could probably go away because
10122 we now decompose CONST_INTs according to expand_mov_immediate. */
10123 if ((GET_CODE (x
) == CONST_VECTOR
10124 && aarch64_simd_valid_immediate (x
, mode
, false, NULL
))
10125 || CONST_INT_P (x
) || aarch64_valid_floating_const (mode
, x
))
10126 return !targetm
.cannot_force_const_mem (mode
, x
);
10128 if (GET_CODE (x
) == HIGH
10129 && aarch64_valid_symref (XEXP (x
, 0), GET_MODE (XEXP (x
, 0))))
10132 /* Treat symbols as constants. Avoid TLS symbols as they are complex,
10133 so spilling them is better than rematerialization. */
10134 if (SYMBOL_REF_P (x
) && !SYMBOL_REF_TLS_MODEL (x
))
10137 return aarch64_constant_address_p (x
);
10141 aarch64_load_tp (rtx target
)
10144 || GET_MODE (target
) != Pmode
10145 || !register_operand (target
, Pmode
))
10146 target
= gen_reg_rtx (Pmode
);
10148 /* Can return in any reg. */
10149 emit_insn (gen_aarch64_load_tp_hard (target
));
10153 /* On AAPCS systems, this is the "struct __va_list". */
10154 static GTY(()) tree va_list_type
;
10156 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
10157 Return the type to use as __builtin_va_list.
10159 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
10171 aarch64_build_builtin_va_list (void)
10174 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
10176 /* Create the type. */
10177 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
10178 /* Give it the required name. */
10179 va_list_name
= build_decl (BUILTINS_LOCATION
,
10181 get_identifier ("__va_list"),
10183 DECL_ARTIFICIAL (va_list_name
) = 1;
10184 TYPE_NAME (va_list_type
) = va_list_name
;
10185 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
10187 /* Create the fields. */
10188 f_stack
= build_decl (BUILTINS_LOCATION
,
10189 FIELD_DECL
, get_identifier ("__stack"),
10191 f_grtop
= build_decl (BUILTINS_LOCATION
,
10192 FIELD_DECL
, get_identifier ("__gr_top"),
10194 f_vrtop
= build_decl (BUILTINS_LOCATION
,
10195 FIELD_DECL
, get_identifier ("__vr_top"),
10197 f_groff
= build_decl (BUILTINS_LOCATION
,
10198 FIELD_DECL
, get_identifier ("__gr_offs"),
10199 integer_type_node
);
10200 f_vroff
= build_decl (BUILTINS_LOCATION
,
10201 FIELD_DECL
, get_identifier ("__vr_offs"),
10202 integer_type_node
);
10204 /* Tell tree-stdarg pass about our internal offset fields.
10205 NOTE: va_list_gpr/fpr_counter_field are only used for tree comparision
10206 purpose to identify whether the code is updating va_list internal
10207 offset fields through irregular way. */
10208 va_list_gpr_counter_field
= f_groff
;
10209 va_list_fpr_counter_field
= f_vroff
;
10211 DECL_ARTIFICIAL (f_stack
) = 1;
10212 DECL_ARTIFICIAL (f_grtop
) = 1;
10213 DECL_ARTIFICIAL (f_vrtop
) = 1;
10214 DECL_ARTIFICIAL (f_groff
) = 1;
10215 DECL_ARTIFICIAL (f_vroff
) = 1;
10217 DECL_FIELD_CONTEXT (f_stack
) = va_list_type
;
10218 DECL_FIELD_CONTEXT (f_grtop
) = va_list_type
;
10219 DECL_FIELD_CONTEXT (f_vrtop
) = va_list_type
;
10220 DECL_FIELD_CONTEXT (f_groff
) = va_list_type
;
10221 DECL_FIELD_CONTEXT (f_vroff
) = va_list_type
;
10223 TYPE_FIELDS (va_list_type
) = f_stack
;
10224 DECL_CHAIN (f_stack
) = f_grtop
;
10225 DECL_CHAIN (f_grtop
) = f_vrtop
;
10226 DECL_CHAIN (f_vrtop
) = f_groff
;
10227 DECL_CHAIN (f_groff
) = f_vroff
;
10229 /* Compute its layout. */
10230 layout_type (va_list_type
);
10232 return va_list_type
;
10235 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
10237 aarch64_expand_builtin_va_start (tree valist
, rtx nextarg ATTRIBUTE_UNUSED
)
10239 const CUMULATIVE_ARGS
*cum
;
10240 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
10241 tree stack
, grtop
, vrtop
, groff
, vroff
;
10243 int gr_save_area_size
= cfun
->va_list_gpr_size
;
10244 int vr_save_area_size
= cfun
->va_list_fpr_size
;
10247 cum
= &crtl
->args
.info
;
10248 if (cfun
->va_list_gpr_size
)
10249 gr_save_area_size
= MIN ((NUM_ARG_REGS
- cum
->aapcs_ncrn
) * UNITS_PER_WORD
,
10250 cfun
->va_list_gpr_size
);
10251 if (cfun
->va_list_fpr_size
)
10252 vr_save_area_size
= MIN ((NUM_FP_ARG_REGS
- cum
->aapcs_nvrn
)
10253 * UNITS_PER_VREG
, cfun
->va_list_fpr_size
);
10257 gcc_assert (cum
->aapcs_nvrn
== 0);
10258 vr_save_area_size
= 0;
10261 f_stack
= TYPE_FIELDS (va_list_type_node
);
10262 f_grtop
= DECL_CHAIN (f_stack
);
10263 f_vrtop
= DECL_CHAIN (f_grtop
);
10264 f_groff
= DECL_CHAIN (f_vrtop
);
10265 f_vroff
= DECL_CHAIN (f_groff
);
10267 stack
= build3 (COMPONENT_REF
, TREE_TYPE (f_stack
), valist
, f_stack
,
10269 grtop
= build3 (COMPONENT_REF
, TREE_TYPE (f_grtop
), valist
, f_grtop
,
10271 vrtop
= build3 (COMPONENT_REF
, TREE_TYPE (f_vrtop
), valist
, f_vrtop
,
10273 groff
= build3 (COMPONENT_REF
, TREE_TYPE (f_groff
), valist
, f_groff
,
10275 vroff
= build3 (COMPONENT_REF
, TREE_TYPE (f_vroff
), valist
, f_vroff
,
10278 /* Emit code to initialize STACK, which points to the next varargs stack
10279 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
10280 by named arguments. STACK is 8-byte aligned. */
10281 t
= make_tree (TREE_TYPE (stack
), virtual_incoming_args_rtx
);
10282 if (cum
->aapcs_stack_size
> 0)
10283 t
= fold_build_pointer_plus_hwi (t
, cum
->aapcs_stack_size
* UNITS_PER_WORD
);
10284 t
= build2 (MODIFY_EXPR
, TREE_TYPE (stack
), stack
, t
);
10285 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
10287 /* Emit code to initialize GRTOP, the top of the GR save area.
10288 virtual_incoming_args_rtx should have been 16 byte aligned. */
10289 t
= make_tree (TREE_TYPE (grtop
), virtual_incoming_args_rtx
);
10290 t
= build2 (MODIFY_EXPR
, TREE_TYPE (grtop
), grtop
, t
);
10291 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
10293 /* Emit code to initialize VRTOP, the top of the VR save area.
10294 This address is gr_save_area_bytes below GRTOP, rounded
10295 down to the next 16-byte boundary. */
10296 t
= make_tree (TREE_TYPE (vrtop
), virtual_incoming_args_rtx
);
10297 vr_offset
= ROUND_UP (gr_save_area_size
,
10298 STACK_BOUNDARY
/ BITS_PER_UNIT
);
10301 t
= fold_build_pointer_plus_hwi (t
, -vr_offset
);
10302 t
= build2 (MODIFY_EXPR
, TREE_TYPE (vrtop
), vrtop
, t
);
10303 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
10305 /* Emit code to initialize GROFF, the offset from GRTOP of the
10306 next GPR argument. */
10307 t
= build2 (MODIFY_EXPR
, TREE_TYPE (groff
), groff
,
10308 build_int_cst (TREE_TYPE (groff
), -gr_save_area_size
));
10309 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
10311 /* Likewise emit code to initialize VROFF, the offset from FTOP
10312 of the next VR argument. */
10313 t
= build2 (MODIFY_EXPR
, TREE_TYPE (vroff
), vroff
,
10314 build_int_cst (TREE_TYPE (vroff
), -vr_save_area_size
));
10315 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
10318 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
10321 aarch64_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
10322 gimple_seq
*post_p ATTRIBUTE_UNUSED
)
10326 bool is_ha
; /* is HFA or HVA. */
10327 bool dw_align
; /* double-word align. */
10328 machine_mode ag_mode
= VOIDmode
;
10332 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
10333 tree stack
, f_top
, f_off
, off
, arg
, roundup
, on_stack
;
10334 HOST_WIDE_INT size
, rsize
, adjust
, align
;
10335 tree t
, u
, cond1
, cond2
;
10337 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
10339 type
= build_pointer_type (type
);
10341 mode
= TYPE_MODE (type
);
10343 f_stack
= TYPE_FIELDS (va_list_type_node
);
10344 f_grtop
= DECL_CHAIN (f_stack
);
10345 f_vrtop
= DECL_CHAIN (f_grtop
);
10346 f_groff
= DECL_CHAIN (f_vrtop
);
10347 f_vroff
= DECL_CHAIN (f_groff
);
10349 stack
= build3 (COMPONENT_REF
, TREE_TYPE (f_stack
), unshare_expr (valist
),
10350 f_stack
, NULL_TREE
);
10351 size
= int_size_in_bytes (type
);
10352 align
= aarch64_function_arg_alignment (mode
, type
) / BITS_PER_UNIT
;
10356 if (aarch64_vfp_is_call_or_return_candidate (mode
,
10362 /* TYPE passed in fp/simd registers. */
10364 aarch64_err_no_fpadvsimd (mode
, "varargs");
10366 f_top
= build3 (COMPONENT_REF
, TREE_TYPE (f_vrtop
),
10367 unshare_expr (valist
), f_vrtop
, NULL_TREE
);
10368 f_off
= build3 (COMPONENT_REF
, TREE_TYPE (f_vroff
),
10369 unshare_expr (valist
), f_vroff
, NULL_TREE
);
10371 rsize
= nregs
* UNITS_PER_VREG
;
10375 if (BYTES_BIG_ENDIAN
&& GET_MODE_SIZE (ag_mode
) < UNITS_PER_VREG
)
10376 adjust
= UNITS_PER_VREG
- GET_MODE_SIZE (ag_mode
);
10378 else if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
10379 && size
< UNITS_PER_VREG
)
10381 adjust
= UNITS_PER_VREG
- size
;
10386 /* TYPE passed in general registers. */
10387 f_top
= build3 (COMPONENT_REF
, TREE_TYPE (f_grtop
),
10388 unshare_expr (valist
), f_grtop
, NULL_TREE
);
10389 f_off
= build3 (COMPONENT_REF
, TREE_TYPE (f_groff
),
10390 unshare_expr (valist
), f_groff
, NULL_TREE
);
10391 rsize
= ROUND_UP (size
, UNITS_PER_WORD
);
10392 nregs
= rsize
/ UNITS_PER_WORD
;
10397 if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
10398 && size
< UNITS_PER_WORD
)
10400 adjust
= UNITS_PER_WORD
- size
;
10404 /* Get a local temporary for the field value. */
10405 off
= get_initialized_tmp_var (f_off
, pre_p
, NULL
);
10407 /* Emit code to branch if off >= 0. */
10408 t
= build2 (GE_EXPR
, boolean_type_node
, off
,
10409 build_int_cst (TREE_TYPE (off
), 0));
10410 cond1
= build3 (COND_EXPR
, ptr_type_node
, t
, NULL_TREE
, NULL_TREE
);
10414 /* Emit: offs = (offs + 15) & -16. */
10415 t
= build2 (PLUS_EXPR
, TREE_TYPE (off
), off
,
10416 build_int_cst (TREE_TYPE (off
), 15));
10417 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (off
), t
,
10418 build_int_cst (TREE_TYPE (off
), -16));
10419 roundup
= build2 (MODIFY_EXPR
, TREE_TYPE (off
), off
, t
);
10424 /* Update ap.__[g|v]r_offs */
10425 t
= build2 (PLUS_EXPR
, TREE_TYPE (off
), off
,
10426 build_int_cst (TREE_TYPE (off
), rsize
));
10427 t
= build2 (MODIFY_EXPR
, TREE_TYPE (f_off
), unshare_expr (f_off
), t
);
10431 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), roundup
, t
);
10433 /* [cond2] if (ap.__[g|v]r_offs > 0) */
10434 u
= build2 (GT_EXPR
, boolean_type_node
, unshare_expr (f_off
),
10435 build_int_cst (TREE_TYPE (f_off
), 0));
10436 cond2
= build3 (COND_EXPR
, ptr_type_node
, u
, NULL_TREE
, NULL_TREE
);
10438 /* String up: make sure the assignment happens before the use. */
10439 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (cond2
), t
, cond2
);
10440 COND_EXPR_ELSE (cond1
) = t
;
10442 /* Prepare the trees handling the argument that is passed on the stack;
10443 the top level node will store in ON_STACK. */
10444 arg
= get_initialized_tmp_var (stack
, pre_p
, NULL
);
10447 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
10448 t
= fold_convert (intDI_type_node
, arg
);
10449 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
10450 build_int_cst (TREE_TYPE (t
), 15));
10451 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
10452 build_int_cst (TREE_TYPE (t
), -16));
10453 t
= fold_convert (TREE_TYPE (arg
), t
);
10454 roundup
= build2 (MODIFY_EXPR
, TREE_TYPE (arg
), arg
, t
);
10458 /* Advance ap.__stack */
10459 t
= fold_convert (intDI_type_node
, arg
);
10460 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
10461 build_int_cst (TREE_TYPE (t
), size
+ 7));
10462 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
10463 build_int_cst (TREE_TYPE (t
), -8));
10464 t
= fold_convert (TREE_TYPE (arg
), t
);
10465 t
= build2 (MODIFY_EXPR
, TREE_TYPE (stack
), unshare_expr (stack
), t
);
10466 /* String up roundup and advance. */
10468 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), roundup
, t
);
10469 /* String up with arg */
10470 on_stack
= build2 (COMPOUND_EXPR
, TREE_TYPE (arg
), t
, arg
);
10471 /* Big-endianness related address adjustment. */
10472 if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
10473 && size
< UNITS_PER_WORD
)
10475 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (arg
), arg
,
10476 size_int (UNITS_PER_WORD
- size
));
10477 on_stack
= build2 (COMPOUND_EXPR
, TREE_TYPE (arg
), on_stack
, t
);
10480 COND_EXPR_THEN (cond1
) = unshare_expr (on_stack
);
10481 COND_EXPR_THEN (cond2
) = unshare_expr (on_stack
);
10483 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
10486 t
= build2 (PREINCREMENT_EXPR
, TREE_TYPE (off
), off
,
10487 build_int_cst (TREE_TYPE (off
), adjust
));
10489 t
= fold_convert (sizetype
, t
);
10490 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (f_top
), f_top
, t
);
10494 /* type ha; // treat as "struct {ftype field[n];}"
10495 ... [computing offs]
10496 for (i = 0; i <nregs; ++i, offs += 16)
10497 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
10500 tree tmp_ha
, field_t
, field_ptr_t
;
10502 /* Declare a local variable. */
10503 tmp_ha
= create_tmp_var_raw (type
, "ha");
10504 gimple_add_tmp_var (tmp_ha
);
10506 /* Establish the base type. */
10510 field_t
= float_type_node
;
10511 field_ptr_t
= float_ptr_type_node
;
10514 field_t
= double_type_node
;
10515 field_ptr_t
= double_ptr_type_node
;
10518 field_t
= long_double_type_node
;
10519 field_ptr_t
= long_double_ptr_type_node
;
10522 field_t
= aarch64_fp16_type_node
;
10523 field_ptr_t
= aarch64_fp16_ptr_type_node
;
10528 tree innertype
= make_signed_type (GET_MODE_PRECISION (SImode
));
10529 field_t
= build_vector_type_for_mode (innertype
, ag_mode
);
10530 field_ptr_t
= build_pointer_type (field_t
);
10537 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
10538 tmp_ha
= build1 (ADDR_EXPR
, field_ptr_t
, tmp_ha
);
10540 t
= fold_convert (field_ptr_t
, addr
);
10541 t
= build2 (MODIFY_EXPR
, field_t
,
10542 build1 (INDIRECT_REF
, field_t
, tmp_ha
),
10543 build1 (INDIRECT_REF
, field_t
, t
));
10545 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
10546 for (i
= 1; i
< nregs
; ++i
)
10548 addr
= fold_build_pointer_plus_hwi (addr
, UNITS_PER_VREG
);
10549 u
= fold_convert (field_ptr_t
, addr
);
10550 u
= build2 (MODIFY_EXPR
, field_t
,
10551 build2 (MEM_REF
, field_t
, tmp_ha
,
10552 build_int_cst (field_ptr_t
,
10554 int_size_in_bytes (field_t
)))),
10555 build1 (INDIRECT_REF
, field_t
, u
));
10556 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), t
, u
);
10559 u
= fold_convert (TREE_TYPE (f_top
), tmp_ha
);
10560 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (f_top
), t
, u
);
10563 COND_EXPR_ELSE (cond2
) = t
;
10564 addr
= fold_convert (build_pointer_type (type
), cond1
);
10565 addr
= build_va_arg_indirect_ref (addr
);
10568 addr
= build_va_arg_indirect_ref (addr
);
10573 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
10576 aarch64_setup_incoming_varargs (cumulative_args_t cum_v
, machine_mode mode
,
10577 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
10580 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
10581 CUMULATIVE_ARGS local_cum
;
10582 int gr_saved
= cfun
->va_list_gpr_size
;
10583 int vr_saved
= cfun
->va_list_fpr_size
;
10585 /* The caller has advanced CUM up to, but not beyond, the last named
10586 argument. Advance a local copy of CUM past the last "real" named
10587 argument, to find out how many registers are left over. */
10589 aarch64_function_arg_advance (pack_cumulative_args(&local_cum
), mode
, type
, true);
10591 /* Found out how many registers we need to save.
10592 Honor tree-stdvar analysis results. */
10593 if (cfun
->va_list_gpr_size
)
10594 gr_saved
= MIN (NUM_ARG_REGS
- local_cum
.aapcs_ncrn
,
10595 cfun
->va_list_gpr_size
/ UNITS_PER_WORD
);
10596 if (cfun
->va_list_fpr_size
)
10597 vr_saved
= MIN (NUM_FP_ARG_REGS
- local_cum
.aapcs_nvrn
,
10598 cfun
->va_list_fpr_size
/ UNITS_PER_VREG
);
10602 gcc_assert (local_cum
.aapcs_nvrn
== 0);
10612 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
10613 ptr
= plus_constant (Pmode
, virtual_incoming_args_rtx
,
10614 - gr_saved
* UNITS_PER_WORD
);
10615 mem
= gen_frame_mem (BLKmode
, ptr
);
10616 set_mem_alias_set (mem
, get_varargs_alias_set ());
10618 move_block_from_reg (local_cum
.aapcs_ncrn
+ R0_REGNUM
,
10623 /* We can't use move_block_from_reg, because it will use
10624 the wrong mode, storing D regs only. */
10625 machine_mode mode
= TImode
;
10626 int off
, i
, vr_start
;
10628 /* Set OFF to the offset from virtual_incoming_args_rtx of
10629 the first vector register. The VR save area lies below
10630 the GR one, and is aligned to 16 bytes. */
10631 off
= -ROUND_UP (gr_saved
* UNITS_PER_WORD
,
10632 STACK_BOUNDARY
/ BITS_PER_UNIT
);
10633 off
-= vr_saved
* UNITS_PER_VREG
;
10635 vr_start
= V0_REGNUM
+ local_cum
.aapcs_nvrn
;
10636 for (i
= 0; i
< vr_saved
; ++i
)
10640 ptr
= plus_constant (Pmode
, virtual_incoming_args_rtx
, off
);
10641 mem
= gen_frame_mem (mode
, ptr
);
10642 set_mem_alias_set (mem
, get_varargs_alias_set ());
10643 aarch64_emit_move (mem
, gen_rtx_REG (mode
, vr_start
+ i
));
10644 off
+= UNITS_PER_VREG
;
10649 /* We don't save the size into *PRETEND_SIZE because we want to avoid
10650 any complication of having crtl->args.pretend_args_size changed. */
10651 cfun
->machine
->frame
.saved_varargs_size
10652 = (ROUND_UP (gr_saved
* UNITS_PER_WORD
,
10653 STACK_BOUNDARY
/ BITS_PER_UNIT
)
10654 + vr_saved
* UNITS_PER_VREG
);
10658 aarch64_conditional_register_usage (void)
10663 for (i
= V0_REGNUM
; i
<= V31_REGNUM
; i
++)
10666 call_used_regs
[i
] = 1;
10671 /* Walk down the type tree of TYPE counting consecutive base elements.
10672 If *MODEP is VOIDmode, then set it to the first valid floating point
10673 type. If a non-floating point type is found, or if a floating point
10674 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
10675 otherwise return the count in the sub-tree. */
10677 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
)
10680 HOST_WIDE_INT size
;
10682 switch (TREE_CODE (type
))
10685 mode
= TYPE_MODE (type
);
10686 if (mode
!= DFmode
&& mode
!= SFmode
10687 && mode
!= TFmode
&& mode
!= HFmode
)
10690 if (*modep
== VOIDmode
)
10693 if (*modep
== mode
)
10699 mode
= TYPE_MODE (TREE_TYPE (type
));
10700 if (mode
!= DFmode
&& mode
!= SFmode
10701 && mode
!= TFmode
&& mode
!= HFmode
)
10704 if (*modep
== VOIDmode
)
10707 if (*modep
== mode
)
10713 /* Use V2SImode and V4SImode as representatives of all 64-bit
10714 and 128-bit vector types. */
10715 size
= int_size_in_bytes (type
);
10728 if (*modep
== VOIDmode
)
10731 /* Vector modes are considered to be opaque: two vectors are
10732 equivalent for the purposes of being homogeneous aggregates
10733 if they are the same size. */
10734 if (*modep
== mode
)
10742 tree index
= TYPE_DOMAIN (type
);
10744 /* Can't handle incomplete types nor sizes that are not
10746 if (!COMPLETE_TYPE_P (type
)
10747 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
10750 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
10753 || !TYPE_MAX_VALUE (index
)
10754 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
10755 || !TYPE_MIN_VALUE (index
)
10756 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
10760 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
10761 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
10763 /* There must be no padding. */
10764 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
10776 /* Can't handle incomplete types nor sizes that are not
10778 if (!COMPLETE_TYPE_P (type
)
10779 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
10782 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
10784 if (TREE_CODE (field
) != FIELD_DECL
)
10787 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
10790 count
+= sub_count
;
10793 /* There must be no padding. */
10794 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
10801 case QUAL_UNION_TYPE
:
10803 /* These aren't very interesting except in a degenerate case. */
10808 /* Can't handle incomplete types nor sizes that are not
10810 if (!COMPLETE_TYPE_P (type
)
10811 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
10814 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
10816 if (TREE_CODE (field
) != FIELD_DECL
)
10819 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
10822 count
= count
> sub_count
? count
: sub_count
;
10825 /* There must be no padding. */
10826 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
10839 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
10840 type as described in AAPCS64 \S 4.1.2.
10842 See the comment above aarch64_composite_type_p for the notes on MODE. */
10845 aarch64_short_vector_p (const_tree type
,
10848 HOST_WIDE_INT size
= -1;
10850 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
10851 size
= int_size_in_bytes (type
);
10852 else if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
10853 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
10854 size
= GET_MODE_SIZE (mode
);
10856 return (size
== 8 || size
== 16);
10859 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
10860 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
10861 array types. The C99 floating-point complex types are also considered
10862 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
10863 types, which are GCC extensions and out of the scope of AAPCS64, are
10864 treated as composite types here as well.
10866 Note that MODE itself is not sufficient in determining whether a type
10867 is such a composite type or not. This is because
10868 stor-layout.c:compute_record_mode may have already changed the MODE
10869 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
10870 structure with only one field may have its MODE set to the mode of the
10871 field. Also an integer mode whose size matches the size of the
10872 RECORD_TYPE type may be used to substitute the original mode
10873 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
10874 solely relied on. */
10877 aarch64_composite_type_p (const_tree type
,
10880 if (aarch64_short_vector_p (type
, mode
))
10883 if (type
&& (AGGREGATE_TYPE_P (type
) || TREE_CODE (type
) == COMPLEX_TYPE
))
10886 if (mode
== BLKmode
10887 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
10888 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
10894 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
10895 shall be passed or returned in simd/fp register(s) (providing these
10896 parameter passing registers are available).
10898 Upon successful return, *COUNT returns the number of needed registers,
10899 *BASE_MODE returns the mode of the individual register and when IS_HAF
10900 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
10901 floating-point aggregate or a homogeneous short-vector aggregate. */
10904 aarch64_vfp_is_call_or_return_candidate (machine_mode mode
,
10906 machine_mode
*base_mode
,
10910 machine_mode new_mode
= VOIDmode
;
10911 bool composite_p
= aarch64_composite_type_p (type
, mode
);
10913 if (is_ha
!= NULL
) *is_ha
= false;
10915 if ((!composite_p
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
)
10916 || aarch64_short_vector_p (type
, mode
))
10921 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
10923 if (is_ha
!= NULL
) *is_ha
= true;
10925 new_mode
= GET_MODE_INNER (mode
);
10927 else if (type
&& composite_p
)
10929 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
10931 if (ag_count
> 0 && ag_count
<= HA_MAX_NUM_FLDS
)
10933 if (is_ha
!= NULL
) *is_ha
= true;
10942 *base_mode
= new_mode
;
10946 /* Implement TARGET_STRUCT_VALUE_RTX. */
10949 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED
,
10950 int incoming ATTRIBUTE_UNUSED
)
10952 return gen_rtx_REG (Pmode
, AARCH64_STRUCT_VALUE_REGNUM
);
10955 /* Implements target hook vector_mode_supported_p. */
10957 aarch64_vector_mode_supported_p (machine_mode mode
)
10960 && (mode
== V4SImode
|| mode
== V8HImode
10961 || mode
== V16QImode
|| mode
== V2DImode
10962 || mode
== V2SImode
|| mode
== V4HImode
10963 || mode
== V8QImode
|| mode
== V2SFmode
10964 || mode
== V4SFmode
|| mode
== V2DFmode
10965 || mode
== V4HFmode
|| mode
== V8HFmode
10966 || mode
== V1DFmode
))
10972 /* Return appropriate SIMD container
10973 for MODE within a vector of WIDTH bits. */
10974 static machine_mode
10975 aarch64_simd_container_mode (machine_mode mode
, unsigned width
)
10977 gcc_assert (width
== 64 || width
== 128);
11020 /* Return 128-bit container as the preferred SIMD mode for MODE. */
11021 static machine_mode
11022 aarch64_preferred_simd_mode (machine_mode mode
)
11024 return aarch64_simd_container_mode (mode
, 128);
/* Return the bitmask of possible vector sizes for the vectorizer
   to iterate over.  NOTE(review): body elided in the mangled extraction;
   restored as the canonical 16|8 mask — verify against upstream.  */
static unsigned int
aarch64_autovectorize_vector_sizes (void)
{
  return (16 | 8);
}
11035 /* Implement TARGET_MANGLE_TYPE. */
11037 static const char *
11038 aarch64_mangle_type (const_tree type
)
11040 /* The AArch64 ABI documents say that "__va_list" has to be
11041 managled as if it is in the "std" namespace. */
11042 if (lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
11043 return "St9__va_list";
11045 /* Half-precision float. */
11046 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
11049 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
11051 if (TYPE_NAME (type
) != NULL
)
11052 return aarch64_mangle_builtin_type (type
);
11054 /* Use the default mangling. */
11058 /* Find the first rtx_insn before insn that will generate an assembly
11062 aarch64_prev_real_insn (rtx_insn
*insn
)
11069 insn
= prev_real_insn (insn
);
11071 while (insn
&& recog_memoized (insn
) < 0);
11077 is_madd_op (enum attr_type t1
)
11080 /* A number of these may be AArch32 only. */
11081 enum attr_type mlatypes
[] = {
11082 TYPE_MLA
, TYPE_MLAS
, TYPE_SMLAD
, TYPE_SMLADX
, TYPE_SMLAL
, TYPE_SMLALD
,
11083 TYPE_SMLALS
, TYPE_SMLALXY
, TYPE_SMLAWX
, TYPE_SMLAWY
, TYPE_SMLAXY
,
11084 TYPE_SMMLA
, TYPE_UMLAL
, TYPE_UMLALS
,TYPE_SMLSD
, TYPE_SMLSDX
, TYPE_SMLSLD
11087 for (i
= 0; i
< sizeof (mlatypes
) / sizeof (enum attr_type
); i
++)
11089 if (t1
== mlatypes
[i
])
11096 /* Check if there is a register dependency between a load and the insn
11097 for which we hold recog_data. */
11100 dep_between_memop_and_curr (rtx memop
)
11105 gcc_assert (GET_CODE (memop
) == SET
);
11107 if (!REG_P (SET_DEST (memop
)))
11110 load_reg
= SET_DEST (memop
);
11111 for (opno
= 1; opno
< recog_data
.n_operands
; opno
++)
11113 rtx operand
= recog_data
.operand
[opno
];
11114 if (REG_P (operand
)
11115 && reg_overlap_mentioned_p (load_reg
, operand
))
11123 /* When working around the Cortex-A53 erratum 835769,
11124 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
11125 instruction and has a preceding memory instruction such that a NOP
11126 should be inserted between them. */
11129 aarch64_madd_needs_nop (rtx_insn
* insn
)
11131 enum attr_type attr_type
;
11135 if (!TARGET_FIX_ERR_A53_835769
)
11138 if (!INSN_P (insn
) || recog_memoized (insn
) < 0)
11141 attr_type
= get_attr_type (insn
);
11142 if (!is_madd_op (attr_type
))
11145 prev
= aarch64_prev_real_insn (insn
);
11146 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
11147 Restore recog state to INSN to avoid state corruption. */
11148 extract_constrain_insn_cached (insn
);
11150 if (!prev
|| !contains_mem_rtx_p (PATTERN (prev
)))
11153 body
= single_set (prev
);
11155 /* If the previous insn is a memory op and there is no dependency between
11156 it and the DImode madd, emit a NOP between them. If body is NULL then we
11157 have a complex memory operation, probably a load/store pair.
11158 Be conservative for now and emit a NOP. */
11159 if (GET_MODE (recog_data
.operand
[0]) == DImode
11160 && (!body
|| !dep_between_memop_and_curr (body
)))
11168 /* Implement FINAL_PRESCAN_INSN. */
11171 aarch64_final_prescan_insn (rtx_insn
*insn
)
11173 if (aarch64_madd_needs_nop (insn
))
11174 fprintf (asm_out_file
, "\tnop // between mem op and mult-accumulate\n");
/* Return the equivalent assembly lane-size letter for SIZE (in bits).  */

static char
sizetochar (int size)
{
  switch (size)
    {
    case 64: return 'd';
    case 32: return 's';
    case 16: return 'h';
    case 8 : return 'b';
    default: gcc_unreachable ();
    }
}
11192 /* Return true iff x is a uniform vector of floating-point
11193 constants, and the constant can be represented in
11194 quarter-precision form. Note, as aarch64_float_const_representable
11195 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
11197 aarch64_vect_float_const_representable_p (rtx x
)
11200 return (GET_MODE_CLASS (GET_MODE (x
)) == MODE_VECTOR_FLOAT
11201 && const_vec_duplicate_p (x
, &elt
)
11202 && aarch64_float_const_representable_p (elt
));
11205 /* Return true for valid and false for invalid. */
11207 aarch64_simd_valid_immediate (rtx op
, machine_mode mode
, bool inverse
,
11208 struct simd_immediate_info
*info
)
11210 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
11212 for (i = 0; i < idx; i += (STRIDE)) \
11217 immtype = (CLASS); \
11218 elsize = (ELSIZE); \
11219 eshift = (SHIFT); \
11224 unsigned int i
, elsize
= 0, idx
= 0, n_elts
= CONST_VECTOR_NUNITS (op
);
11225 unsigned int innersize
= GET_MODE_UNIT_SIZE (mode
);
11226 unsigned char bytes
[16];
11227 int immtype
= -1, matches
;
11228 unsigned int invmask
= inverse
? 0xff : 0;
11231 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
11233 if (! (aarch64_simd_imm_zero_p (op
, mode
)
11234 || aarch64_vect_float_const_representable_p (op
)))
11239 info
->value
= CONST_VECTOR_ELT (op
, 0);
11240 info
->element_width
= GET_MODE_BITSIZE (GET_MODE (info
->value
));
11248 /* Splat vector constant out into a byte vector. */
11249 for (i
= 0; i
< n_elts
; i
++)
11251 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
11252 it must be laid out in the vector register in reverse order. */
11253 rtx el
= CONST_VECTOR_ELT (op
, BYTES_BIG_ENDIAN
? (n_elts
- 1 - i
) : i
);
11254 unsigned HOST_WIDE_INT elpart
;
11256 gcc_assert (CONST_INT_P (el
));
11257 elpart
= INTVAL (el
);
11259 for (unsigned int byte
= 0; byte
< innersize
; byte
++)
11261 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
11262 elpart
>>= BITS_PER_UNIT
;
11267 /* Sanity check. */
11268 gcc_assert (idx
== GET_MODE_SIZE (mode
));
11272 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
11273 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 0, 0);
11275 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11276 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 8, 0);
11278 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11279 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0, 16, 0);
11281 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11282 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3], 24, 0);
11284 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0, 0, 0);
11286 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1], 8, 0);
11288 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
11289 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 0, 1);
11291 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11292 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 8, 1);
11294 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11295 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff, 16, 1);
11297 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11298 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3], 24, 1);
11300 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff, 0, 1);
11302 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1], 8, 1);
11304 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
11305 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 8, 0);
11307 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
11308 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 8, 1);
11310 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
11311 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0, 16, 0);
11313 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
11314 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff, 16, 1);
11316 CHECK (1, 8, 16, bytes
[i
] == bytes
[0], 0, 0);
11318 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
11319 && bytes
[i
] == bytes
[(i
+ 8) % idx
], 0, 0);
11328 info
->element_width
= elsize
;
11329 info
->mvn
= emvn
!= 0;
11330 info
->shift
= eshift
;
11332 unsigned HOST_WIDE_INT imm
= 0;
11334 if (immtype
>= 12 && immtype
<= 15)
11337 /* Un-invert bytes of recognized vector, if necessary. */
11339 for (i
= 0; i
< idx
; i
++)
11340 bytes
[i
] ^= invmask
;
11344 /* FIXME: Broken on 32-bit H_W_I hosts. */
11345 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
11347 for (i
= 0; i
< 8; i
++)
11348 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
11349 << (i
* BITS_PER_UNIT
);
11352 info
->value
= GEN_INT (imm
);
11356 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
11357 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
11359 /* Construct 'abcdefgh' because the assembler cannot handle
11360 generic constants. */
11363 imm
= (imm
>> info
->shift
) & 0xff;
11364 info
->value
= GEN_INT (imm
);
11372 /* Check of immediate shift constants are within range. */
11374 aarch64_simd_shift_imm_p (rtx x
, machine_mode mode
, bool left
)
11376 int bit_width
= GET_MODE_UNIT_SIZE (mode
) * BITS_PER_UNIT
;
11378 return aarch64_const_vec_all_same_in_range_p (x
, 0, bit_width
- 1);
11380 return aarch64_const_vec_all_same_in_range_p (x
, 1, bit_width
);
11383 /* Return true if X is a uniform vector where all elements
11384 are either the floating-point constant 0.0 or the
11385 integer constant 0. */
11387 aarch64_simd_imm_zero_p (rtx x
, machine_mode mode
)
11389 return x
== CONST0_RTX (mode
);
11393 /* Return the bitmask CONST_INT to select the bits required by a zero extract
11394 operation of width WIDTH at bit position POS. */
11397 aarch64_mask_from_zextract_ops (rtx width
, rtx pos
)
11399 gcc_assert (CONST_INT_P (width
));
11400 gcc_assert (CONST_INT_P (pos
));
11402 unsigned HOST_WIDE_INT mask
11403 = ((unsigned HOST_WIDE_INT
) 1 << UINTVAL (width
)) - 1;
11404 return GEN_INT (mask
<< UINTVAL (pos
));
11408 aarch64_simd_imm_scalar_p (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
)
11410 HOST_WIDE_INT imm
= INTVAL (x
);
11413 for (i
= 0; i
< 8; i
++)
11415 unsigned int byte
= imm
& 0xff;
11416 if (byte
!= 0xff && byte
!= 0)
11425 aarch64_mov_operand_p (rtx x
, machine_mode mode
)
11427 if (GET_CODE (x
) == HIGH
11428 && aarch64_valid_symref (XEXP (x
, 0), GET_MODE (XEXP (x
, 0))))
11431 if (CONST_INT_P (x
))
11434 if (GET_CODE (x
) == SYMBOL_REF
&& mode
== DImode
&& CONSTANT_ADDRESS_P (x
))
11437 return aarch64_classify_symbolic_expression (x
)
11438 == SYMBOL_TINY_ABSOLUTE
;
11441 /* Return a const_int vector of VAL. */
11443 aarch64_simd_gen_const_vector_dup (machine_mode mode
, HOST_WIDE_INT val
)
11445 int nunits
= GET_MODE_NUNITS (mode
);
11446 rtvec v
= rtvec_alloc (nunits
);
11449 rtx cache
= GEN_INT (val
);
11451 for (i
=0; i
< nunits
; i
++)
11452 RTVEC_ELT (v
, i
) = cache
;
11454 return gen_rtx_CONST_VECTOR (mode
, v
);
11457 /* Check OP is a legal scalar immediate for the MOVI instruction. */
11460 aarch64_simd_scalar_immediate_valid_for_move (rtx op
, machine_mode mode
)
11462 machine_mode vmode
;
11464 gcc_assert (!VECTOR_MODE_P (mode
));
11465 vmode
= aarch64_preferred_simd_mode (mode
);
11466 rtx op_v
= aarch64_simd_gen_const_vector_dup (vmode
, INTVAL (op
));
11467 return aarch64_simd_valid_immediate (op_v
, vmode
, false, NULL
);
11470 /* Construct and return a PARALLEL RTX vector with elements numbering the
11471 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
11472 the vector - from the perspective of the architecture. This does not
11473 line up with GCC's perspective on lane numbers, so we end up with
11474 different masks depending on our target endian-ness. The diagram
11475 below may help. We must draw the distinction when building masks
11476 which select one half of the vector. An instruction selecting
11477 architectural low-lanes for a big-endian target, must be described using
11478 a mask selecting GCC high-lanes.
11480 Big-Endian Little-Endian
11482 GCC 0 1 2 3 3 2 1 0
11483 | x | x | x | x | | x | x | x | x |
11484 Architecture 3 2 1 0 3 2 1 0
11486 Low Mask: { 2, 3 } { 0, 1 }
11487 High Mask: { 0, 1 } { 2, 3 }
11491 aarch64_simd_vect_par_cnst_half (machine_mode mode
, bool high
)
11493 int nunits
= GET_MODE_NUNITS (mode
);
11494 rtvec v
= rtvec_alloc (nunits
/ 2);
11495 int high_base
= nunits
/ 2;
11501 if (BYTES_BIG_ENDIAN
)
11502 base
= high
? low_base
: high_base
;
11504 base
= high
? high_base
: low_base
;
11506 for (i
= 0; i
< nunits
/ 2; i
++)
11507 RTVEC_ELT (v
, i
) = GEN_INT (base
+ i
);
11509 t1
= gen_rtx_PARALLEL (mode
, v
);
11513 /* Check OP for validity as a PARALLEL RTX vector with elements
11514 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
11515 from the perspective of the architecture. See the diagram above
11516 aarch64_simd_vect_par_cnst_half for more details. */
11519 aarch64_simd_check_vect_par_cnst_half (rtx op
, machine_mode mode
,
11522 rtx ideal
= aarch64_simd_vect_par_cnst_half (mode
, high
);
11523 HOST_WIDE_INT count_op
= XVECLEN (op
, 0);
11524 HOST_WIDE_INT count_ideal
= XVECLEN (ideal
, 0);
11527 if (!VECTOR_MODE_P (mode
))
11530 if (count_op
!= count_ideal
)
11533 for (i
= 0; i
< count_ideal
; i
++)
11535 rtx elt_op
= XVECEXP (op
, 0, i
);
11536 rtx elt_ideal
= XVECEXP (ideal
, 0, i
);
11538 if (!CONST_INT_P (elt_op
)
11539 || INTVAL (elt_ideal
) != INTVAL (elt_op
))
11545 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
11546 HIGH (exclusive). */
11548 aarch64_simd_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
11551 HOST_WIDE_INT lane
;
11552 gcc_assert (CONST_INT_P (operand
));
11553 lane
= INTVAL (operand
);
11555 if (lane
< low
|| lane
>= high
)
11558 error ("%Klane %wd out of range %wd - %wd", exp
, lane
, low
, high
- 1);
11560 error ("lane %wd out of range %wd - %wd", lane
, low
, high
- 1);
11564 /* Return TRUE if OP is a valid vector addressing mode. */
11566 aarch64_simd_mem_operand_p (rtx op
)
11568 return MEM_P (op
) && (GET_CODE (XEXP (op
, 0)) == POST_INC
11569 || REG_P (XEXP (op
, 0)));
11572 /* Emit a register copy from operand to operand, taking care not to
11573 early-clobber source registers in the process.
11575 COUNT is the number of components into which the copy needs to be
11578 aarch64_simd_emit_reg_reg_move (rtx
*operands
, enum machine_mode mode
,
11579 unsigned int count
)
11582 int rdest
= REGNO (operands
[0]);
11583 int rsrc
= REGNO (operands
[1]);
11585 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
11587 for (i
= 0; i
< count
; i
++)
11588 emit_move_insn (gen_rtx_REG (mode
, rdest
+ i
),
11589 gen_rtx_REG (mode
, rsrc
+ i
));
11591 for (i
= 0; i
< count
; i
++)
11592 emit_move_insn (gen_rtx_REG (mode
, rdest
+ count
- i
- 1),
11593 gen_rtx_REG (mode
, rsrc
+ count
- i
- 1));
11596 /* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
11597 one of VSTRUCT modes: OI, CI, or XI. */
11599 aarch64_simd_attr_length_rglist (enum machine_mode mode
)
11601 return (GET_MODE_SIZE (mode
) / UNITS_PER_VREG
) * 4;
11604 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
11605 alignment of a vector to 128 bits. */
11606 static HOST_WIDE_INT
11607 aarch64_simd_vector_alignment (const_tree type
)
11609 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
11610 return MIN (align
, 128);
11613 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
11615 aarch64_simd_vector_alignment_reachable (const_tree type
, bool is_packed
)
11620 /* We guarantee alignment for vectors up to 128-bits. */
11621 if (tree_int_cst_compare (TYPE_SIZE (type
),
11622 bitsize_int (BIGGEST_ALIGNMENT
)) > 0)
11625 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
11629 /* Return true if the vector misalignment factor is supported by the
11632 aarch64_builtin_support_vector_misalignment (machine_mode mode
,
11633 const_tree type
, int misalignment
,
11636 if (TARGET_SIMD
&& STRICT_ALIGNMENT
)
11638 /* Return if movmisalign pattern is not supported for this mode. */
11639 if (optab_handler (movmisalign_optab
, mode
) == CODE_FOR_nothing
)
11642 if (misalignment
== -1)
11644 /* Misalignment factor is unknown at compile time but we know
11645 it's word aligned. */
11646 if (aarch64_simd_vector_alignment_reachable (type
, is_packed
))
11648 int element_size
= TREE_INT_CST_LOW (TYPE_SIZE (type
));
11650 if (element_size
!= 64)
11656 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
11660 /* If VALS is a vector constant that can be loaded into a register
11661 using DUP, generate instructions to do so and return an RTX to
11662 assign to the register. Otherwise return NULL_RTX. */
11664 aarch64_simd_dup_constant (rtx vals
)
11666 machine_mode mode
= GET_MODE (vals
);
11667 machine_mode inner_mode
= GET_MODE_INNER (mode
);
11670 if (!const_vec_duplicate_p (vals
, &x
))
11673 /* We can load this constant by using DUP and a constant in a
11674 single ARM register. This will be cheaper than a vector
11676 x
= copy_to_mode_reg (inner_mode
, x
);
11677 return gen_rtx_VEC_DUPLICATE (mode
, x
);
11681 /* Generate code to load VALS, which is a PARALLEL containing only
11682 constants (for vec_init) or CONST_VECTOR, efficiently into a
11683 register. Returns an RTX to copy into the register, or NULL_RTX
11684 for a PARALLEL that can not be converted into a CONST_VECTOR. */
11686 aarch64_simd_make_constant (rtx vals
)
11688 machine_mode mode
= GET_MODE (vals
);
11690 rtx const_vec
= NULL_RTX
;
11691 int n_elts
= GET_MODE_NUNITS (mode
);
11695 if (GET_CODE (vals
) == CONST_VECTOR
)
11697 else if (GET_CODE (vals
) == PARALLEL
)
11699 /* A CONST_VECTOR must contain only CONST_INTs and
11700 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
11701 Only store valid constants in a CONST_VECTOR. */
11702 for (i
= 0; i
< n_elts
; ++i
)
11704 rtx x
= XVECEXP (vals
, 0, i
);
11705 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
11708 if (n_const
== n_elts
)
11709 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
11712 gcc_unreachable ();
11714 if (const_vec
!= NULL_RTX
11715 && aarch64_simd_valid_immediate (const_vec
, mode
, false, NULL
))
11716 /* Load using MOVI/MVNI. */
11718 else if ((const_dup
= aarch64_simd_dup_constant (vals
)) != NULL_RTX
)
11719 /* Loaded using DUP. */
11721 else if (const_vec
!= NULL_RTX
)
11722 /* Load from constant pool. We can not take advantage of single-cycle
11723 LD1 because we need a PC-relative addressing mode. */
11726 /* A PARALLEL containing something not valid inside CONST_VECTOR.
11727 We can not construct an initializer. */
11731 /* Expand a vector initialisation sequence, such that TARGET is
11732 initialised to contain VALS. */
11735 aarch64_expand_vector_init (rtx target
, rtx vals
)
11737 machine_mode mode
= GET_MODE (target
);
11738 machine_mode inner_mode
= GET_MODE_INNER (mode
);
11739 /* The number of vector elements. */
11740 int n_elts
= GET_MODE_NUNITS (mode
);
11741 /* The number of vector elements which are not constant. */
11743 rtx any_const
= NULL_RTX
;
11744 /* The first element of vals. */
11745 rtx v0
= XVECEXP (vals
, 0, 0);
11746 bool all_same
= true;
11748 /* Count the number of variable elements to initialise. */
11749 for (int i
= 0; i
< n_elts
; ++i
)
11751 rtx x
= XVECEXP (vals
, 0, i
);
11752 if (!(CONST_INT_P (x
) || CONST_DOUBLE_P (x
)))
11757 all_same
&= rtx_equal_p (x
, v0
);
11760 /* No variable elements, hand off to aarch64_simd_make_constant which knows
11761 how best to handle this. */
11764 rtx constant
= aarch64_simd_make_constant (vals
);
11765 if (constant
!= NULL_RTX
)
11767 emit_move_insn (target
, constant
);
11772 /* Splat a single non-constant element if we can. */
11775 rtx x
= copy_to_mode_reg (inner_mode
, v0
);
11776 aarch64_emit_move (target
, gen_rtx_VEC_DUPLICATE (mode
, x
));
11780 enum insn_code icode
= optab_handler (vec_set_optab
, mode
);
11781 gcc_assert (icode
!= CODE_FOR_nothing
);
11783 /* If there are only variable elements, try to optimize
11784 the insertion using dup for the most common element
11785 followed by insertions. */
11787 /* The algorithm will fill matches[*][0] with the earliest matching element,
11788 and matches[X][1] with the count of duplicate elements (if X is the
11789 earliest element which has duplicates). */
11791 if (n_var
== n_elts
&& n_elts
<= 16)
11793 int matches
[16][2] = {0};
11794 for (int i
= 0; i
< n_elts
; i
++)
11796 for (int j
= 0; j
<= i
; j
++)
11798 if (rtx_equal_p (XVECEXP (vals
, 0, i
), XVECEXP (vals
, 0, j
)))
11806 int maxelement
= 0;
11808 for (int i
= 0; i
< n_elts
; i
++)
11809 if (matches
[i
][1] > maxv
)
11812 maxv
= matches
[i
][1];
11815 /* Create a duplicate of the most common element. */
11816 rtx x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, maxelement
));
11817 aarch64_emit_move (target
, gen_rtx_VEC_DUPLICATE (mode
, x
));
11819 /* Insert the rest. */
11820 for (int i
= 0; i
< n_elts
; i
++)
11822 rtx x
= XVECEXP (vals
, 0, i
);
11823 if (matches
[i
][0] == maxelement
)
11825 x
= copy_to_mode_reg (inner_mode
, x
);
11826 emit_insn (GEN_FCN (icode
) (target
, x
, GEN_INT (i
)));
11831 /* Initialise a vector which is part-variable. We want to first try
11832 to build those lanes which are constant in the most efficient way we
11834 if (n_var
!= n_elts
)
11836 rtx copy
= copy_rtx (vals
);
11838 /* Load constant part of vector. We really don't care what goes into the
11839 parts we will overwrite, but we're more likely to be able to load the
11840 constant efficiently if it has fewer, larger, repeating parts
11841 (see aarch64_simd_valid_immediate). */
11842 for (int i
= 0; i
< n_elts
; i
++)
11844 rtx x
= XVECEXP (vals
, 0, i
);
11845 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
11847 rtx subst
= any_const
;
11848 for (int bit
= n_elts
/ 2; bit
> 0; bit
/= 2)
11850 /* Look in the copied vector, as more elements are const. */
11851 rtx test
= XVECEXP (copy
, 0, i
^ bit
);
11852 if (CONST_INT_P (test
) || CONST_DOUBLE_P (test
))
11858 XVECEXP (copy
, 0, i
) = subst
;
11860 aarch64_expand_vector_init (target
, copy
);
11863 /* Insert the variable lanes directly. */
11864 for (int i
= 0; i
< n_elts
; i
++)
11866 rtx x
= XVECEXP (vals
, 0, i
);
11867 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
11869 x
= copy_to_mode_reg (inner_mode
, x
);
11870 emit_insn (GEN_FCN (icode
) (target
, x
, GEN_INT (i
)));
11874 static unsigned HOST_WIDE_INT
11875 aarch64_shift_truncation_mask (machine_mode mode
)
11878 (!SHIFT_COUNT_TRUNCATED
11879 || aarch64_vector_mode_supported_p (mode
)
11880 || aarch64_vect_struct_mode_p (mode
)) ? 0 : (GET_MODE_BITSIZE (mode
) - 1);
11883 /* Select a format to encode pointers in exception handling data. */
11885 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED
, int global
)
11888 switch (aarch64_cmodel
)
11890 case AARCH64_CMODEL_TINY
:
11891 case AARCH64_CMODEL_TINY_PIC
:
11892 case AARCH64_CMODEL_SMALL
:
11893 case AARCH64_CMODEL_SMALL_PIC
:
11894 case AARCH64_CMODEL_SMALL_SPIC
:
11895 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
11897 type
= DW_EH_PE_sdata4
;
11900 /* No assumptions here. 8-byte relocs required. */
11901 type
= DW_EH_PE_sdata8
;
11904 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
11907 /* The last .arch and .tune assembly strings that we printed. */
11908 static std::string aarch64_last_printed_arch_string
;
11909 static std::string aarch64_last_printed_tune_string
;
11911 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
11912 by the function fndecl. */
11915 aarch64_declare_function_name (FILE *stream
, const char* name
,
11918 tree target_parts
= DECL_FUNCTION_SPECIFIC_TARGET (fndecl
);
11920 struct cl_target_option
*targ_options
;
11922 targ_options
= TREE_TARGET_OPTION (target_parts
);
11924 targ_options
= TREE_TARGET_OPTION (target_option_current_node
);
11925 gcc_assert (targ_options
);
11927 const struct processor
*this_arch
11928 = aarch64_get_arch (targ_options
->x_explicit_arch
);
11930 unsigned long isa_flags
= targ_options
->x_aarch64_isa_flags
;
11931 std::string extension
11932 = aarch64_get_extension_string_for_isa_flags (isa_flags
,
11934 /* Only update the assembler .arch string if it is distinct from the last
11935 such string we printed. */
11936 std::string to_print
= this_arch
->name
+ extension
;
11937 if (to_print
!= aarch64_last_printed_arch_string
)
11939 asm_fprintf (asm_out_file
, "\t.arch %s\n", to_print
.c_str ());
11940 aarch64_last_printed_arch_string
= to_print
;
11943 /* Print the cpu name we're tuning for in the comments, might be
11944 useful to readers of the generated asm. Do it only when it changes
11945 from function to function and verbose assembly is requested. */
11946 const struct processor
*this_tune
11947 = aarch64_get_tune_cpu (targ_options
->x_explicit_tune_core
);
11949 if (flag_debug_asm
&& aarch64_last_printed_tune_string
!= this_tune
->name
)
11951 asm_fprintf (asm_out_file
, "\t" ASM_COMMENT_START
".tune %s\n",
11953 aarch64_last_printed_tune_string
= this_tune
->name
;
11956 /* Don't forget the type directive for ELF. */
11957 ASM_OUTPUT_TYPE_DIRECTIVE (stream
, name
, "function");
11958 ASM_OUTPUT_LABEL (stream
, name
);
11961 /* Implements TARGET_ASM_FILE_START. Output the assembly header. */
11964 aarch64_start_file (void)
11966 struct cl_target_option
*default_options
11967 = TREE_TARGET_OPTION (target_option_default_node
);
11969 const struct processor
*default_arch
11970 = aarch64_get_arch (default_options
->x_explicit_arch
);
11971 unsigned long default_isa_flags
= default_options
->x_aarch64_isa_flags
;
11972 std::string extension
11973 = aarch64_get_extension_string_for_isa_flags (default_isa_flags
,
11974 default_arch
->flags
);
11976 aarch64_last_printed_arch_string
= default_arch
->name
+ extension
;
11977 aarch64_last_printed_tune_string
= "";
11978 asm_fprintf (asm_out_file
, "\t.arch %s\n",
11979 aarch64_last_printed_arch_string
.c_str ());
11981 default_file_start ();
11984 /* Emit load exclusive. */
11987 aarch64_emit_load_exclusive (machine_mode mode
, rtx rval
,
11988 rtx mem
, rtx model_rtx
)
11990 rtx (*gen
) (rtx
, rtx
, rtx
);
11994 case QImode
: gen
= gen_aarch64_load_exclusiveqi
; break;
11995 case HImode
: gen
= gen_aarch64_load_exclusivehi
; break;
11996 case SImode
: gen
= gen_aarch64_load_exclusivesi
; break;
11997 case DImode
: gen
= gen_aarch64_load_exclusivedi
; break;
11999 gcc_unreachable ();
12002 emit_insn (gen (rval
, mem
, model_rtx
));
12005 /* Emit store exclusive. */
12008 aarch64_emit_store_exclusive (machine_mode mode
, rtx bval
,
12009 rtx rval
, rtx mem
, rtx model_rtx
)
12011 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
12015 case QImode
: gen
= gen_aarch64_store_exclusiveqi
; break;
12016 case HImode
: gen
= gen_aarch64_store_exclusivehi
; break;
12017 case SImode
: gen
= gen_aarch64_store_exclusivesi
; break;
12018 case DImode
: gen
= gen_aarch64_store_exclusivedi
; break;
12020 gcc_unreachable ();
12023 emit_insn (gen (bval
, rval
, mem
, model_rtx
));
12026 /* Mark the previous jump instruction as unlikely. */
12029 aarch64_emit_unlikely_jump (rtx insn
)
12031 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
12033 rtx_insn
*jump
= emit_jump_insn (insn
);
12034 add_int_reg_note (jump
, REG_BR_PROB
, very_unlikely
);
12037 /* Expand a compare and swap pattern. */
12040 aarch64_expand_compare_and_swap (rtx operands
[])
12042 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
12043 machine_mode mode
, cmp_mode
;
12044 typedef rtx (*gen_cas_fn
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
12047 const gen_cas_fn split_cas
[] =
12049 gen_aarch64_compare_and_swapqi
,
12050 gen_aarch64_compare_and_swaphi
,
12051 gen_aarch64_compare_and_swapsi
,
12052 gen_aarch64_compare_and_swapdi
12054 const gen_cas_fn atomic_cas
[] =
12056 gen_aarch64_compare_and_swapqi_lse
,
12057 gen_aarch64_compare_and_swaphi_lse
,
12058 gen_aarch64_compare_and_swapsi_lse
,
12059 gen_aarch64_compare_and_swapdi_lse
12062 bval
= operands
[0];
12063 rval
= operands
[1];
12065 oldval
= operands
[3];
12066 newval
= operands
[4];
12067 is_weak
= operands
[5];
12068 mod_s
= operands
[6];
12069 mod_f
= operands
[7];
12070 mode
= GET_MODE (mem
);
12073 /* Normally the succ memory model must be stronger than fail, but in the
12074 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
12075 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
12077 if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f
)))
12078 && is_mm_release (memmodel_from_int (INTVAL (mod_s
))))
12079 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
12085 /* For short modes, we're going to perform the comparison in SImode,
12086 so do the zero-extension now. */
12088 rval
= gen_reg_rtx (SImode
);
12089 oldval
= convert_modes (SImode
, mode
, oldval
, true);
12090 /* Fall through. */
12094 /* Force the value into a register if needed. */
12095 if (!aarch64_plus_operand (oldval
, mode
))
12096 oldval
= force_reg (cmp_mode
, oldval
);
12100 gcc_unreachable ();
12105 case QImode
: idx
= 0; break;
12106 case HImode
: idx
= 1; break;
12107 case SImode
: idx
= 2; break;
12108 case DImode
: idx
= 3; break;
12110 gcc_unreachable ();
12113 gen
= atomic_cas
[idx
];
12115 gen
= split_cas
[idx
];
12117 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
12119 if (mode
== QImode
|| mode
== HImode
)
12120 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
12122 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
12123 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
12124 emit_insn (gen_rtx_SET (bval
, x
));
12127 /* Test whether the target supports using a atomic load-operate instruction.
12128 CODE is the operation and AFTER is TRUE if the data in memory after the
12129 operation should be returned and FALSE if the data before the operation
12130 should be returned. Returns FALSE if the operation isn't supported by the
12134 aarch64_atomic_ldop_supported_p (enum rtx_code code
)
12153 /* Emit a barrier, that is appropriate for memory model MODEL, at the end of a
12154 sequence implementing an atomic operation. */
12157 aarch64_emit_post_barrier (enum memmodel model
)
12159 const enum memmodel base_model
= memmodel_base (model
);
12161 if (is_mm_sync (model
)
12162 && (base_model
== MEMMODEL_ACQUIRE
12163 || base_model
== MEMMODEL_ACQ_REL
12164 || base_model
== MEMMODEL_SEQ_CST
))
12166 emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST
)));
12170 /* Emit an atomic compare-and-swap operation. RVAL is the destination register
12171 for the data in memory. EXPECTED is the value expected to be in memory.
12172 DESIRED is the value to store to memory. MEM is the memory location. MODEL
12173 is the memory ordering to use. */
12176 aarch64_gen_atomic_cas (rtx rval
, rtx mem
,
12177 rtx expected
, rtx desired
,
12180 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
12183 mode
= GET_MODE (mem
);
12187 case QImode
: gen
= gen_aarch64_atomic_casqi
; break;
12188 case HImode
: gen
= gen_aarch64_atomic_cashi
; break;
12189 case SImode
: gen
= gen_aarch64_atomic_cassi
; break;
12190 case DImode
: gen
= gen_aarch64_atomic_casdi
; break;
12192 gcc_unreachable ();
12195 /* Move the expected value into the CAS destination register. */
12196 emit_insn (gen_rtx_SET (rval
, expected
));
12198 /* Emit the CAS. */
12199 emit_insn (gen (rval
, mem
, desired
, model
));
12201 /* Compare the expected value with the value loaded by the CAS, to establish
12202 whether the swap was made. */
12203 aarch64_gen_compare_reg (EQ
, rval
, expected
);
12206 /* Split a compare and swap pattern. */
12209 aarch64_split_compare_and_swap (rtx operands
[])
12211 rtx rval
, mem
, oldval
, newval
, scratch
;
12214 rtx_code_label
*label1
, *label2
;
12216 enum memmodel model
;
12219 rval
= operands
[0];
12221 oldval
= operands
[2];
12222 newval
= operands
[3];
12223 is_weak
= (operands
[4] != const0_rtx
);
12224 model_rtx
= operands
[5];
12225 scratch
= operands
[7];
12226 mode
= GET_MODE (mem
);
12227 model
= memmodel_from_int (INTVAL (model_rtx
));
12229 /* When OLDVAL is zero and we want the strong version we can emit a tighter
12232 LD[A]XR rval, [mem]
12234 ST[L]XR scratch, newval, [mem]
12235 CBNZ scratch, .label1
12238 bool strong_zero_p
= !is_weak
&& oldval
== const0_rtx
;
12243 label1
= gen_label_rtx ();
12244 emit_label (label1
);
12246 label2
= gen_label_rtx ();
12248 /* The initial load can be relaxed for a __sync operation since a final
12249 barrier will be emitted to stop code hoisting. */
12250 if (is_mm_sync (model
))
12251 aarch64_emit_load_exclusive (mode
, rval
, mem
,
12252 GEN_INT (MEMMODEL_RELAXED
));
12254 aarch64_emit_load_exclusive (mode
, rval
, mem
, model_rtx
);
12258 x
= gen_rtx_NE (VOIDmode
, rval
, const0_rtx
);
12259 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
12260 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
12261 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
12265 cond
= aarch64_gen_compare_reg (NE
, rval
, oldval
);
12266 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
12267 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
12268 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
12269 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
12272 aarch64_emit_store_exclusive (mode
, scratch
, mem
, newval
, model_rtx
);
12276 x
= gen_rtx_NE (VOIDmode
, scratch
, const0_rtx
);
12277 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
12278 gen_rtx_LABEL_REF (Pmode
, label1
), pc_rtx
);
12279 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
12283 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
12284 x
= gen_rtx_COMPARE (CCmode
, scratch
, const0_rtx
);
12285 emit_insn (gen_rtx_SET (cond
, x
));
12288 emit_label (label2
);
12289 /* If we used a CBNZ in the exchange loop emit an explicit compare with RVAL
12290 to set the condition flags. If this is not used it will be removed by
12294 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
12295 x
= gen_rtx_COMPARE (CCmode
, rval
, const0_rtx
);
12296 emit_insn (gen_rtx_SET (cond
, x
));
12298 /* Emit any final barrier needed for a __sync operation. */
12299 if (is_mm_sync (model
))
12300 aarch64_emit_post_barrier (model
);
12303 /* Emit a BIC instruction. */
12306 aarch64_emit_bic (machine_mode mode
, rtx dst
, rtx s1
, rtx s2
, int shift
)
12308 rtx shift_rtx
= GEN_INT (shift
);
12309 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
12313 case SImode
: gen
= gen_and_one_cmpl_lshrsi3
; break;
12314 case DImode
: gen
= gen_and_one_cmpl_lshrdi3
; break;
12316 gcc_unreachable ();
12319 emit_insn (gen (dst
, s2
, shift_rtx
, s1
));
12322 /* Emit an atomic swap. */
12325 aarch64_emit_atomic_swap (machine_mode mode
, rtx dst
, rtx value
,
12326 rtx mem
, rtx model
)
12328 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
12332 case QImode
: gen
= gen_aarch64_atomic_swpqi
; break;
12333 case HImode
: gen
= gen_aarch64_atomic_swphi
; break;
12334 case SImode
: gen
= gen_aarch64_atomic_swpsi
; break;
12335 case DImode
: gen
= gen_aarch64_atomic_swpdi
; break;
12337 gcc_unreachable ();
12340 emit_insn (gen (dst
, mem
, value
, model
));
/* Operations supported by aarch64_emit_atomic_load_op.  */

enum aarch64_atomic_load_op_code
{
  AARCH64_LDOP_PLUS,	/* A + B  */
  AARCH64_LDOP_XOR,	/* A ^ B  */
  AARCH64_LDOP_OR,	/* A | B  */
  AARCH64_LDOP_BIC	/* A & ~B  */
};
12353 /* Emit an atomic load-operate. */
12356 aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code
,
12357 machine_mode mode
, rtx dst
, rtx src
,
12358 rtx mem
, rtx model
)
12360 typedef rtx (*aarch64_atomic_load_op_fn
) (rtx
, rtx
, rtx
, rtx
);
12361 const aarch64_atomic_load_op_fn plus
[] =
12363 gen_aarch64_atomic_loadaddqi
,
12364 gen_aarch64_atomic_loadaddhi
,
12365 gen_aarch64_atomic_loadaddsi
,
12366 gen_aarch64_atomic_loadadddi
12368 const aarch64_atomic_load_op_fn eor
[] =
12370 gen_aarch64_atomic_loadeorqi
,
12371 gen_aarch64_atomic_loadeorhi
,
12372 gen_aarch64_atomic_loadeorsi
,
12373 gen_aarch64_atomic_loadeordi
12375 const aarch64_atomic_load_op_fn ior
[] =
12377 gen_aarch64_atomic_loadsetqi
,
12378 gen_aarch64_atomic_loadsethi
,
12379 gen_aarch64_atomic_loadsetsi
,
12380 gen_aarch64_atomic_loadsetdi
12382 const aarch64_atomic_load_op_fn bic
[] =
12384 gen_aarch64_atomic_loadclrqi
,
12385 gen_aarch64_atomic_loadclrhi
,
12386 gen_aarch64_atomic_loadclrsi
,
12387 gen_aarch64_atomic_loadclrdi
12389 aarch64_atomic_load_op_fn gen
;
12394 case QImode
: idx
= 0; break;
12395 case HImode
: idx
= 1; break;
12396 case SImode
: idx
= 2; break;
12397 case DImode
: idx
= 3; break;
12399 gcc_unreachable ();
12404 case AARCH64_LDOP_PLUS
: gen
= plus
[idx
]; break;
12405 case AARCH64_LDOP_XOR
: gen
= eor
[idx
]; break;
12406 case AARCH64_LDOP_OR
: gen
= ior
[idx
]; break;
12407 case AARCH64_LDOP_BIC
: gen
= bic
[idx
]; break;
12409 gcc_unreachable ();
12412 emit_insn (gen (dst
, mem
, src
, model
));
12415 /* Emit an atomic load+operate. CODE is the operation. OUT_DATA is the
12416 location to store the data read from memory. OUT_RESULT is the location to
12417 store the result of the operation. MEM is the memory location to read and
12418 modify. MODEL_RTX is the memory ordering to use. VALUE is the second
12419 operand for the operation. Either OUT_DATA or OUT_RESULT, but not both, can
12423 aarch64_gen_atomic_ldop (enum rtx_code code
, rtx out_data
, rtx out_result
,
12424 rtx mem
, rtx value
, rtx model_rtx
)
12426 machine_mode mode
= GET_MODE (mem
);
12427 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
12428 const bool short_mode
= (mode
< SImode
);
12429 aarch64_atomic_load_op_code ldop_code
;
12434 out_data
= gen_lowpart (mode
, out_data
);
12437 out_result
= gen_lowpart (mode
, out_result
);
12439 /* Make sure the value is in a register, putting it into a destination
12440 register if it needs to be manipulated. */
12441 if (!register_operand (value
, mode
)
12442 || code
== AND
|| code
== MINUS
)
12444 src
= out_result
? out_result
: out_data
;
12445 emit_move_insn (src
, gen_lowpart (mode
, value
));
12449 gcc_assert (register_operand (src
, mode
));
12451 /* Preprocess the data for the operation as necessary. If the operation is
12452 a SET then emit a swap instruction and finish. */
12456 aarch64_emit_atomic_swap (mode
, out_data
, src
, mem
, model_rtx
);
12460 /* Negate the value and treat it as a PLUS. */
12464 /* Resize the value if necessary. */
12466 src
= gen_lowpart (wmode
, src
);
12468 neg_src
= gen_rtx_NEG (wmode
, src
);
12469 emit_insn (gen_rtx_SET (src
, neg_src
));
12472 src
= gen_lowpart (mode
, src
);
12474 /* Fall-through. */
12476 ldop_code
= AARCH64_LDOP_PLUS
;
12480 ldop_code
= AARCH64_LDOP_OR
;
12484 ldop_code
= AARCH64_LDOP_XOR
;
12491 /* Resize the value if necessary. */
12493 src
= gen_lowpart (wmode
, src
);
12495 not_src
= gen_rtx_NOT (wmode
, src
);
12496 emit_insn (gen_rtx_SET (src
, not_src
));
12499 src
= gen_lowpart (mode
, src
);
12501 ldop_code
= AARCH64_LDOP_BIC
;
12505 /* The operation can't be done with atomic instructions. */
12506 gcc_unreachable ();
12509 aarch64_emit_atomic_load_op (ldop_code
, mode
, out_data
, src
, mem
, model_rtx
);
12511 /* If necessary, calculate the data in memory after the update by redoing the
12512 operation from values in registers. */
12518 src
= gen_lowpart (wmode
, src
);
12519 out_data
= gen_lowpart (wmode
, out_data
);
12520 out_result
= gen_lowpart (wmode
, out_result
);
12529 x
= gen_rtx_PLUS (wmode
, out_data
, src
);
12532 x
= gen_rtx_IOR (wmode
, out_data
, src
);
12535 x
= gen_rtx_XOR (wmode
, out_data
, src
);
12538 aarch64_emit_bic (wmode
, out_result
, out_data
, src
, 0);
12541 gcc_unreachable ();
12544 emit_set_insn (out_result
, x
);
12549 /* Split an atomic operation. */
12552 aarch64_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
12553 rtx value
, rtx model_rtx
, rtx cond
)
12555 machine_mode mode
= GET_MODE (mem
);
12556 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
12557 const enum memmodel model
= memmodel_from_int (INTVAL (model_rtx
));
12558 const bool is_sync
= is_mm_sync (model
);
12559 rtx_code_label
*label
;
12562 /* Split the atomic operation into a sequence. */
12563 label
= gen_label_rtx ();
12564 emit_label (label
);
12567 new_out
= gen_lowpart (wmode
, new_out
);
12569 old_out
= gen_lowpart (wmode
, old_out
);
12572 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
12574 /* The initial load can be relaxed for a __sync operation since a final
12575 barrier will be emitted to stop code hoisting. */
12577 aarch64_emit_load_exclusive (mode
, old_out
, mem
,
12578 GEN_INT (MEMMODEL_RELAXED
));
12580 aarch64_emit_load_exclusive (mode
, old_out
, mem
, model_rtx
);
12589 x
= gen_rtx_AND (wmode
, old_out
, value
);
12590 emit_insn (gen_rtx_SET (new_out
, x
));
12591 x
= gen_rtx_NOT (wmode
, new_out
);
12592 emit_insn (gen_rtx_SET (new_out
, x
));
12596 if (CONST_INT_P (value
))
12598 value
= GEN_INT (-INTVAL (value
));
12601 /* Fall through. */
12604 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
12605 emit_insn (gen_rtx_SET (new_out
, x
));
12609 aarch64_emit_store_exclusive (mode
, cond
, mem
,
12610 gen_lowpart (mode
, new_out
), model_rtx
);
12612 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
12613 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
12614 gen_rtx_LABEL_REF (Pmode
, label
), pc_rtx
);
12615 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
12617 /* Emit any final barrier needed for a __sync operation. */
12619 aarch64_emit_post_barrier (model
);
12623 aarch64_init_libfuncs (void)
12625 /* Half-precision float operations. The compiler handles all operations
12626 with NULL libfuncs by converting to SFmode. */
12629 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
, "__gnu_f2h_ieee");
12630 set_conv_libfunc (sext_optab
, SFmode
, HFmode
, "__gnu_h2f_ieee");
12633 set_optab_libfunc (add_optab
, HFmode
, NULL
);
12634 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
12635 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
12636 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
12637 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
12640 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
12641 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
12642 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
12643 set_optab_libfunc (le_optab
, HFmode
, NULL
);
12644 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
12645 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
12646 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
12649 /* Target hook for c_mode_for_suffix. */
12650 static machine_mode
12651 aarch64_c_mode_for_suffix (char suffix
)
12659 /* We can only represent floating point constants which will fit in
12660 "quarter-precision" values. These values are characterised by
12661 a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
12664 (-1)^s * (n/16) * 2^r
12667 's' is the sign bit.
12668 'n' is an integer in the range 16 <= n <= 31.
12669 'r' is an integer in the range -3 <= r <= 4. */
12671 /* Return true iff X can be represented by a quarter-precision
12672 floating point immediate operand X. Note, we cannot represent 0.0. */
12674 aarch64_float_const_representable_p (rtx x
)
12676 /* This represents our current view of how many bits
12677 make up the mantissa. */
12678 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
12680 unsigned HOST_WIDE_INT mantissa
, mask
;
12681 REAL_VALUE_TYPE r
, m
;
12684 if (!CONST_DOUBLE_P (x
))
12687 /* We don't support HFmode constants yet. */
12688 if (GET_MODE (x
) == VOIDmode
|| GET_MODE (x
) == HFmode
)
12691 r
= *CONST_DOUBLE_REAL_VALUE (x
);
12693 /* We cannot represent infinities, NaNs or +/-zero. We won't
12694 know if we have +zero until we analyse the mantissa, but we
12695 can reject the other invalid values. */
12696 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
)
12697 || REAL_VALUE_MINUS_ZERO (r
))
12700 /* Extract exponent. */
12701 r
= real_value_abs (&r
);
12702 exponent
= REAL_EXP (&r
);
12704 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12705 highest (sign) bit, with a fixed binary point at bit point_pos.
12706 m1 holds the low part of the mantissa, m2 the high part.
12707 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
12708 bits for the mantissa, this can fail (low bits will be lost). */
12709 real_ldexp (&m
, &r
, point_pos
- exponent
);
12710 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
12712 /* If the low part of the mantissa has bits set we cannot represent
12714 if (w
.ulow () != 0)
12716 /* We have rejected the lower HOST_WIDE_INT, so update our
12717 understanding of how many bits lie in the mantissa and
12718 look only at the high HOST_WIDE_INT. */
12719 mantissa
= w
.elt (1);
12720 point_pos
-= HOST_BITS_PER_WIDE_INT
;
12722 /* We can only represent values with a mantissa of the form 1.xxxx. */
12723 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
12724 if ((mantissa
& mask
) != 0)
12727 /* Having filtered unrepresentable values, we may now remove all
12728 but the highest 5 bits. */
12729 mantissa
>>= point_pos
- 5;
12731 /* We cannot represent the value 0.0, so reject it. This is handled
12736 /* Then, as bit 4 is always set, we can mask it off, leaving
12737 the mantissa in the range [0, 15]. */
12738 mantissa
&= ~(1 << 4);
12739 gcc_assert (mantissa
<= 15);
12741 /* GCC internally does not use IEEE754-like encoding (where normalized
12742 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
12743 Our mantissa values are shifted 4 places to the left relative to
12744 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
12745 by 5 places to correct for GCC's representation. */
12746 exponent
= 5 - exponent
;
12748 return (exponent
>= 0 && exponent
<= 7);
12752 aarch64_output_simd_mov_immediate (rtx const_vector
,
12757 static char templ
[40];
12758 const char *mnemonic
;
12759 const char *shift_op
;
12760 unsigned int lane_count
= 0;
12763 struct simd_immediate_info info
= { NULL_RTX
, 0, 0, false, false };
12765 /* This will return true to show const_vector is legal for use as either
12766 a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
12767 also update INFO to show how the immediate should be generated. */
12768 is_valid
= aarch64_simd_valid_immediate (const_vector
, mode
, false, &info
);
12769 gcc_assert (is_valid
);
12771 element_char
= sizetochar (info
.element_width
);
12772 lane_count
= width
/ info
.element_width
;
12774 mode
= GET_MODE_INNER (mode
);
12775 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
12777 gcc_assert (info
.shift
== 0 && ! info
.mvn
);
12778 /* For FP zero change it to a CONST_INT 0 and use the integer SIMD
12779 move immediate path. */
12780 if (aarch64_float_const_zero_rtx_p (info
.value
))
12781 info
.value
= GEN_INT (0);
12784 const unsigned int buf_size
= 20;
12785 char float_buf
[buf_size
] = {'\0'};
12786 real_to_decimal_for_mode (float_buf
,
12787 CONST_DOUBLE_REAL_VALUE (info
.value
),
12788 buf_size
, buf_size
, 1, mode
);
12790 if (lane_count
== 1)
12791 snprintf (templ
, sizeof (templ
), "fmov\t%%d0, %s", float_buf
);
12793 snprintf (templ
, sizeof (templ
), "fmov\t%%0.%d%c, %s",
12794 lane_count
, element_char
, float_buf
);
12799 mnemonic
= info
.mvn
? "mvni" : "movi";
12800 shift_op
= info
.msl
? "msl" : "lsl";
12802 gcc_assert (CONST_INT_P (info
.value
));
12803 if (lane_count
== 1)
12804 snprintf (templ
, sizeof (templ
), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX
,
12805 mnemonic
, UINTVAL (info
.value
));
12806 else if (info
.shift
)
12807 snprintf (templ
, sizeof (templ
), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
12808 ", %s %d", mnemonic
, lane_count
, element_char
,
12809 UINTVAL (info
.value
), shift_op
, info
.shift
);
12811 snprintf (templ
, sizeof (templ
), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
,
12812 mnemonic
, lane_count
, element_char
, UINTVAL (info
.value
));
12817 aarch64_output_scalar_simd_mov_immediate (rtx immediate
,
12820 machine_mode vmode
;
12822 gcc_assert (!VECTOR_MODE_P (mode
));
12823 vmode
= aarch64_simd_container_mode (mode
, 64);
12824 rtx v_op
= aarch64_simd_gen_const_vector_dup (vmode
, INTVAL (immediate
));
12825 return aarch64_output_simd_mov_immediate (v_op
, vmode
, 64);
12828 /* Split operands into moves from op[1] + op[2] into op[0]. */
12831 aarch64_split_combinev16qi (rtx operands
[3])
12833 unsigned int dest
= REGNO (operands
[0]);
12834 unsigned int src1
= REGNO (operands
[1]);
12835 unsigned int src2
= REGNO (operands
[2]);
12836 machine_mode halfmode
= GET_MODE (operands
[1]);
12837 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
12838 rtx destlo
, desthi
;
12840 gcc_assert (halfmode
== V16QImode
);
12842 if (src1
== dest
&& src2
== dest
+ halfregs
)
12844 /* No-op move. Can't split to nothing; emit something. */
12845 emit_note (NOTE_INSN_DELETED
);
12849 /* Preserve register attributes for variable tracking. */
12850 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
12851 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
12852 GET_MODE_SIZE (halfmode
));
12854 /* Special case of reversed high/low parts. */
12855 if (reg_overlap_mentioned_p (operands
[2], destlo
)
12856 && reg_overlap_mentioned_p (operands
[1], desthi
))
12858 emit_insn (gen_xorv16qi3 (operands
[1], operands
[1], operands
[2]));
12859 emit_insn (gen_xorv16qi3 (operands
[2], operands
[1], operands
[2]));
12860 emit_insn (gen_xorv16qi3 (operands
[1], operands
[1], operands
[2]));
12862 else if (!reg_overlap_mentioned_p (operands
[2], destlo
))
12864 /* Try to avoid unnecessary moves if part of the result
12865 is in the right place already. */
12867 emit_move_insn (destlo
, operands
[1]);
12868 if (src2
!= dest
+ halfregs
)
12869 emit_move_insn (desthi
, operands
[2]);
12873 if (src2
!= dest
+ halfregs
)
12874 emit_move_insn (desthi
, operands
[2]);
12876 emit_move_insn (destlo
, operands
[1]);
12880 /* vec_perm support. */
12882 #define MAX_VECT_LEN 16
12884 struct expand_vec_perm_d
12886 rtx target
, op0
, op1
;
12887 unsigned char perm
[MAX_VECT_LEN
];
12888 machine_mode vmode
;
12889 unsigned char nelt
;
12894 /* Generate a variable permutation. */
12897 aarch64_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
12899 machine_mode vmode
= GET_MODE (target
);
12900 bool one_vector_p
= rtx_equal_p (op0
, op1
);
12902 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
12903 gcc_checking_assert (GET_MODE (op0
) == vmode
);
12904 gcc_checking_assert (GET_MODE (op1
) == vmode
);
12905 gcc_checking_assert (GET_MODE (sel
) == vmode
);
12906 gcc_checking_assert (TARGET_SIMD
);
12910 if (vmode
== V8QImode
)
12912 /* Expand the argument to a V16QI mode by duplicating it. */
12913 rtx pair
= gen_reg_rtx (V16QImode
);
12914 emit_insn (gen_aarch64_combinev8qi (pair
, op0
, op0
));
12915 emit_insn (gen_aarch64_tbl1v8qi (target
, pair
, sel
));
12919 emit_insn (gen_aarch64_tbl1v16qi (target
, op0
, sel
));
12926 if (vmode
== V8QImode
)
12928 pair
= gen_reg_rtx (V16QImode
);
12929 emit_insn (gen_aarch64_combinev8qi (pair
, op0
, op1
));
12930 emit_insn (gen_aarch64_tbl1v8qi (target
, pair
, sel
));
12934 pair
= gen_reg_rtx (OImode
);
12935 emit_insn (gen_aarch64_combinev16qi (pair
, op0
, op1
));
12936 emit_insn (gen_aarch64_tbl2v16qi (target
, pair
, sel
));
12942 aarch64_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
12944 machine_mode vmode
= GET_MODE (target
);
12945 unsigned int nelt
= GET_MODE_NUNITS (vmode
);
12946 bool one_vector_p
= rtx_equal_p (op0
, op1
);
12949 /* The TBL instruction does not use a modulo index, so we must take care
12950 of that ourselves. */
12951 mask
= aarch64_simd_gen_const_vector_dup (vmode
,
12952 one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
12953 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
12955 /* For big-endian, we also need to reverse the index within the vector
12956 (but not which vector). */
12957 if (BYTES_BIG_ENDIAN
)
12959 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
12961 mask
= aarch64_simd_gen_const_vector_dup (vmode
, nelt
- 1);
12962 sel
= expand_simple_binop (vmode
, XOR
, sel
, mask
,
12963 NULL
, 0, OPTAB_LIB_WIDEN
);
12965 aarch64_expand_vec_perm_1 (target
, op0
, op1
, sel
);
12968 /* Recognize patterns suitable for the TRN instructions. */
12970 aarch64_evpc_trn (struct expand_vec_perm_d
*d
)
12972 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
12973 rtx out
, in0
, in1
, x
;
12974 rtx (*gen
) (rtx
, rtx
, rtx
);
12975 machine_mode vmode
= d
->vmode
;
12977 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
12980 /* Note that these are little-endian tests.
12981 We correct for big-endian later. */
12982 if (d
->perm
[0] == 0)
12984 else if (d
->perm
[0] == 1)
12988 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
12990 for (i
= 0; i
< nelt
; i
+= 2)
12992 if (d
->perm
[i
] != i
+ odd
)
12994 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
13004 if (BYTES_BIG_ENDIAN
)
13006 x
= in0
, in0
= in1
, in1
= x
;
13015 case V16QImode
: gen
= gen_aarch64_trn2v16qi
; break;
13016 case V8QImode
: gen
= gen_aarch64_trn2v8qi
; break;
13017 case V8HImode
: gen
= gen_aarch64_trn2v8hi
; break;
13018 case V4HImode
: gen
= gen_aarch64_trn2v4hi
; break;
13019 case V4SImode
: gen
= gen_aarch64_trn2v4si
; break;
13020 case V2SImode
: gen
= gen_aarch64_trn2v2si
; break;
13021 case V2DImode
: gen
= gen_aarch64_trn2v2di
; break;
13022 case V4HFmode
: gen
= gen_aarch64_trn2v4hf
; break;
13023 case V8HFmode
: gen
= gen_aarch64_trn2v8hf
; break;
13024 case V4SFmode
: gen
= gen_aarch64_trn2v4sf
; break;
13025 case V2SFmode
: gen
= gen_aarch64_trn2v2sf
; break;
13026 case V2DFmode
: gen
= gen_aarch64_trn2v2df
; break;
13035 case V16QImode
: gen
= gen_aarch64_trn1v16qi
; break;
13036 case V8QImode
: gen
= gen_aarch64_trn1v8qi
; break;
13037 case V8HImode
: gen
= gen_aarch64_trn1v8hi
; break;
13038 case V4HImode
: gen
= gen_aarch64_trn1v4hi
; break;
13039 case V4SImode
: gen
= gen_aarch64_trn1v4si
; break;
13040 case V2SImode
: gen
= gen_aarch64_trn1v2si
; break;
13041 case V2DImode
: gen
= gen_aarch64_trn1v2di
; break;
13042 case V4HFmode
: gen
= gen_aarch64_trn1v4hf
; break;
13043 case V8HFmode
: gen
= gen_aarch64_trn1v8hf
; break;
13044 case V4SFmode
: gen
= gen_aarch64_trn1v4sf
; break;
13045 case V2SFmode
: gen
= gen_aarch64_trn1v2sf
; break;
13046 case V2DFmode
: gen
= gen_aarch64_trn1v2df
; break;
13052 emit_insn (gen (out
, in0
, in1
));
13056 /* Recognize patterns suitable for the UZP instructions. */
13058 aarch64_evpc_uzp (struct expand_vec_perm_d
*d
)
13060 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
13061 rtx out
, in0
, in1
, x
;
13062 rtx (*gen
) (rtx
, rtx
, rtx
);
13063 machine_mode vmode
= d
->vmode
;
13065 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
13068 /* Note that these are little-endian tests.
13069 We correct for big-endian later. */
13070 if (d
->perm
[0] == 0)
13072 else if (d
->perm
[0] == 1)
13076 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
13078 for (i
= 0; i
< nelt
; i
++)
13080 unsigned elt
= (i
* 2 + odd
) & mask
;
13081 if (d
->perm
[i
] != elt
)
13091 if (BYTES_BIG_ENDIAN
)
13093 x
= in0
, in0
= in1
, in1
= x
;
13102 case V16QImode
: gen
= gen_aarch64_uzp2v16qi
; break;
13103 case V8QImode
: gen
= gen_aarch64_uzp2v8qi
; break;
13104 case V8HImode
: gen
= gen_aarch64_uzp2v8hi
; break;
13105 case V4HImode
: gen
= gen_aarch64_uzp2v4hi
; break;
13106 case V4SImode
: gen
= gen_aarch64_uzp2v4si
; break;
13107 case V2SImode
: gen
= gen_aarch64_uzp2v2si
; break;
13108 case V2DImode
: gen
= gen_aarch64_uzp2v2di
; break;
13109 case V4HFmode
: gen
= gen_aarch64_uzp2v4hf
; break;
13110 case V8HFmode
: gen
= gen_aarch64_uzp2v8hf
; break;
13111 case V4SFmode
: gen
= gen_aarch64_uzp2v4sf
; break;
13112 case V2SFmode
: gen
= gen_aarch64_uzp2v2sf
; break;
13113 case V2DFmode
: gen
= gen_aarch64_uzp2v2df
; break;
13122 case V16QImode
: gen
= gen_aarch64_uzp1v16qi
; break;
13123 case V8QImode
: gen
= gen_aarch64_uzp1v8qi
; break;
13124 case V8HImode
: gen
= gen_aarch64_uzp1v8hi
; break;
13125 case V4HImode
: gen
= gen_aarch64_uzp1v4hi
; break;
13126 case V4SImode
: gen
= gen_aarch64_uzp1v4si
; break;
13127 case V2SImode
: gen
= gen_aarch64_uzp1v2si
; break;
13128 case V2DImode
: gen
= gen_aarch64_uzp1v2di
; break;
13129 case V4HFmode
: gen
= gen_aarch64_uzp1v4hf
; break;
13130 case V8HFmode
: gen
= gen_aarch64_uzp1v8hf
; break;
13131 case V4SFmode
: gen
= gen_aarch64_uzp1v4sf
; break;
13132 case V2SFmode
: gen
= gen_aarch64_uzp1v2sf
; break;
13133 case V2DFmode
: gen
= gen_aarch64_uzp1v2df
; break;
13139 emit_insn (gen (out
, in0
, in1
));
13143 /* Recognize patterns suitable for the ZIP instructions. */
13145 aarch64_evpc_zip (struct expand_vec_perm_d
*d
)
13147 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
13148 rtx out
, in0
, in1
, x
;
13149 rtx (*gen
) (rtx
, rtx
, rtx
);
13150 machine_mode vmode
= d
->vmode
;
13152 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
13155 /* Note that these are little-endian tests.
13156 We correct for big-endian later. */
13158 if (d
->perm
[0] == high
)
13161 else if (d
->perm
[0] == 0)
13165 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
13167 for (i
= 0; i
< nelt
/ 2; i
++)
13169 unsigned elt
= (i
+ high
) & mask
;
13170 if (d
->perm
[i
* 2] != elt
)
13172 elt
= (elt
+ nelt
) & mask
;
13173 if (d
->perm
[i
* 2 + 1] != elt
)
13183 if (BYTES_BIG_ENDIAN
)
13185 x
= in0
, in0
= in1
, in1
= x
;
13194 case V16QImode
: gen
= gen_aarch64_zip2v16qi
; break;
13195 case V8QImode
: gen
= gen_aarch64_zip2v8qi
; break;
13196 case V8HImode
: gen
= gen_aarch64_zip2v8hi
; break;
13197 case V4HImode
: gen
= gen_aarch64_zip2v4hi
; break;
13198 case V4SImode
: gen
= gen_aarch64_zip2v4si
; break;
13199 case V2SImode
: gen
= gen_aarch64_zip2v2si
; break;
13200 case V2DImode
: gen
= gen_aarch64_zip2v2di
; break;
13201 case V4HFmode
: gen
= gen_aarch64_zip2v4hf
; break;
13202 case V8HFmode
: gen
= gen_aarch64_zip2v8hf
; break;
13203 case V4SFmode
: gen
= gen_aarch64_zip2v4sf
; break;
13204 case V2SFmode
: gen
= gen_aarch64_zip2v2sf
; break;
13205 case V2DFmode
: gen
= gen_aarch64_zip2v2df
; break;
13214 case V16QImode
: gen
= gen_aarch64_zip1v16qi
; break;
13215 case V8QImode
: gen
= gen_aarch64_zip1v8qi
; break;
13216 case V8HImode
: gen
= gen_aarch64_zip1v8hi
; break;
13217 case V4HImode
: gen
= gen_aarch64_zip1v4hi
; break;
13218 case V4SImode
: gen
= gen_aarch64_zip1v4si
; break;
13219 case V2SImode
: gen
= gen_aarch64_zip1v2si
; break;
13220 case V2DImode
: gen
= gen_aarch64_zip1v2di
; break;
13221 case V4HFmode
: gen
= gen_aarch64_zip1v4hf
; break;
13222 case V8HFmode
: gen
= gen_aarch64_zip1v8hf
; break;
13223 case V4SFmode
: gen
= gen_aarch64_zip1v4sf
; break;
13224 case V2SFmode
: gen
= gen_aarch64_zip1v2sf
; break;
13225 case V2DFmode
: gen
= gen_aarch64_zip1v2df
; break;
13231 emit_insn (gen (out
, in0
, in1
));
13235 /* Recognize patterns for the EXT insn. */
13238 aarch64_evpc_ext (struct expand_vec_perm_d
*d
)
13240 unsigned int i
, nelt
= d
->nelt
;
13241 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
13244 unsigned int location
= d
->perm
[0]; /* Always < nelt. */
13246 /* Check if the extracted indices are increasing by one. */
13247 for (i
= 1; i
< nelt
; i
++)
13249 unsigned int required
= location
+ i
;
13250 if (d
->one_vector_p
)
13252 /* We'll pass the same vector in twice, so allow indices to wrap. */
13253 required
&= (nelt
- 1);
13255 if (d
->perm
[i
] != required
)
13261 case V16QImode
: gen
= gen_aarch64_extv16qi
; break;
13262 case V8QImode
: gen
= gen_aarch64_extv8qi
; break;
13263 case V4HImode
: gen
= gen_aarch64_extv4hi
; break;
13264 case V8HImode
: gen
= gen_aarch64_extv8hi
; break;
13265 case V2SImode
: gen
= gen_aarch64_extv2si
; break;
13266 case V4SImode
: gen
= gen_aarch64_extv4si
; break;
13267 case V4HFmode
: gen
= gen_aarch64_extv4hf
; break;
13268 case V8HFmode
: gen
= gen_aarch64_extv8hf
; break;
13269 case V2SFmode
: gen
= gen_aarch64_extv2sf
; break;
13270 case V4SFmode
: gen
= gen_aarch64_extv4sf
; break;
13271 case V2DImode
: gen
= gen_aarch64_extv2di
; break;
13272 case V2DFmode
: gen
= gen_aarch64_extv2df
; break;
13281 /* The case where (location == 0) is a no-op for both big- and little-endian,
13282 and is removed by the mid-end at optimization levels -O1 and higher. */
13284 if (BYTES_BIG_ENDIAN
&& (location
!= 0))
13286 /* After setup, we want the high elements of the first vector (stored
13287 at the LSB end of the register), and the low elements of the second
13288 vector (stored at the MSB end of the register). So swap. */
13289 std::swap (d
->op0
, d
->op1
);
13290 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
13291 location
= nelt
- location
;
13294 offset
= GEN_INT (location
);
13295 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
13299 /* Recognize patterns for the REV insns. */
13302 aarch64_evpc_rev (struct expand_vec_perm_d
*d
)
13304 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
13305 rtx (*gen
) (rtx
, rtx
);
13307 if (!d
->one_vector_p
)
13316 case V16QImode
: gen
= gen_aarch64_rev64v16qi
; break;
13317 case V8QImode
: gen
= gen_aarch64_rev64v8qi
; break;
13325 case V16QImode
: gen
= gen_aarch64_rev32v16qi
; break;
13326 case V8QImode
: gen
= gen_aarch64_rev32v8qi
; break;
13327 case V8HImode
: gen
= gen_aarch64_rev64v8hi
; break;
13328 case V4HImode
: gen
= gen_aarch64_rev64v4hi
; break;
13336 case V16QImode
: gen
= gen_aarch64_rev16v16qi
; break;
13337 case V8QImode
: gen
= gen_aarch64_rev16v8qi
; break;
13338 case V8HImode
: gen
= gen_aarch64_rev32v8hi
; break;
13339 case V4HImode
: gen
= gen_aarch64_rev32v4hi
; break;
13340 case V4SImode
: gen
= gen_aarch64_rev64v4si
; break;
13341 case V2SImode
: gen
= gen_aarch64_rev64v2si
; break;
13342 case V4SFmode
: gen
= gen_aarch64_rev64v4sf
; break;
13343 case V2SFmode
: gen
= gen_aarch64_rev64v2sf
; break;
13344 case V8HFmode
: gen
= gen_aarch64_rev64v8hf
; break;
13345 case V4HFmode
: gen
= gen_aarch64_rev64v4hf
; break;
13354 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
13355 for (j
= 0; j
<= diff
; j
+= 1)
13357 /* This is guaranteed to be true as the value of diff
13358 is 7, 3, 1 and we should have enough elements in the
13359 queue to generate this. Getting a vector mask with a
13360 value of diff other than these values implies that
13361 something is wrong by the time we get here. */
13362 gcc_assert (i
+ j
< nelt
);
13363 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
13371 emit_insn (gen (d
->target
, d
->op0
));
13376 aarch64_evpc_dup (struct expand_vec_perm_d
*d
)
13378 rtx (*gen
) (rtx
, rtx
, rtx
);
13379 rtx out
= d
->target
;
13381 machine_mode vmode
= d
->vmode
;
13382 unsigned int i
, elt
, nelt
= d
->nelt
;
13386 for (i
= 1; i
< nelt
; i
++)
13388 if (elt
!= d
->perm
[i
])
13392 /* The generic preparation in aarch64_expand_vec_perm_const_1
13393 swaps the operand order and the permute indices if it finds
13394 d->perm[0] to be in the second operand. Thus, we can always
13395 use d->op0 and need not do any extra arithmetic to get the
13396 correct lane number. */
13398 lane
= GEN_INT (elt
); /* The pattern corrects for big-endian. */
13402 case V16QImode
: gen
= gen_aarch64_dup_lanev16qi
; break;
13403 case V8QImode
: gen
= gen_aarch64_dup_lanev8qi
; break;
13404 case V8HImode
: gen
= gen_aarch64_dup_lanev8hi
; break;
13405 case V4HImode
: gen
= gen_aarch64_dup_lanev4hi
; break;
13406 case V4SImode
: gen
= gen_aarch64_dup_lanev4si
; break;
13407 case V2SImode
: gen
= gen_aarch64_dup_lanev2si
; break;
13408 case V2DImode
: gen
= gen_aarch64_dup_lanev2di
; break;
13409 case V8HFmode
: gen
= gen_aarch64_dup_lanev8hf
; break;
13410 case V4HFmode
: gen
= gen_aarch64_dup_lanev4hf
; break;
13411 case V4SFmode
: gen
= gen_aarch64_dup_lanev4sf
; break;
13412 case V2SFmode
: gen
= gen_aarch64_dup_lanev2sf
; break;
13413 case V2DFmode
: gen
= gen_aarch64_dup_lanev2df
; break;
13418 emit_insn (gen (out
, in0
, lane
));
13423 aarch64_evpc_tbl (struct expand_vec_perm_d
*d
)
13425 rtx rperm
[MAX_VECT_LEN
], sel
;
13426 machine_mode vmode
= d
->vmode
;
13427 unsigned int i
, nelt
= d
->nelt
;
13432 /* Generic code will try constant permutation twice. Once with the
13433 original mode and again with the elements lowered to QImode.
13434 So wait and don't do the selector expansion ourselves. */
13435 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
13438 for (i
= 0; i
< nelt
; ++i
)
13440 int nunits
= GET_MODE_NUNITS (vmode
);
13442 /* If big-endian and two vectors we end up with a weird mixed-endian
13443 mode on NEON. Reverse the index within each word but not the word
13445 rperm
[i
] = GEN_INT (BYTES_BIG_ENDIAN
? d
->perm
[i
] ^ (nunits
- 1)
13448 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
13449 sel
= force_reg (vmode
, sel
);
13451 aarch64_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
13456 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
13458 /* The pattern matching functions above are written to look for a small
13459 number to begin the sequence (0, 1, N/2). If we begin with an index
13460 from the second operand, we can swap the operands. */
13461 if (d
->perm
[0] >= d
->nelt
)
13463 unsigned i
, nelt
= d
->nelt
;
13465 gcc_assert (nelt
== (nelt
& -nelt
));
13466 for (i
= 0; i
< nelt
; ++i
)
13467 d
->perm
[i
] ^= nelt
; /* Keep the same index, but in the other vector. */
13469 std::swap (d
->op0
, d
->op1
);
13474 if (aarch64_evpc_rev (d
))
13476 else if (aarch64_evpc_ext (d
))
13478 else if (aarch64_evpc_dup (d
))
13480 else if (aarch64_evpc_zip (d
))
13482 else if (aarch64_evpc_uzp (d
))
13484 else if (aarch64_evpc_trn (d
))
13486 return aarch64_evpc_tbl (d
);
13491 /* Expand a vec_perm_const pattern. */
13494 aarch64_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
13496 struct expand_vec_perm_d d
;
13497 int i
, nelt
, which
;
13503 d
.vmode
= GET_MODE (target
);
13504 gcc_assert (VECTOR_MODE_P (d
.vmode
));
13505 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
13506 d
.testing_p
= false;
13508 for (i
= which
= 0; i
< nelt
; ++i
)
13510 rtx e
= XVECEXP (sel
, 0, i
);
13511 int ei
= INTVAL (e
) & (2 * nelt
- 1);
13512 which
|= (ei
< nelt
? 1 : 2);
13519 gcc_unreachable ();
13522 d
.one_vector_p
= false;
13523 if (!rtx_equal_p (op0
, op1
))
13526 /* The elements of PERM do not suggest that only the first operand
13527 is used, but both operands are identical. Allow easier matching
13528 of the permutation by folding the permutation into the single
13530 /* Fall Through. */
13532 for (i
= 0; i
< nelt
; ++i
)
13533 d
.perm
[i
] &= nelt
- 1;
13535 d
.one_vector_p
= true;
13540 d
.one_vector_p
= true;
13544 return aarch64_expand_vec_perm_const_1 (&d
);
13548 aarch64_vectorize_vec_perm_const_ok (machine_mode vmode
,
13549 const unsigned char *sel
)
13551 struct expand_vec_perm_d d
;
13552 unsigned int i
, nelt
, which
;
13556 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
13557 d
.testing_p
= true;
13558 memcpy (d
.perm
, sel
, nelt
);
13560 /* Calculate whether all elements are in one vector. */
13561 for (i
= which
= 0; i
< nelt
; ++i
)
13563 unsigned char e
= d
.perm
[i
];
13564 gcc_assert (e
< 2 * nelt
);
13565 which
|= (e
< nelt
? 1 : 2);
13568 /* If all elements are from the second vector, reindex as if from the
13571 for (i
= 0; i
< nelt
; ++i
)
13574 /* Check whether the mask can be applied to a single vector. */
13575 d
.one_vector_p
= (which
!= 3);
13577 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
13578 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
13579 if (!d
.one_vector_p
)
13580 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
13583 ret
= aarch64_expand_vec_perm_const_1 (&d
);
13590 aarch64_reverse_mask (enum machine_mode mode
)
13592 /* We have to reverse each vector because we dont have
13593 a permuted load that can reverse-load according to ABI rules. */
13595 rtvec v
= rtvec_alloc (16);
13597 int nunits
= GET_MODE_NUNITS (mode
);
13598 int usize
= GET_MODE_UNIT_SIZE (mode
);
13600 gcc_assert (BYTES_BIG_ENDIAN
);
13601 gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode
));
13603 for (i
= 0; i
< nunits
; i
++)
13604 for (j
= 0; j
< usize
; j
++)
13605 RTVEC_ELT (v
, i
* usize
+ j
) = GEN_INT ((i
+ 1) * usize
- 1 - j
);
13606 mask
= gen_rtx_CONST_VECTOR (V16QImode
, v
);
13607 return force_reg (V16QImode
, mask
);
13610 /* Implement MODES_TIEABLE_P. In principle we should always return true.
13611 However due to issues with register allocation it is preferable to avoid
13612 tieing integer scalar and FP scalar modes. Executing integer operations
13613 in general registers is better than treating them as scalar vector
13614 operations. This reduces latency and avoids redundant int<->FP moves.
13615 So tie modes if they are either the same class, or vector modes with
13616 other vector modes, vector structs or any scalar mode.
13620 aarch64_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
13622 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
13625 /* We specifically want to allow elements of "structure" modes to
13626 be tieable to the structure. This more general condition allows
13627 other rarer situations too. */
13628 if (aarch64_vector_mode_p (mode1
) && aarch64_vector_mode_p (mode2
))
13631 /* Also allow any scalar modes with vectors. */
13632 if (aarch64_vector_mode_supported_p (mode1
)
13633 || aarch64_vector_mode_supported_p (mode2
))
13639 /* Return a new RTX holding the result of moving POINTER forward by
13643 aarch64_move_pointer (rtx pointer
, int amount
)
13645 rtx next
= plus_constant (Pmode
, XEXP (pointer
, 0), amount
);
13647 return adjust_automodify_address (pointer
, GET_MODE (pointer
),
13651 /* Return a new RTX holding the result of moving POINTER forward by the
13652 size of the mode it points to. */
13655 aarch64_progress_pointer (rtx pointer
)
13657 HOST_WIDE_INT amount
= GET_MODE_SIZE (GET_MODE (pointer
));
13659 return aarch64_move_pointer (pointer
, amount
);
13662 /* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
13666 aarch64_copy_one_block_and_progress_pointers (rtx
*src
, rtx
*dst
,
13669 rtx reg
= gen_reg_rtx (mode
);
13671 /* "Cast" the pointers to the correct mode. */
13672 *src
= adjust_address (*src
, mode
, 0);
13673 *dst
= adjust_address (*dst
, mode
, 0);
13674 /* Emit the memcpy. */
13675 emit_move_insn (reg
, *src
);
13676 emit_move_insn (*dst
, reg
);
13677 /* Move the pointers forward. */
13678 *src
= aarch64_progress_pointer (*src
);
13679 *dst
= aarch64_progress_pointer (*dst
);
13682 /* Expand movmem, as if from a __builtin_memcpy. Return true if
13683 we succeed, otherwise return false. */
13686 aarch64_expand_movmem (rtx
*operands
)
13689 rtx dst
= operands
[0];
13690 rtx src
= operands
[1];
13692 bool speed_p
= !optimize_function_for_size_p (cfun
);
13694 /* When optimizing for size, give a better estimate of the length of a
13695 memcpy call, but use the default otherwise. */
13696 unsigned int max_instructions
= (speed_p
? 15 : AARCH64_CALL_RATIO
) / 2;
13698 /* We can't do anything smart if the amount to copy is not constant. */
13699 if (!CONST_INT_P (operands
[2]))
13702 n
= UINTVAL (operands
[2]);
13704 /* Try to keep the number of instructions low. For cases below 16 bytes we
13705 need to make at most two moves. For cases above 16 bytes it will be one
13706 move for each 16 byte chunk, then at most two additional moves. */
13707 if (((n
/ 16) + (n
% 16 ? 2 : 0)) > max_instructions
)
13710 base
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
13711 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
13713 base
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
13714 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
13716 /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
13722 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, HImode
);
13727 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, QImode
);
13732 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
13733 4-byte chunk, partially overlapping with the previously copied chunk. */
13736 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, SImode
);
13742 src
= aarch64_move_pointer (src
, move
);
13743 dst
= aarch64_move_pointer (dst
, move
);
13744 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, SImode
);
13749 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
13750 them, then (if applicable) an 8-byte chunk. */
13755 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, TImode
);
13760 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, DImode
);
13765 /* Finish the final bytes of the copy. We can always do this in one
13766 instruction. We either copy the exact amount we need, or partially
13767 overlap with the previous chunk we copied and copy 8-bytes. */
13771 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, QImode
);
13773 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, HImode
);
13775 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, SImode
);
13780 src
= aarch64_move_pointer (src
, -1);
13781 dst
= aarch64_move_pointer (dst
, -1);
13782 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, SImode
);
13788 src
= aarch64_move_pointer (src
, move
);
13789 dst
= aarch64_move_pointer (dst
, move
);
13790 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, DImode
);
13797 /* Split a DImode store of a CONST_INT SRC to MEM DST as two
13798 SImode stores. Handle the case when the constant has identical
13799 bottom and top halves. This is beneficial when the two stores can be
13800 merged into an STP and we avoid synthesising potentially expensive
13801 immediates twice. Return true if such a split is possible. */
13804 aarch64_split_dimode_const_store (rtx dst
, rtx src
)
13806 rtx lo
= gen_lowpart (SImode
, src
);
13807 rtx hi
= gen_highpart_mode (SImode
, DImode
, src
);
13809 bool size_p
= optimize_function_for_size_p (cfun
);
13811 if (!rtx_equal_p (lo
, hi
))
13814 unsigned int orig_cost
13815 = aarch64_internal_mov_immediate (NULL_RTX
, src
, false, DImode
);
13816 unsigned int lo_cost
13817 = aarch64_internal_mov_immediate (NULL_RTX
, lo
, false, SImode
);
13819 /* We want to transform:
13821 MOVK x1, 0x140, lsl 16
13822 MOVK x1, 0xc0da, lsl 32
13823 MOVK x1, 0x140, lsl 48
13827 MOVK w1, 0x140, lsl 16
13829 So we want to perform this only when we save two instructions
13830 or more. When optimizing for size, however, accept any code size
13832 if (size_p
&& orig_cost
<= lo_cost
)
13836 && (orig_cost
<= lo_cost
+ 1))
13839 rtx mem_lo
= adjust_address (dst
, SImode
, 0);
13840 if (!aarch64_mem_pair_operand (mem_lo
, SImode
))
13843 rtx tmp_reg
= gen_reg_rtx (SImode
);
13844 aarch64_expand_mov_immediate (tmp_reg
, lo
);
13845 rtx mem_hi
= aarch64_move_pointer (mem_lo
, GET_MODE_SIZE (SImode
));
13846 /* Don't emit an explicit store pair as this may not be always profitable.
13847 Let the sched-fusion logic decide whether to merge them. */
13848 emit_move_insn (mem_lo
, tmp_reg
);
13849 emit_move_insn (mem_hi
, tmp_reg
);
13854 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
13856 static unsigned HOST_WIDE_INT
13857 aarch64_asan_shadow_offset (void)
13859 return (HOST_WIDE_INT_1
<< 36);
13863 aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size
,
13864 unsigned int align
,
13865 enum by_pieces_operation op
,
13868 /* STORE_BY_PIECES can be used when copying a constant string, but
13869 in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
13870 For now we always fail this and let the move_by_pieces code copy
13871 the string from read-only memory. */
13872 if (op
== STORE_BY_PIECES
)
13875 return default_use_by_pieces_infrastructure_p (size
, align
, op
, speed_p
);
13879 aarch64_gen_ccmp_first (rtx_insn
**prep_seq
, rtx_insn
**gen_seq
,
13880 int code
, tree treeop0
, tree treeop1
)
13882 machine_mode op_mode
, cmp_mode
, cc_mode
= CCmode
;
13884 int unsignedp
= TYPE_UNSIGNED (TREE_TYPE (treeop0
));
13886 struct expand_operand ops
[4];
13889 expand_operands (treeop0
, treeop1
, NULL_RTX
, &op0
, &op1
, EXPAND_NORMAL
);
13891 op_mode
= GET_MODE (op0
);
13892 if (op_mode
== VOIDmode
)
13893 op_mode
= GET_MODE (op1
);
13901 icode
= CODE_FOR_cmpsi
;
13906 icode
= CODE_FOR_cmpdi
;
13911 cc_mode
= aarch64_select_cc_mode ((rtx_code
) code
, op0
, op1
);
13912 icode
= cc_mode
== CCFPEmode
? CODE_FOR_fcmpesf
: CODE_FOR_fcmpsf
;
13917 cc_mode
= aarch64_select_cc_mode ((rtx_code
) code
, op0
, op1
);
13918 icode
= cc_mode
== CCFPEmode
? CODE_FOR_fcmpedf
: CODE_FOR_fcmpdf
;
13926 op0
= prepare_operand (icode
, op0
, 0, op_mode
, cmp_mode
, unsignedp
);
13927 op1
= prepare_operand (icode
, op1
, 1, op_mode
, cmp_mode
, unsignedp
);
13933 *prep_seq
= get_insns ();
13936 create_fixed_operand (&ops
[0], op0
);
13937 create_fixed_operand (&ops
[1], op1
);
13940 if (!maybe_expand_insn (icode
, 2, ops
))
13945 *gen_seq
= get_insns ();
13948 return gen_rtx_fmt_ee ((rtx_code
) code
, cc_mode
,
13949 gen_rtx_REG (cc_mode
, CC_REGNUM
), const0_rtx
);
13953 aarch64_gen_ccmp_next (rtx_insn
**prep_seq
, rtx_insn
**gen_seq
, rtx prev
,
13954 int cmp_code
, tree treeop0
, tree treeop1
, int bit_code
)
13956 rtx op0
, op1
, target
;
13957 machine_mode op_mode
, cmp_mode
, cc_mode
= CCmode
;
13958 int unsignedp
= TYPE_UNSIGNED (TREE_TYPE (treeop0
));
13960 struct expand_operand ops
[6];
13963 push_to_sequence (*prep_seq
);
13964 expand_operands (treeop0
, treeop1
, NULL_RTX
, &op0
, &op1
, EXPAND_NORMAL
);
13966 op_mode
= GET_MODE (op0
);
13967 if (op_mode
== VOIDmode
)
13968 op_mode
= GET_MODE (op1
);
13976 icode
= CODE_FOR_ccmpsi
;
13981 icode
= CODE_FOR_ccmpdi
;
13986 cc_mode
= aarch64_select_cc_mode ((rtx_code
) cmp_code
, op0
, op1
);
13987 icode
= cc_mode
== CCFPEmode
? CODE_FOR_fccmpesf
: CODE_FOR_fccmpsf
;
13992 cc_mode
= aarch64_select_cc_mode ((rtx_code
) cmp_code
, op0
, op1
);
13993 icode
= cc_mode
== CCFPEmode
? CODE_FOR_fccmpedf
: CODE_FOR_fccmpdf
;
14001 op0
= prepare_operand (icode
, op0
, 2, op_mode
, cmp_mode
, unsignedp
);
14002 op1
= prepare_operand (icode
, op1
, 3, op_mode
, cmp_mode
, unsignedp
);
14008 *prep_seq
= get_insns ();
14011 target
= gen_rtx_REG (cc_mode
, CC_REGNUM
);
14012 aarch64_cond
= aarch64_get_condition_code_1 (cc_mode
, (rtx_code
) cmp_code
);
14014 if (bit_code
!= AND
)
14016 prev
= gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (prev
),
14017 GET_MODE (XEXP (prev
, 0))),
14018 VOIDmode
, XEXP (prev
, 0), const0_rtx
);
14019 aarch64_cond
= AARCH64_INVERSE_CONDITION_CODE (aarch64_cond
);
14022 create_fixed_operand (&ops
[0], XEXP (prev
, 0));
14023 create_fixed_operand (&ops
[1], target
);
14024 create_fixed_operand (&ops
[2], op0
);
14025 create_fixed_operand (&ops
[3], op1
);
14026 create_fixed_operand (&ops
[4], prev
);
14027 create_fixed_operand (&ops
[5], GEN_INT (aarch64_cond
));
14029 push_to_sequence (*gen_seq
);
14030 if (!maybe_expand_insn (icode
, 6, ops
))
14036 *gen_seq
= get_insns ();
14039 return gen_rtx_fmt_ee ((rtx_code
) cmp_code
, VOIDmode
, target
, const0_rtx
);
14042 #undef TARGET_GEN_CCMP_FIRST
14043 #define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
14045 #undef TARGET_GEN_CCMP_NEXT
14046 #define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
14048 /* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
14049 instruction fusion of some sort. */
14052 aarch64_macro_fusion_p (void)
14054 return aarch64_tune_params
.fusible_ops
!= AARCH64_FUSE_NOTHING
;
14058 /* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
14059 should be kept together during scheduling. */
14062 aarch_macro_fusion_pair_p (rtx_insn
*prev
, rtx_insn
*curr
)
14065 rtx prev_set
= single_set (prev
);
14066 rtx curr_set
= single_set (curr
);
14067 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
14068 bool simple_sets_p
= prev_set
&& curr_set
&& !any_condjump_p (curr
);
14070 if (!aarch64_macro_fusion_p ())
14073 if (simple_sets_p
&& aarch64_fusion_enabled_p (AARCH64_FUSE_MOV_MOVK
))
14075 /* We are trying to match:
14076 prev (mov) == (set (reg r0) (const_int imm16))
14077 curr (movk) == (set (zero_extract (reg r0)
14080 (const_int imm16_1)) */
14082 set_dest
= SET_DEST (curr_set
);
14084 if (GET_CODE (set_dest
) == ZERO_EXTRACT
14085 && CONST_INT_P (SET_SRC (curr_set
))
14086 && CONST_INT_P (SET_SRC (prev_set
))
14087 && CONST_INT_P (XEXP (set_dest
, 2))
14088 && INTVAL (XEXP (set_dest
, 2)) == 16
14089 && REG_P (XEXP (set_dest
, 0))
14090 && REG_P (SET_DEST (prev_set
))
14091 && REGNO (XEXP (set_dest
, 0)) == REGNO (SET_DEST (prev_set
)))
14097 if (simple_sets_p
&& aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_ADD
))
14100 /* We're trying to match:
14101 prev (adrp) == (set (reg r1)
14102 (high (symbol_ref ("SYM"))))
14103 curr (add) == (set (reg r0)
14105 (symbol_ref ("SYM"))))
14106 Note that r0 need not necessarily be the same as r1, especially
14107 during pre-regalloc scheduling. */
14109 if (satisfies_constraint_Ush (SET_SRC (prev_set
))
14110 && REG_P (SET_DEST (prev_set
)) && REG_P (SET_DEST (curr_set
)))
14112 if (GET_CODE (SET_SRC (curr_set
)) == LO_SUM
14113 && REG_P (XEXP (SET_SRC (curr_set
), 0))
14114 && REGNO (XEXP (SET_SRC (curr_set
), 0))
14115 == REGNO (SET_DEST (prev_set
))
14116 && rtx_equal_p (XEXP (SET_SRC (prev_set
), 0),
14117 XEXP (SET_SRC (curr_set
), 1)))
14122 if (simple_sets_p
&& aarch64_fusion_enabled_p (AARCH64_FUSE_MOVK_MOVK
))
14125 /* We're trying to match:
14126 prev (movk) == (set (zero_extract (reg r0)
14129 (const_int imm16_1))
14130 curr (movk) == (set (zero_extract (reg r0)
14133 (const_int imm16_2)) */
14135 if (GET_CODE (SET_DEST (prev_set
)) == ZERO_EXTRACT
14136 && GET_CODE (SET_DEST (curr_set
)) == ZERO_EXTRACT
14137 && REG_P (XEXP (SET_DEST (prev_set
), 0))
14138 && REG_P (XEXP (SET_DEST (curr_set
), 0))
14139 && REGNO (XEXP (SET_DEST (prev_set
), 0))
14140 == REGNO (XEXP (SET_DEST (curr_set
), 0))
14141 && CONST_INT_P (XEXP (SET_DEST (prev_set
), 2))
14142 && CONST_INT_P (XEXP (SET_DEST (curr_set
), 2))
14143 && INTVAL (XEXP (SET_DEST (prev_set
), 2)) == 32
14144 && INTVAL (XEXP (SET_DEST (curr_set
), 2)) == 48
14145 && CONST_INT_P (SET_SRC (prev_set
))
14146 && CONST_INT_P (SET_SRC (curr_set
)))
14150 if (simple_sets_p
&& aarch64_fusion_enabled_p (AARCH64_FUSE_ADRP_LDR
))
14152 /* We're trying to match:
14153 prev (adrp) == (set (reg r0)
14154 (high (symbol_ref ("SYM"))))
14155 curr (ldr) == (set (reg r1)
14156 (mem (lo_sum (reg r0)
14157 (symbol_ref ("SYM")))))
14159 curr (ldr) == (set (reg r1)
14162 (symbol_ref ("SYM")))))) */
14163 if (satisfies_constraint_Ush (SET_SRC (prev_set
))
14164 && REG_P (SET_DEST (prev_set
)) && REG_P (SET_DEST (curr_set
)))
14166 rtx curr_src
= SET_SRC (curr_set
);
14168 if (GET_CODE (curr_src
) == ZERO_EXTEND
)
14169 curr_src
= XEXP (curr_src
, 0);
14171 if (MEM_P (curr_src
) && GET_CODE (XEXP (curr_src
, 0)) == LO_SUM
14172 && REG_P (XEXP (XEXP (curr_src
, 0), 0))
14173 && REGNO (XEXP (XEXP (curr_src
, 0), 0))
14174 == REGNO (SET_DEST (prev_set
))
14175 && rtx_equal_p (XEXP (XEXP (curr_src
, 0), 1),
14176 XEXP (SET_SRC (prev_set
), 0)))
14181 if (aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC
)
14182 && aarch_crypto_can_dual_issue (prev
, curr
))
14185 if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH
)
14186 && any_condjump_p (curr
))
14188 enum attr_type prev_type
= get_attr_type (prev
);
14190 /* FIXME: this misses some which is considered simple arthematic
14191 instructions for ThunderX. Simple shifts are missed here. */
14192 if (prev_type
== TYPE_ALUS_SREG
14193 || prev_type
== TYPE_ALUS_IMM
14194 || prev_type
== TYPE_LOGICS_REG
14195 || prev_type
== TYPE_LOGICS_IMM
)
14202 /* Return true iff the instruction fusion described by OP is enabled. */
14205 aarch64_fusion_enabled_p (enum aarch64_fusion_pairs op
)
14207 return (aarch64_tune_params
.fusible_ops
& op
) != 0;
14210 /* If MEM is in the form of [base+offset], extract the two parts
14211 of address and set to BASE and OFFSET, otherwise return false
14212 after clearing BASE and OFFSET. */
14215 extract_base_offset_in_addr (rtx mem
, rtx
*base
, rtx
*offset
)
14219 gcc_assert (MEM_P (mem
));
14221 addr
= XEXP (mem
, 0);
14226 *offset
= const0_rtx
;
14230 if (GET_CODE (addr
) == PLUS
14231 && REG_P (XEXP (addr
, 0)) && CONST_INT_P (XEXP (addr
, 1)))
14233 *base
= XEXP (addr
, 0);
14234 *offset
= XEXP (addr
, 1);
14239 *offset
= NULL_RTX
;
14244 /* Types for scheduling fusion. */
14245 enum sched_fusion_type
14247 SCHED_FUSION_NONE
= 0,
14248 SCHED_FUSION_LD_SIGN_EXTEND
,
14249 SCHED_FUSION_LD_ZERO_EXTEND
,
14255 /* If INSN is a load or store of address in the form of [base+offset],
14256 extract the two parts and set to BASE and OFFSET. Return scheduling
14257 fusion type this INSN is. */
14259 static enum sched_fusion_type
14260 fusion_load_store (rtx_insn
*insn
, rtx
*base
, rtx
*offset
)
14263 enum sched_fusion_type fusion
= SCHED_FUSION_LD
;
14265 gcc_assert (INSN_P (insn
));
14266 x
= PATTERN (insn
);
14267 if (GET_CODE (x
) != SET
)
14268 return SCHED_FUSION_NONE
;
14271 dest
= SET_DEST (x
);
14273 machine_mode dest_mode
= GET_MODE (dest
);
14275 if (!aarch64_mode_valid_for_sched_fusion_p (dest_mode
))
14276 return SCHED_FUSION_NONE
;
14278 if (GET_CODE (src
) == SIGN_EXTEND
)
14280 fusion
= SCHED_FUSION_LD_SIGN_EXTEND
;
14281 src
= XEXP (src
, 0);
14282 if (GET_CODE (src
) != MEM
|| GET_MODE (src
) != SImode
)
14283 return SCHED_FUSION_NONE
;
14285 else if (GET_CODE (src
) == ZERO_EXTEND
)
14287 fusion
= SCHED_FUSION_LD_ZERO_EXTEND
;
14288 src
= XEXP (src
, 0);
14289 if (GET_CODE (src
) != MEM
|| GET_MODE (src
) != SImode
)
14290 return SCHED_FUSION_NONE
;
14293 if (GET_CODE (src
) == MEM
&& REG_P (dest
))
14294 extract_base_offset_in_addr (src
, base
, offset
);
14295 else if (GET_CODE (dest
) == MEM
&& (REG_P (src
) || src
== const0_rtx
))
14297 fusion
= SCHED_FUSION_ST
;
14298 extract_base_offset_in_addr (dest
, base
, offset
);
14301 return SCHED_FUSION_NONE
;
14303 if (*base
== NULL_RTX
|| *offset
== NULL_RTX
)
14304 fusion
= SCHED_FUSION_NONE
;
14309 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
14311 Currently we only support to fuse ldr or str instructions, so FUSION_PRI
14312 and PRI are only calculated for these instructions. For other instruction,
14313 FUSION_PRI and PRI are simply set to MAX_PRI - 1. In the future, other
14314 type instruction fusion can be added by returning different priorities.
14316 It's important that irrelevant instructions get the largest FUSION_PRI. */
14319 aarch64_sched_fusion_priority (rtx_insn
*insn
, int max_pri
,
14320 int *fusion_pri
, int *pri
)
14324 enum sched_fusion_type fusion
;
14326 gcc_assert (INSN_P (insn
));
14329 fusion
= fusion_load_store (insn
, &base
, &offset
);
14330 if (fusion
== SCHED_FUSION_NONE
)
14337 /* Set FUSION_PRI according to fusion type and base register. */
14338 *fusion_pri
= tmp
- fusion
* FIRST_PSEUDO_REGISTER
- REGNO (base
);
14340 /* Calculate PRI. */
14343 /* INSN with smaller offset goes first. */
14344 off_val
= (int)(INTVAL (offset
));
14346 tmp
-= (off_val
& 0xfffff);
14348 tmp
+= ((- off_val
) & 0xfffff);
14354 /* Implement the TARGET_SCHED_ADJUST_PRIORITY hook.
14355 Adjust priority of sha1h instructions so they are scheduled before
14356 other SHA1 instructions. */
14359 aarch64_sched_adjust_priority (rtx_insn
*insn
, int priority
)
14361 rtx x
= PATTERN (insn
);
14363 if (GET_CODE (x
) == SET
)
14367 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SHA1H
)
14368 return priority
+ 10;
14374 /* Given OPERANDS of consecutive load/store, check if we can merge
14375 them into ldp/stp. LOAD is true if they are load instructions.
14376 MODE is the mode of memory operands. */
14379 aarch64_operands_ok_for_ldpstp (rtx
*operands
, bool load
,
14380 enum machine_mode mode
)
14382 HOST_WIDE_INT offval_1
, offval_2
, msize
;
14383 enum reg_class rclass_1
, rclass_2
;
14384 rtx mem_1
, mem_2
, reg_1
, reg_2
, base_1
, base_2
, offset_1
, offset_2
;
14388 mem_1
= operands
[1];
14389 mem_2
= operands
[3];
14390 reg_1
= operands
[0];
14391 reg_2
= operands
[2];
14392 gcc_assert (REG_P (reg_1
) && REG_P (reg_2
));
14393 if (REGNO (reg_1
) == REGNO (reg_2
))
14398 mem_1
= operands
[0];
14399 mem_2
= operands
[2];
14400 reg_1
= operands
[1];
14401 reg_2
= operands
[3];
14404 /* The mems cannot be volatile. */
14405 if (MEM_VOLATILE_P (mem_1
) || MEM_VOLATILE_P (mem_2
))
14408 /* If we have SImode and slow unaligned ldp,
14409 check the alignment to be at least 8 byte. */
14411 && (aarch64_tune_params
.extra_tuning_flags
14412 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
)
14414 && MEM_ALIGN (mem_1
) < 8 * BITS_PER_UNIT
)
14417 /* Check if the addresses are in the form of [base+offset]. */
14418 extract_base_offset_in_addr (mem_1
, &base_1
, &offset_1
);
14419 if (base_1
== NULL_RTX
|| offset_1
== NULL_RTX
)
14421 extract_base_offset_in_addr (mem_2
, &base_2
, &offset_2
);
14422 if (base_2
== NULL_RTX
|| offset_2
== NULL_RTX
)
14425 /* Check if the bases are same. */
14426 if (!rtx_equal_p (base_1
, base_2
))
14429 offval_1
= INTVAL (offset_1
);
14430 offval_2
= INTVAL (offset_2
);
14431 msize
= GET_MODE_SIZE (mode
);
14432 /* Check if the offsets are consecutive. */
14433 if (offval_1
!= (offval_2
+ msize
) && offval_2
!= (offval_1
+ msize
))
14436 /* Check if the addresses are clobbered by load. */
14439 if (reg_mentioned_p (reg_1
, mem_1
))
14442 /* In increasing order, the last load can clobber the address. */
14443 if (offval_1
> offval_2
&& reg_mentioned_p (reg_2
, mem_2
))
14447 if (REG_P (reg_1
) && FP_REGNUM_P (REGNO (reg_1
)))
14448 rclass_1
= FP_REGS
;
14450 rclass_1
= GENERAL_REGS
;
14452 if (REG_P (reg_2
) && FP_REGNUM_P (REGNO (reg_2
)))
14453 rclass_2
= FP_REGS
;
14455 rclass_2
= GENERAL_REGS
;
14457 /* Check if the registers are of same class. */
14458 if (rclass_1
!= rclass_2
)
14464 /* Given OPERANDS of consecutive load/store, check if we can merge
14465 them into ldp/stp by adjusting the offset. LOAD is true if they
14466 are load instructions. MODE is the mode of memory operands.
14468 Given below consecutive stores:
14470 str w1, [xb, 0x100]
14471 str w1, [xb, 0x104]
14472 str w1, [xb, 0x108]
14473 str w1, [xb, 0x10c]
14475 Though the offsets are out of the range supported by stp, we can
14476 still pair them after adjusting the offset, like:
14478 add scratch, xb, 0x100
14479 stp w1, w1, [scratch]
14480 stp w1, w1, [scratch, 0x8]
14482 The peephole patterns detecting this opportunity should guarantee
14483 the scratch register is avaliable. */
14486 aarch64_operands_adjust_ok_for_ldpstp (rtx
*operands
, bool load
,
14487 enum machine_mode mode
)
14489 enum reg_class rclass_1
, rclass_2
, rclass_3
, rclass_4
;
14490 HOST_WIDE_INT offval_1
, offval_2
, offval_3
, offval_4
, msize
;
14491 rtx mem_1
, mem_2
, mem_3
, mem_4
, reg_1
, reg_2
, reg_3
, reg_4
;
14492 rtx base_1
, base_2
, base_3
, base_4
, offset_1
, offset_2
, offset_3
, offset_4
;
14496 reg_1
= operands
[0];
14497 mem_1
= operands
[1];
14498 reg_2
= operands
[2];
14499 mem_2
= operands
[3];
14500 reg_3
= operands
[4];
14501 mem_3
= operands
[5];
14502 reg_4
= operands
[6];
14503 mem_4
= operands
[7];
14504 gcc_assert (REG_P (reg_1
) && REG_P (reg_2
)
14505 && REG_P (reg_3
) && REG_P (reg_4
));
14506 if (REGNO (reg_1
) == REGNO (reg_2
) || REGNO (reg_3
) == REGNO (reg_4
))
14511 mem_1
= operands
[0];
14512 reg_1
= operands
[1];
14513 mem_2
= operands
[2];
14514 reg_2
= operands
[3];
14515 mem_3
= operands
[4];
14516 reg_3
= operands
[5];
14517 mem_4
= operands
[6];
14518 reg_4
= operands
[7];
14520 /* Skip if memory operand is by itslef valid for ldp/stp. */
14521 if (!MEM_P (mem_1
) || aarch64_mem_pair_operand (mem_1
, mode
))
14524 /* The mems cannot be volatile. */
14525 if (MEM_VOLATILE_P (mem_1
) || MEM_VOLATILE_P (mem_2
)
14526 || MEM_VOLATILE_P (mem_3
) ||MEM_VOLATILE_P (mem_4
))
14529 /* Check if the addresses are in the form of [base+offset]. */
14530 extract_base_offset_in_addr (mem_1
, &base_1
, &offset_1
);
14531 if (base_1
== NULL_RTX
|| offset_1
== NULL_RTX
)
14533 extract_base_offset_in_addr (mem_2
, &base_2
, &offset_2
);
14534 if (base_2
== NULL_RTX
|| offset_2
== NULL_RTX
)
14536 extract_base_offset_in_addr (mem_3
, &base_3
, &offset_3
);
14537 if (base_3
== NULL_RTX
|| offset_3
== NULL_RTX
)
14539 extract_base_offset_in_addr (mem_4
, &base_4
, &offset_4
);
14540 if (base_4
== NULL_RTX
|| offset_4
== NULL_RTX
)
14543 /* Check if the bases are same. */
14544 if (!rtx_equal_p (base_1
, base_2
)
14545 || !rtx_equal_p (base_2
, base_3
)
14546 || !rtx_equal_p (base_3
, base_4
))
14549 offval_1
= INTVAL (offset_1
);
14550 offval_2
= INTVAL (offset_2
);
14551 offval_3
= INTVAL (offset_3
);
14552 offval_4
= INTVAL (offset_4
);
14553 msize
= GET_MODE_SIZE (mode
);
14554 /* Check if the offsets are consecutive. */
14555 if ((offval_1
!= (offval_2
+ msize
)
14556 || offval_1
!= (offval_3
+ msize
* 2)
14557 || offval_1
!= (offval_4
+ msize
* 3))
14558 && (offval_4
!= (offval_3
+ msize
)
14559 || offval_4
!= (offval_2
+ msize
* 2)
14560 || offval_4
!= (offval_1
+ msize
* 3)))
14563 /* Check if the addresses are clobbered by load. */
14566 if (reg_mentioned_p (reg_1
, mem_1
)
14567 || reg_mentioned_p (reg_2
, mem_2
)
14568 || reg_mentioned_p (reg_3
, mem_3
))
14571 /* In increasing order, the last load can clobber the address. */
14572 if (offval_1
> offval_2
&& reg_mentioned_p (reg_4
, mem_4
))
14576 /* If we have SImode and slow unaligned ldp,
14577 check the alignment to be at least 8 byte. */
14579 && (aarch64_tune_params
.extra_tuning_flags
14580 & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
)
14582 && MEM_ALIGN (mem_1
) < 8 * BITS_PER_UNIT
)
14585 if (REG_P (reg_1
) && FP_REGNUM_P (REGNO (reg_1
)))
14586 rclass_1
= FP_REGS
;
14588 rclass_1
= GENERAL_REGS
;
14590 if (REG_P (reg_2
) && FP_REGNUM_P (REGNO (reg_2
)))
14591 rclass_2
= FP_REGS
;
14593 rclass_2
= GENERAL_REGS
;
14595 if (REG_P (reg_3
) && FP_REGNUM_P (REGNO (reg_3
)))
14596 rclass_3
= FP_REGS
;
14598 rclass_3
= GENERAL_REGS
;
14600 if (REG_P (reg_4
) && FP_REGNUM_P (REGNO (reg_4
)))
14601 rclass_4
= FP_REGS
;
14603 rclass_4
= GENERAL_REGS
;
14605 /* Check if the registers are of same class. */
14606 if (rclass_1
!= rclass_2
|| rclass_2
!= rclass_3
|| rclass_3
!= rclass_4
)
14612 /* Given OPERANDS of consecutive load/store, this function pairs them
14613 into ldp/stp after adjusting the offset. It depends on the fact
14614 that addresses of load/store instructions are in increasing order.
14615 MODE is the mode of memory operands. CODE is the rtl operator
14616 which should be applied to all memory operands, it's SIGN_EXTEND,
14617 ZERO_EXTEND or UNKNOWN. */
14620 aarch64_gen_adjusted_ldpstp (rtx
*operands
, bool load
,
14621 enum machine_mode mode
, RTX_CODE code
)
14623 rtx base
, offset
, t1
, t2
;
14624 rtx mem_1
, mem_2
, mem_3
, mem_4
;
14625 HOST_WIDE_INT off_val
, abs_off
, adj_off
, new_off
, stp_off_limit
, msize
;
14629 mem_1
= operands
[1];
14630 mem_2
= operands
[3];
14631 mem_3
= operands
[5];
14632 mem_4
= operands
[7];
14636 mem_1
= operands
[0];
14637 mem_2
= operands
[2];
14638 mem_3
= operands
[4];
14639 mem_4
= operands
[6];
14640 gcc_assert (code
== UNKNOWN
);
14643 extract_base_offset_in_addr (mem_1
, &base
, &offset
);
14644 gcc_assert (base
!= NULL_RTX
&& offset
!= NULL_RTX
);
14646 /* Adjust offset thus it can fit in ldp/stp instruction. */
14647 msize
= GET_MODE_SIZE (mode
);
14648 stp_off_limit
= msize
* 0x40;
14649 off_val
= INTVAL (offset
);
14650 abs_off
= (off_val
< 0) ? -off_val
: off_val
;
14651 new_off
= abs_off
% stp_off_limit
;
14652 adj_off
= abs_off
- new_off
;
14654 /* Further adjust to make sure all offsets are OK. */
14655 if ((new_off
+ msize
* 2) >= stp_off_limit
)
14657 adj_off
+= stp_off_limit
;
14658 new_off
-= stp_off_limit
;
14661 /* Make sure the adjustment can be done with ADD/SUB instructions. */
14662 if (adj_off
>= 0x1000)
14667 adj_off
= -adj_off
;
14668 new_off
= -new_off
;
14671 /* Create new memory references. */
14672 mem_1
= change_address (mem_1
, VOIDmode
,
14673 plus_constant (DImode
, operands
[8], new_off
));
14675 /* Check if the adjusted address is OK for ldp/stp. */
14676 if (!aarch64_mem_pair_operand (mem_1
, mode
))
14679 msize
= GET_MODE_SIZE (mode
);
14680 mem_2
= change_address (mem_2
, VOIDmode
,
14681 plus_constant (DImode
,
14684 mem_3
= change_address (mem_3
, VOIDmode
,
14685 plus_constant (DImode
,
14687 new_off
+ msize
* 2));
14688 mem_4
= change_address (mem_4
, VOIDmode
,
14689 plus_constant (DImode
,
14691 new_off
+ msize
* 3));
14693 if (code
== ZERO_EXTEND
)
14695 mem_1
= gen_rtx_ZERO_EXTEND (DImode
, mem_1
);
14696 mem_2
= gen_rtx_ZERO_EXTEND (DImode
, mem_2
);
14697 mem_3
= gen_rtx_ZERO_EXTEND (DImode
, mem_3
);
14698 mem_4
= gen_rtx_ZERO_EXTEND (DImode
, mem_4
);
14700 else if (code
== SIGN_EXTEND
)
14702 mem_1
= gen_rtx_SIGN_EXTEND (DImode
, mem_1
);
14703 mem_2
= gen_rtx_SIGN_EXTEND (DImode
, mem_2
);
14704 mem_3
= gen_rtx_SIGN_EXTEND (DImode
, mem_3
);
14705 mem_4
= gen_rtx_SIGN_EXTEND (DImode
, mem_4
);
14710 operands
[1] = mem_1
;
14711 operands
[3] = mem_2
;
14712 operands
[5] = mem_3
;
14713 operands
[7] = mem_4
;
14717 operands
[0] = mem_1
;
14718 operands
[2] = mem_2
;
14719 operands
[4] = mem_3
;
14720 operands
[6] = mem_4
;
14723 /* Emit adjusting instruction. */
14724 emit_insn (gen_rtx_SET (operands
[8], plus_constant (DImode
, base
, adj_off
)));
14725 /* Emit ldp/stp instructions. */
14726 t1
= gen_rtx_SET (operands
[0], operands
[1]);
14727 t2
= gen_rtx_SET (operands
[2], operands
[3]);
14728 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, t1
, t2
)));
14729 t1
= gen_rtx_SET (operands
[4], operands
[5]);
14730 t2
= gen_rtx_SET (operands
[6], operands
[7]);
14731 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, t1
, t2
)));
14735 /* Return 1 if pseudo register should be created and used to hold
14736 GOT address for PIC code. */
14739 aarch64_use_pseudo_pic_reg (void)
14741 return aarch64_cmodel
== AARCH64_CMODEL_SMALL_SPIC
;
14744 /* Implement TARGET_UNSPEC_MAY_TRAP_P. */
14747 aarch64_unspec_may_trap_p (const_rtx x
, unsigned flags
)
14749 switch (XINT (x
, 1))
14751 case UNSPEC_GOTSMALLPIC
:
14752 case UNSPEC_GOTSMALLPIC28K
:
14753 case UNSPEC_GOTTINYPIC
:
14759 return default_unspec_may_trap_p (x
, flags
);
14763 /* If X is a positive CONST_DOUBLE with a value that is a power of 2
14764 return the log2 of that value. Otherwise return -1. */
14767 aarch64_fpconst_pow_of_2 (rtx x
)
14769 const REAL_VALUE_TYPE
*r
;
14771 if (!CONST_DOUBLE_P (x
))
14774 r
= CONST_DOUBLE_REAL_VALUE (x
);
14776 if (REAL_VALUE_NEGATIVE (*r
)
14777 || REAL_VALUE_ISNAN (*r
)
14778 || REAL_VALUE_ISINF (*r
)
14779 || !real_isinteger (r
, DFmode
))
14782 return exact_log2 (real_to_integer (r
));
14785 /* If X is a vector of equal CONST_DOUBLE values and that value is
14786 Y, return the aarch64_fpconst_pow_of_2 of Y. Otherwise return -1. */
14789 aarch64_vec_fpconst_pow_of_2 (rtx x
)
14791 if (GET_CODE (x
) != CONST_VECTOR
)
14794 if (GET_MODE_CLASS (GET_MODE (x
)) != MODE_VECTOR_FLOAT
)
14797 int firstval
= aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x
, 0));
14801 for (int i
= 1; i
< CONST_VECTOR_NUNITS (x
); i
++)
14802 if (aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x
, i
)) != firstval
)
14808 /* Implement TARGET_PROMOTED_TYPE to promote 16-bit floating point types
14811 __fp16 always promotes through this hook.
14812 _Float16 may promote if TARGET_FLT_EVAL_METHOD is 16, but we do that
14813 through the generic excess precision logic rather than here. */
14816 aarch64_promoted_type (const_tree t
)
14818 if (SCALAR_FLOAT_TYPE_P (t
)
14819 && TYPE_MAIN_VARIANT (t
) == aarch64_fp16_type_node
)
14820 return float_type_node
;
14825 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
14828 aarch64_optab_supported_p (int op
, machine_mode mode1
, machine_mode
,
14829 optimization_type opt_type
)
14834 return opt_type
== OPTIMIZE_FOR_SPEED
&& use_rsqrt_p (mode1
);
14841 /* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
14842 if MODE is HFmode, and punt to the generic implementation otherwise. */
14845 aarch64_libgcc_floating_mode_supported_p (machine_mode mode
)
14847 return (mode
== HFmode
14849 : default_libgcc_floating_mode_supported_p (mode
));
14852 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE
14853 if MODE is HFmode, and punt to the generic implementation otherwise. */
14856 aarch64_scalar_mode_supported_p (machine_mode mode
)
14858 return (mode
== HFmode
14860 : default_scalar_mode_supported_p (mode
));
14863 /* Set the value of FLT_EVAL_METHOD.
14864 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
14866 0: evaluate all operations and constants, whose semantic type has at
14867 most the range and precision of type float, to the range and
14868 precision of float; evaluate all other operations and constants to
14869 the range and precision of the semantic type;
14871 N, where _FloatN is a supported interchange floating type
14872 evaluate all operations and constants, whose semantic type has at
14873 most the range and precision of _FloatN type, to the range and
14874 precision of the _FloatN type; evaluate all other operations and
14875 constants to the range and precision of the semantic type;
14877 If we have the ARMv8.2-A extensions then we support _Float16 in native
14878 precision, so we should set this to 16. Otherwise, we support the type,
14879 but want to evaluate expressions in float precision, so set this to
14882 static enum flt_eval_method
14883 aarch64_excess_precision (enum excess_precision_type type
)
14887 case EXCESS_PRECISION_TYPE_FAST
:
14888 case EXCESS_PRECISION_TYPE_STANDARD
:
14889 /* We can calculate either in 16-bit range and precision or
14890 32-bit range and precision. Make that decision based on whether
14891 we have native support for the ARMv8.2-A 16-bit floating-point
14892 instructions or not. */
14893 return (TARGET_FP_F16INST
14894 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
14895 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
);
14896 case EXCESS_PRECISION_TYPE_IMPLICIT
:
14897 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
;
14899 gcc_unreachable ();
14901 return FLT_EVAL_METHOD_UNPREDICTABLE
;
14904 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
14905 scheduled for speculative execution. Reject the long-running division
14906 and square-root instructions. */
14909 aarch64_sched_can_speculate_insn (rtx_insn
*insn
)
14911 switch (get_attr_type (insn
))
14919 case TYPE_NEON_FP_SQRT_S
:
14920 case TYPE_NEON_FP_SQRT_D
:
14921 case TYPE_NEON_FP_SQRT_S_Q
:
14922 case TYPE_NEON_FP_SQRT_D_Q
:
14923 case TYPE_NEON_FP_DIV_S
:
14924 case TYPE_NEON_FP_DIV_D
:
14925 case TYPE_NEON_FP_DIV_S_Q
:
14926 case TYPE_NEON_FP_DIV_D_Q
:
/* Target-specific selftests.  */

#if CHECKING_P

namespace selftest {

/* Selftest for the RTL loader.
   Verify that the RTL loader copes with a dump from
   print_rtx_function.  This is essentially just a test that class
   function_reader can handle a real dump, but it also verifies
   that lookup_reg_by_dump_name correctly handles hard regs.
   The presence of hard reg names in the dump means that the test is
   target-specific, hence it is in this file.  */

static void
aarch64_test_loading_full_dump ()
{
  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("aarch64/times-two.rtl"));

  ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));

  rtx_insn *insn_1 = get_insn_by_uid (1);
  ASSERT_EQ (NOTE, GET_CODE (insn_1));

  rtx_insn *insn_15 = get_insn_by_uid (15);
  ASSERT_EQ (INSN, GET_CODE (insn_15));
  ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));

  /* Verify crtl->return_rtx.  */
  ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
  ASSERT_EQ (0, REGNO (crtl->return_rtx));
  ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
}

/* Run all target-specific selftests.  */

static void
aarch64_run_selftests (void)
{
  aarch64_test_loading_full_dump ();
}

} // namespace selftest

#endif /* #if CHECKING_P */
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost

/* This hook determines whether unnamed bitfields affect the alignment
   of the containing structure.  The hook returns true if the structure
   should inherit the alignment requirements of an unnamed bitfield's
   type.  */
#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START aarch64_start_file

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list

#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE aarch64_can_eliminate

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P aarch64_can_inline_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem

#undef TARGET_CASE_VALUES_THRESHOLD
#define TARGET_CASE_VALUES_THRESHOLD aarch64_case_values_threshold

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage

/* Only the least significant bit is used for initialization guard
   variables.  */
#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
#endif

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION aarch64_excess_precision

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN aarch64_fold_builtin

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
  aarch64_ira_change_pseudo_allocno_class

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
#define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
  aarch64_legitimize_address_displacement

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
  aarch64_libgcc_floating_mode_supported_p

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if these
   accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE aarch64_option_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE aarch64_option_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT aarch64_option_print

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION aarch64_set_current_function

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE aarch64_promoted_type

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P aarch64_scalar_mode_supported_p

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  aarch64_sched_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  aarch64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
  aarch64_get_separate_components

#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
  aarch64_components_for_bb

#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
  aarch64_disqualify_components

#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
  aarch64_emit_prologue_components

#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
  aarch64_emit_epilogue_components

#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
  aarch64_set_handled_components

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  aarch64_builtin_support_vector_misalignment

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
  aarch64_atomic_assign_expand_fenv

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

#undef TARGET_FLAGS_REGNUM
#define TARGET_FLAGS_REGNUM CC_REGNUM

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address

#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
  aarch64_use_by_pieces_infrastructure_p

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN aarch64_sched_can_speculate_insn

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY aarch64_sched_adjust_priority

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority

#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P aarch64_unspec_may_trap_p

#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND aarch64_print_operand

#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p

#undef TARGET_OMIT_STRUCT_RETURN_REG
#define TARGET_OMIT_STRUCT_RETURN_REG true

/* The architecture reserves bits 0 and 1 so use bit 2 for descriptors.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 4

#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests
#endif /* #if CHECKING_P */
15366 struct gcc_target targetm
= TARGET_INITIALIZER
;
15368 #include "gt-aarch64.h"