1/* Machine description for AArch64 architecture.
2 Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
25#include "insn-codes.h"
26#include "rtl.h"
27#include "insn-attr.h"
28#include "tree.h"
29#include "regs.h"
30#include "df.h"
31#include "hard-reg-set.h"
32#include "output.h"
33#include "expr.h"
34#include "reload.h"
35#include "toplev.h"
36#include "target.h"
37#include "target-def.h"
38#include "targhooks.h"
39#include "ggc.h"
40#include "function.h"
41#include "tm_p.h"
42#include "recog.h"
43#include "langhooks.h"
44#include "diagnostic-core.h"
45#include "gimple.h"
46#include "optabs.h"
47#include "dwarf2.h"
48
49/* Classifies an address.
50
51 ADDRESS_REG_IMM
52 A simple base register plus immediate offset.
53
54 ADDRESS_REG_WB
55 A base register indexed by immediate offset with writeback.
56
57 ADDRESS_REG_REG
58 A base register indexed by (optionally scaled) register.
59
60 ADDRESS_REG_UXTW
61 A base register indexed by (optionally scaled) zero-extended register.
62
63 ADDRESS_REG_SXTW
64 A base register indexed by (optionally scaled) sign-extended register.
65
66 ADDRESS_LO_SUM
67 A LO_SUM rtx with a base register and "LO12" symbol relocation.
68
 69 ADDRESS_SYMBOLIC
70 A constant symbolic address, in pc-relative literal pool. */
71
72enum aarch64_address_type {
73 ADDRESS_REG_IMM,
74 ADDRESS_REG_WB,
75 ADDRESS_REG_REG,
76 ADDRESS_REG_UXTW,
77 ADDRESS_REG_SXTW,
78 ADDRESS_LO_SUM,
79 ADDRESS_SYMBOLIC
80};
81
82struct aarch64_address_info {
83 enum aarch64_address_type type;
84 rtx base;
85 rtx offset;
86 int shift;
87 enum aarch64_symbol_type symbol_type;
88};
89
90/* The current code model. */
91enum aarch64_code_model aarch64_cmodel;
92
93#ifdef HAVE_AS_TLS
94#undef TARGET_HAVE_TLS
95#define TARGET_HAVE_TLS 1
96#endif
97
98static bool aarch64_composite_type_p (const_tree, enum machine_mode);
99static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
100 const_tree,
101 enum machine_mode *, int *,
102 bool *);
103static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
104static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
105static void aarch64_override_options_after_change (void);
106static int aarch64_simd_valid_immediate (rtx, enum machine_mode, int, rtx *,
107 int *, unsigned char *, int *, int *);
108static bool aarch64_vector_mode_supported_p (enum machine_mode);
109static unsigned bit_count (unsigned HOST_WIDE_INT);
110static bool aarch64_const_vec_all_same_int_p (rtx,
111 HOST_WIDE_INT, HOST_WIDE_INT);
112
113/* The processor for which instructions should be scheduled. */
114enum aarch64_processor aarch64_tune = generic;
115
116/* The current tuning set. */
117const struct tune_params *aarch64_tune_params;
118
119/* Mask to specify which instructions we are allowed to generate. */
120unsigned long aarch64_isa_flags = 0;
121
122/* Mask to specify which instruction scheduling options should be used. */
123unsigned long aarch64_tune_flags = 0;
124
125/* Tuning parameters. */
126
127#if HAVE_DESIGNATED_INITIALIZERS
128#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
129#else
130#define NAMED_PARAM(NAME, VAL) (VAL)
131#endif
132
133#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
134__extension__
135#endif
136static const struct cpu_rtx_cost_table generic_rtx_cost_table =
137{
138 NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
139 NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
140 NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
141 NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
142 NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
143 NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
144 NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
145 NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
146 NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
147 NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
148 NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
149 NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
150};
151
152#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
153__extension__
154#endif
155static const struct cpu_addrcost_table generic_addrcost_table =
156{
157 NAMED_PARAM (pre_modify, 0),
158 NAMED_PARAM (post_modify, 0),
159 NAMED_PARAM (register_offset, 0),
160 NAMED_PARAM (register_extend, 0),
161 NAMED_PARAM (imm_offset, 0)
162};
163
164#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
165__extension__
166#endif
167static const struct cpu_regmove_cost generic_regmove_cost =
168{
169 NAMED_PARAM (GP2GP, 1),
170 NAMED_PARAM (GP2FP, 2),
171 NAMED_PARAM (FP2GP, 2),
172 /* We currently do not provide direct support for TFmode Q->Q move.
173 Therefore we need to raise the cost above 2 in order to have
174 reload handle the situation. */
175 NAMED_PARAM (FP2FP, 4)
176};
177
178#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
179__extension__
180#endif
181static const struct tune_params generic_tunings =
182{
183 &generic_rtx_cost_table,
184 &generic_addrcost_table,
185 &generic_regmove_cost,
186 NAMED_PARAM (memmov_cost, 4)
187};
188
189/* A processor implementing AArch64. */
190struct processor
191{
192 const char *const name;
193 enum aarch64_processor core;
194 const char *arch;
195 const unsigned long flags;
196 const struct tune_params *const tune;
197};
198
199/* Processor cores implementing AArch64. */
200static const struct processor all_cores[] =
201{
202#define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
203 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
204#include "aarch64-cores.def"
205#undef AARCH64_CORE
206 {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
207 {NULL, aarch64_none, NULL, 0, NULL}
208};
209
210/* Architectures implementing AArch64. */
211static const struct processor all_architectures[] =
212{
213#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
214 {NAME, CORE, #ARCH, FLAGS, NULL},
215#include "aarch64-arches.def"
216#undef AARCH64_ARCH
217 {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
218 {NULL, aarch64_none, NULL, 0, NULL}
219};
220
 221/* Target specification. These are populated as command-line arguments
222 are processed, or NULL if not specified. */
223static const struct processor *selected_arch;
224static const struct processor *selected_cpu;
225static const struct processor *selected_tune;
226
227#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
228
229/* An ISA extension in the co-processor and main instruction set space. */
230struct aarch64_option_extension
231{
232 const char *const name;
233 const unsigned long flags_on;
234 const unsigned long flags_off;
235};
236
237/* ISA extensions in AArch64. */
238static const struct aarch64_option_extension all_extensions[] =
239{
240#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
241 {NAME, FLAGS_ON, FLAGS_OFF},
242#include "aarch64-option-extensions.def"
243#undef AARCH64_OPT_EXTENSION
244 {NULL, 0, 0}
245};
246
247/* Used to track the size of an address when generating a pre/post
248 increment address. */
249static enum machine_mode aarch64_memory_reference_mode;
250
251/* Used to force GTY into this file. */
252static GTY(()) int gty_dummy;
253
254/* A table of valid AArch64 "bitmask immediate" values for
255 logical instructions. */
256
257#define AARCH64_NUM_BITMASKS 5334
258static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
259
260/* Did we set flag_omit_frame_pointer just so
261 aarch64_frame_pointer_required would be called? */
262static bool faked_omit_frame_pointer;
263
264typedef enum aarch64_cond_code
265{
266 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
267 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
268 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
269}
270aarch64_cc;
271
272#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
273
274/* The condition codes of the processor, and the inverse function. */
275static const char * const aarch64_condition_codes[] =
276{
277 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
278 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
279};
280
281/* Provide a mapping from gcc register numbers to dwarf register numbers. */
282unsigned
283aarch64_dbx_register_number (unsigned regno)
284{
285 if (GP_REGNUM_P (regno))
286 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
287 else if (regno == SP_REGNUM)
288 return AARCH64_DWARF_SP;
289 else if (FP_REGNUM_P (regno))
290 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
291
292 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
293 equivalent DWARF register. */
294 return DWARF_FRAME_REGISTERS;
295}
296
297/* Return TRUE if MODE is any of the large INT modes. */
298static bool
299aarch64_vect_struct_mode_p (enum machine_mode mode)
300{
301 return mode == OImode || mode == CImode || mode == XImode;
302}
303
304/* Return TRUE if MODE is any of the vector modes. */
305static bool
306aarch64_vector_mode_p (enum machine_mode mode)
307{
308 return aarch64_vector_mode_supported_p (mode)
309 || aarch64_vect_struct_mode_p (mode);
310}
311
312/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
313static bool
314aarch64_array_mode_supported_p (enum machine_mode mode,
315 unsigned HOST_WIDE_INT nelems)
316{
317 if (TARGET_SIMD
318 && AARCH64_VALID_SIMD_QREG_MODE (mode)
319 && (nelems >= 2 && nelems <= 4))
320 return true;
321
322 return false;
323}
324
325/* Implement HARD_REGNO_NREGS. */
326
327int
328aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
329{
330 switch (aarch64_regno_regclass (regno))
331 {
332 case FP_REGS:
333 case FP_LO_REGS:
334 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
335 default:
336 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
337 }
338 gcc_unreachable ();
339}
340
341/* Implement HARD_REGNO_MODE_OK. */
342
343int
344aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
345{
346 if (GET_MODE_CLASS (mode) == MODE_CC)
347 return regno == CC_REGNUM;
348
349 if (regno == SP_REGNUM || regno == FRAME_POINTER_REGNUM
350 || regno == ARG_POINTER_REGNUM)
351 return mode == Pmode;
352
353 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
354 return 1;
355
356 if (FP_REGNUM_P (regno))
357 {
358 if (aarch64_vect_struct_mode_p (mode))
359 return
360 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
361 else
362 return 1;
363 }
364
365 return 0;
366}
367
368/* Return true if calls to DECL should be treated as
 369 long-calls (i.e. called via a register). */
370static bool
371aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
372{
373 return false;
374}
375
376/* Return true if calls to symbol-ref SYM should be treated as
 377 long-calls (i.e. called via a register). */
378bool
379aarch64_is_long_call_p (rtx sym)
380{
381 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
382}
383
384/* Return true if the offsets to a zero/sign-extract operation
385 represent an expression that matches an extend operation. The
 386 operands represent the parameters from
387
388 (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
389bool
390aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
391 rtx extract_imm)
392{
393 HOST_WIDE_INT mult_val, extract_val;
394
395 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
396 return false;
397
398 mult_val = INTVAL (mult_imm);
399 extract_val = INTVAL (extract_imm);
400
401 if (extract_val > 8
402 && extract_val < GET_MODE_BITSIZE (mode)
403 && exact_log2 (extract_val & ~7) > 0
404 && (extract_val & 7) <= 4
405 && mult_val == (1 << (extract_val & 7)))
406 return true;
407
408 return false;
409}
410
411/* Emit an insn that's a simple single-set. Both the operands must be
412 known to be valid. */
413inline static rtx
414emit_set_insn (rtx x, rtx y)
415{
416 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
417}
418
419/* X and Y are two things to compare using CODE. Emit the compare insn and
420 return the rtx for register 0 in the proper mode. */
421rtx
422aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
423{
424 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
425 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
426
427 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
428 return cc_reg;
429}
430
431/* Build the SYMBOL_REF for __tls_get_addr. */
432
433static GTY(()) rtx tls_get_addr_libfunc;
434
435rtx
436aarch64_tls_get_addr (void)
437{
438 if (!tls_get_addr_libfunc)
439 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
440 return tls_get_addr_libfunc;
441}
442
443/* Return the TLS model to use for ADDR. */
444
445static enum tls_model
446tls_symbolic_operand_type (rtx addr)
447{
448 enum tls_model tls_kind = TLS_MODEL_NONE;
449 rtx sym, addend;
450
451 if (GET_CODE (addr) == CONST)
452 {
453 split_const (addr, &sym, &addend);
454 if (GET_CODE (sym) == SYMBOL_REF)
455 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
456 }
457 else if (GET_CODE (addr) == SYMBOL_REF)
458 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
459
460 return tls_kind;
461}
462
 463/* We allow LO_SUM rtxes in our legitimate addresses so that combine
 464 can take care of combining addresses where necessary, but for
 465 generation purposes we generate the address
 466 as:
467 RTL Absolute
468 tmp = hi (symbol_ref); adrp x1, foo
469 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
470 nop
471
472 PIC TLS
473 adrp x1, :got:foo adrp tmp, :tlsgd:foo
474 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
475 bl __tls_get_addr
476 nop
477
478 Load TLS symbol, depending on TLS mechanism and TLS access model.
479
480 Global Dynamic - Traditional TLS:
481 adrp tmp, :tlsgd:imm
482 add dest, tmp, #:tlsgd_lo12:imm
483 bl __tls_get_addr
484
485 Global Dynamic - TLS Descriptors:
486 adrp dest, :tlsdesc:imm
487 ldr tmp, [dest, #:tlsdesc_lo12:imm]
488 add dest, dest, #:tlsdesc_lo12:imm
489 blr tmp
490 mrs tp, tpidr_el0
491 add dest, dest, tp
492
493 Initial Exec:
494 mrs tp, tpidr_el0
495 adrp tmp, :gottprel:imm
496 ldr dest, [tmp, #:gottprel_lo12:imm]
497 add dest, dest, tp
498
499 Local Exec:
500 mrs tp, tpidr_el0
501 add t0, tp, #:tprel_hi12:imm
502 add t0, #:tprel_lo12_nc:imm
503*/
504
505static void
506aarch64_load_symref_appropriately (rtx dest, rtx imm,
507 enum aarch64_symbol_type type)
508{
509 switch (type)
510 {
511 case SYMBOL_SMALL_ABSOLUTE:
512 {
513 rtx tmp_reg = dest;
514 if (can_create_pseudo_p ())
515 {
516 tmp_reg = gen_reg_rtx (Pmode);
517 }
518
519 emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
520 emit_insn (gen_add_losym (dest, tmp_reg, imm));
521 return;
522 }
523
524 case SYMBOL_SMALL_GOT:
525 {
526 rtx tmp_reg = dest;
527 if (can_create_pseudo_p ())
528 {
529 tmp_reg = gen_reg_rtx (Pmode);
530 }
531 emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
532 emit_insn (gen_ldr_got_small (dest, tmp_reg, imm));
533 return;
534 }
535
536 case SYMBOL_SMALL_TLSGD:
537 {
538 rtx insns;
539 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
540
541 start_sequence ();
542 emit_call_insn (gen_tlsgd_small (result, imm));
543 insns = get_insns ();
544 end_sequence ();
545
546 RTL_CONST_CALL_P (insns) = 1;
547 emit_libcall_block (insns, dest, result, imm);
548 return;
549 }
550
551 case SYMBOL_SMALL_TLSDESC:
552 {
553 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
554 rtx tp;
555
556 emit_insn (gen_tlsdesc_small (imm));
557 tp = aarch64_load_tp (NULL);
558 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
559 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
560 return;
561 }
562
563 case SYMBOL_SMALL_GOTTPREL:
564 {
565 rtx tmp_reg = gen_reg_rtx (Pmode);
566 rtx tp = aarch64_load_tp (NULL);
567 emit_insn (gen_tlsie_small (tmp_reg, imm));
568 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
569 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
570 return;
571 }
572
573 case SYMBOL_SMALL_TPREL:
574 {
575 rtx tp = aarch64_load_tp (NULL);
576 emit_insn (gen_tlsle_small (dest, tp, imm));
577 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
578 return;
579 }
580
581 default:
582 gcc_unreachable ();
583 }
584}
585
586/* Emit a move from SRC to DEST. Assume that the move expanders can
587 handle all moves if !can_create_pseudo_p (). The distinction is
588 important because, unlike emit_move_insn, the move expanders know
589 how to force Pmode objects into the constant pool even when the
590 constant pool address is not itself legitimate. */
591static rtx
592aarch64_emit_move (rtx dest, rtx src)
593{
594 return (can_create_pseudo_p ()
595 ? emit_move_insn (dest, src)
596 : emit_move_insn_1 (dest, src));
597}
598
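/* Split a 128-bit (TImode) move from SRC to DST into a pair of 64-bit
 moves, using separate patterns for transfers between the general and
 FP/SIMD register files. */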
599void
600aarch64_split_128bit_move (rtx dst, rtx src)
601{
602 rtx low_dst;
603
604 gcc_assert (GET_MODE (dst) == TImode);
605
606 if (REG_P (dst) && REG_P (src))
607 {
608 int src_regno = REGNO (src);
609 int dst_regno = REGNO (dst);
610
611 gcc_assert (GET_MODE (src) == TImode);
612
613 /* Handle r -> w, w -> r. */
614 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
615 {
616 emit_insn (gen_aarch64_movtilow_di (dst,
617 gen_lowpart (word_mode, src)));
618 emit_insn (gen_aarch64_movtihigh_di (dst,
619 gen_highpart (word_mode, src)));
620 return;
621 }
622 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
623 {
624 emit_insn (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst),
625 src));
626 emit_insn (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst),
627 src));
628 return;
629 }
630 /* Fall through to r -> r cases. */
631 }
632
633 low_dst = gen_lowpart (word_mode, dst);
634 if (REG_P (low_dst)
635 && reg_overlap_mentioned_p (low_dst, src))
636 {
637 aarch64_emit_move (gen_highpart (word_mode, dst),
638 gen_highpart_mode (word_mode, TImode, src));
639 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
640 }
641 else
642 {
643 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
644 aarch64_emit_move (gen_highpart (word_mode, dst),
645 gen_highpart_mode (word_mode, TImode, src));
646 }
647}
648
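/* Return true if a 128-bit move from SRC to DST needs to be split into
 two 64-bit halves, i.e. unless both operands are FP/SIMD registers. */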
649bool
650aarch64_split_128bit_move_p (rtx dst, rtx src)
651{
652 return (! REG_P (src)
653 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
654}
655
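/* Force VALUE into a register: a fresh pseudo if one can still be
 created, otherwise the existing register X. */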
656static rtx
657aarch64_force_temporary (rtx x, rtx value)
658{
659 if (can_create_pseudo_p ())
660 return force_reg (Pmode, value);
661 else
662 {
663 x = aarch64_emit_move (x, value);
664 return x;
665 }
666}
667
668
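/* Return an rtx equivalent to REG + OFFSET in mode MODE, using TEMP as
 a scratch register when OFFSET is not a valid add immediate. */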
669static rtx
670aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
671{
672 if (!aarch64_plus_immediate (GEN_INT (offset), DImode))
673 {
674 rtx high;
675 /* Load the full offset into a register. This
676 might be improvable in the future. */
677 high = GEN_INT (offset);
678 offset = 0;
679 high = aarch64_force_temporary (temp, high);
680 reg = aarch64_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
681 }
682 return plus_constant (mode, reg, offset);
683}
684
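/* Expand a move of immediate or symbolic constant IMM into register
 DEST, which must have SImode or DImode, splitting the constant into an
 efficient sequence of instructions where necessary. */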
685void
686aarch64_expand_mov_immediate (rtx dest, rtx imm)
687{
688 enum machine_mode mode = GET_MODE (dest);
689 unsigned HOST_WIDE_INT mask;
690 int i;
691 bool first;
692 unsigned HOST_WIDE_INT val;
693 bool subtargets;
694 rtx subtarget;
695 int one_match, zero_match;
696
697 gcc_assert (mode == SImode || mode == DImode);
698
699 /* Check on what type of symbol it is. */
700 if (GET_CODE (imm) == SYMBOL_REF
701 || GET_CODE (imm) == LABEL_REF
702 || GET_CODE (imm) == CONST)
703 {
704 rtx mem, base, offset;
705 enum aarch64_symbol_type sty;
706
707 /* If we have (const (plus symbol offset)), separate out the offset
708 before we start classifying the symbol. */
709 split_const (imm, &base, &offset);
710
711 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
712 switch (sty)
713 {
714 case SYMBOL_FORCE_TO_MEM:
715 if (offset != const0_rtx
716 && targetm.cannot_force_const_mem (mode, imm))
717 {
718 gcc_assert(can_create_pseudo_p ());
719 base = aarch64_force_temporary (dest, base);
720 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
721 aarch64_emit_move (dest, base);
722 return;
723 }
724 mem = force_const_mem (mode, imm);
725 gcc_assert (mem);
726 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
727 return;
728
729 case SYMBOL_SMALL_TLSGD:
730 case SYMBOL_SMALL_TLSDESC:
731 case SYMBOL_SMALL_GOTTPREL:
732 case SYMBOL_SMALL_GOT:
733 if (offset != const0_rtx)
734 {
735 gcc_assert(can_create_pseudo_p ());
736 base = aarch64_force_temporary (dest, base);
737 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
738 aarch64_emit_move (dest, base);
739 return;
740 }
741 /* FALLTHRU */
742
743 case SYMBOL_SMALL_TPREL:
744 case SYMBOL_SMALL_ABSOLUTE:
745 aarch64_load_symref_appropriately (dest, imm, sty);
746 return;
747
748 default:
749 gcc_unreachable ();
750 }
751 }
752
753 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
754 {
755 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
756 return;
757 }
758
759 if (!CONST_INT_P (imm))
760 {
761 if (GET_CODE (imm) == HIGH)
762 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
763 else
764 {
765 rtx mem = force_const_mem (mode, imm);
766 gcc_assert (mem);
767 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
768 }
769
770 return;
771 }
772
773 if (mode == SImode)
774 {
775 /* We know we can't do this in 1 insn, and we must be able to do it
776 in two; so don't mess around looking for sequences that don't buy
777 us anything. */
778 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
779 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
780 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
781 return;
782 }
783
784 /* Remaining cases are all for DImode. */
785
786 val = INTVAL (imm);
787 subtargets = optimize && can_create_pseudo_p ();
788
789 one_match = 0;
790 zero_match = 0;
791 mask = 0xffff;
792
793 for (i = 0; i < 64; i += 16, mask <<= 16)
794 {
795 if ((val & mask) == 0)
796 zero_match++;
797 else if ((val & mask) == mask)
798 one_match++;
799 }
800
801 if (one_match == 2)
802 {
803 mask = 0xffff;
804 for (i = 0; i < 64; i += 16, mask <<= 16)
805 {
806 if ((val & mask) != mask)
807 {
808 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
809 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
810 GEN_INT ((val >> i) & 0xffff)));
811 return;
812 }
813 }
814 gcc_unreachable ();
815 }
816
817 if (zero_match == 2)
818 goto simple_sequence;
819
820 mask = 0x0ffff0000UL;
821 for (i = 16; i < 64; i += 16, mask <<= 16)
822 {
823 HOST_WIDE_INT comp = mask & ~(mask - 1);
824
825 if (aarch64_uimm12_shift (val - (val & mask)))
826 {
827 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
828
829 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
830 emit_insn (gen_adddi3 (dest, subtarget,
831 GEN_INT (val - (val & mask))));
832 return;
833 }
834 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
835 {
836 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
837
838 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
839 GEN_INT ((val + comp) & mask)));
840 emit_insn (gen_adddi3 (dest, subtarget,
841 GEN_INT (val - ((val + comp) & mask))));
842 return;
843 }
844 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
845 {
846 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
847
848 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
849 GEN_INT ((val - comp) | ~mask)));
850 emit_insn (gen_adddi3 (dest, subtarget,
851 GEN_INT (val - ((val - comp) | ~mask))));
852 return;
853 }
854 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
855 {
856 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
857
858 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
859 GEN_INT (val | ~mask)));
860 emit_insn (gen_adddi3 (dest, subtarget,
861 GEN_INT (val - (val | ~mask))));
862 return;
863 }
864 }
865
866 /* See if we can do it by arithmetically combining two
867 immediates. */
868 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
869 {
870 int j;
871 mask = 0xffff;
872
873 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
874 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
875 {
876 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
877 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
878 GEN_INT (aarch64_bitmasks[i])));
879 emit_insn (gen_adddi3 (dest, subtarget,
880 GEN_INT (val - aarch64_bitmasks[i])));
881 return;
882 }
883
884 for (j = 0; j < 64; j += 16, mask <<= 16)
885 {
886 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
887 {
888 emit_insn (gen_rtx_SET (VOIDmode, dest,
889 GEN_INT (aarch64_bitmasks[i])));
890 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
891 GEN_INT ((val >> j) & 0xffff)));
892 return;
893 }
894 }
895 }
896
897 /* See if we can do it by logically combining two immediates. */
898 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
899 {
900 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
901 {
902 int j;
903
904 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
905 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
906 {
907 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
908 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
909 GEN_INT (aarch64_bitmasks[i])));
910 emit_insn (gen_iordi3 (dest, subtarget,
911 GEN_INT (aarch64_bitmasks[j])));
912 return;
913 }
914 }
915 else if ((val & aarch64_bitmasks[i]) == val)
916 {
917 int j;
918
919 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
920 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
921 {
922
923 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
924 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
925 GEN_INT (aarch64_bitmasks[j])));
926 emit_insn (gen_anddi3 (dest, subtarget,
927 GEN_INT (aarch64_bitmasks[i])));
928 return;
929 }
930 }
931 }
932
933 simple_sequence:
934 first = true;
935 mask = 0xffff;
936 for (i = 0; i < 64; i += 16, mask <<= 16)
937 {
938 if ((val & mask) != 0)
939 {
940 if (first)
941 {
942 emit_insn (gen_rtx_SET (VOIDmode, dest,
943 GEN_INT (val & mask)));
944 first = false;
945 }
946 else
947 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
948 GEN_INT ((val >> i) & 0xffff)));
949 }
950 }
951}
952
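/* Return true if it is safe to turn a call to DECL into a sibling
 (tail) call. */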
953static bool
954aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
955{
956 /* Indirect calls are not currently supported. */
957 if (decl == NULL)
958 return false;
959
960 /* Cannot tail-call to long-calls, since these are outside of the
961 range of a branch instruction (we could handle this if we added
 962 support for indirect tail-calls). */
963 if (aarch64_decl_is_long_call_p (decl))
964 return false;
965
966 return true;
967}
968
969/* Implement TARGET_PASS_BY_REFERENCE. */
970
971static bool
972aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
973 enum machine_mode mode,
974 const_tree type,
975 bool named ATTRIBUTE_UNUSED)
976{
977 HOST_WIDE_INT size;
978 enum machine_mode dummymode;
979 int nregs;
980
981 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
982 size = (mode == BLKmode && type)
983 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
984
985 if (type)
986 {
987 /* Arrays always passed by reference. */
988 if (TREE_CODE (type) == ARRAY_TYPE)
989 return true;
990 /* Other aggregates based on their size. */
991 if (AGGREGATE_TYPE_P (type))
992 size = int_size_in_bytes (type);
993 }
994
 995 /* Variable-sized arguments are always passed by reference. */
996 if (size < 0)
997 return true;
998
999 /* Can this be a candidate to be passed in fp/simd register(s)? */
1000 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1001 &dummymode, &nregs,
1002 NULL))
1003 return false;
1004
1005 /* Arguments which are variable sized or larger than 2 registers are
 1006 passed by reference unless they are a homogeneous floating-point
 1007 aggregate. */
1008 return size > 2 * UNITS_PER_WORD;
1009}
1010
1011/* Return TRUE if VALTYPE is padded to its least significant bits. */
1012static bool
1013aarch64_return_in_msb (const_tree valtype)
1014{
1015 enum machine_mode dummy_mode;
1016 int dummy_int;
1017
1018 /* Never happens in little-endian mode. */
1019 if (!BYTES_BIG_ENDIAN)
1020 return false;
1021
 1022 /* Only composite types smaller than or equal to 16 bytes can
 1023 potentially be returned in registers. */
1024 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1025 || int_size_in_bytes (valtype) <= 0
1026 || int_size_in_bytes (valtype) > 16)
1027 return false;
1028
1029 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1030 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1031 is always passed/returned in the least significant bits of fp/simd
1032 register(s). */
1033 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1034 &dummy_mode, &dummy_int, NULL))
1035 return false;
1036
1037 return true;
1038}
1039
1040/* Implement TARGET_FUNCTION_VALUE.
1041 Define how to find the value returned by a function. */
1042
1043static rtx
1044aarch64_function_value (const_tree type, const_tree func,
1045 bool outgoing ATTRIBUTE_UNUSED)
1046{
1047 enum machine_mode mode;
1048 int unsignedp;
1049 int count;
1050 enum machine_mode ag_mode;
1051
1052 mode = TYPE_MODE (type);
1053 if (INTEGRAL_TYPE_P (type))
1054 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1055
1056 if (aarch64_return_in_msb (type))
1057 {
1058 HOST_WIDE_INT size = int_size_in_bytes (type);
1059
1060 if (size % UNITS_PER_WORD != 0)
1061 {
1062 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1063 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1064 }
1065 }
1066
1067 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1068 &ag_mode, &count, NULL))
1069 {
1070 if (!aarch64_composite_type_p (type, mode))
1071 {
1072 gcc_assert (count == 1 && mode == ag_mode);
1073 return gen_rtx_REG (mode, V0_REGNUM);
1074 }
1075 else
1076 {
1077 int i;
1078 rtx par;
1079
1080 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1081 for (i = 0; i < count; i++)
1082 {
1083 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1084 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1085 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1086 XVECEXP (par, 0, i) = tmp;
1087 }
1088 return par;
1089 }
1090 }
1091 else
1092 return gen_rtx_REG (mode, R0_REGNUM);
1093}
1094
1095/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1096 Return true if REGNO is the number of a hard register in which the values
1097 of called function may come back. */
1098
1099static bool
1100aarch64_function_value_regno_p (const unsigned int regno)
1101{
1102 /* Maximum of 16 bytes can be returned in the general registers. Examples
1103 of 16-byte return values are: 128-bit integers and 16-byte small
1104 structures (excluding homogeneous floating-point aggregates). */
1105 if (regno == R0_REGNUM || regno == R1_REGNUM)
1106 return true;
1107
1108 /* Up to four fp/simd registers can return a function value, e.g. a
1109 homogeneous floating-point aggregate having four members. */
1110 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1111 return !TARGET_GENERAL_REGS_ONLY;
1112
1113 return false;
1114}
1115
1116/* Implement TARGET_RETURN_IN_MEMORY.
1117
1118 If the type T of the result of a function is such that
1119 void func (T arg)
1120 would require that arg be passed as a value in a register (or set of
1121 registers) according to the parameter passing rules, then the result
1122 is returned in the same registers as would be used for such an
1123 argument. */
1124
1125static bool
1126aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1127{
1128 HOST_WIDE_INT size;
1129 enum machine_mode ag_mode;
1130 int count;
1131
1132 if (!AGGREGATE_TYPE_P (type)
1133 && TREE_CODE (type) != COMPLEX_TYPE
1134 && TREE_CODE (type) != VECTOR_TYPE)
1135 /* Simple scalar types always returned in registers. */
1136 return false;
1137
1138 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1139 type,
1140 &ag_mode,
1141 &count,
1142 NULL))
1143 return false;
1144
1145 /* Types larger than 2 registers returned in memory. */
1146 size = int_size_in_bytes (type);
1147 return (size < 0 || size > 2 * UNITS_PER_WORD);
1148}
1149
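/* Return true if an argument of mode MODE and type TYPE can be passed
 in FP/SIMD registers, recording the per-element mode in the cumulative
 argument state and the number of registers needed in *NREGS. */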
1150static bool
1151aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1152 const_tree type, int *nregs)
1153{
1154 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1155 return aarch64_vfp_is_call_or_return_candidate (mode,
1156 type,
1157 &pcum->aapcs_vfp_rmode,
1158 nregs,
1159 NULL);
1160}
1161
1162/* Given MODE and TYPE of a function argument, return the alignment in
1163 bits. The idea is to suppress any stronger alignment requested by
1164 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1165 This is a helper function for local use only. */
1166
1167static unsigned int
1168aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1169{
1170 unsigned int alignment;
1171
1172 if (type)
1173 {
1174 if (!integer_zerop (TYPE_SIZE (type)))
1175 {
1176 if (TYPE_MODE (type) == mode)
1177 alignment = TYPE_ALIGN (type);
1178 else
1179 alignment = GET_MODE_ALIGNMENT (mode);
1180 }
1181 else
1182 alignment = 0;
1183 }
1184 else
1185 alignment = GET_MODE_ALIGNMENT (mode);
1186
1187 return alignment;
1188}
1189
1190/* Layout a function argument according to the AAPCS64 rules. The rule
1191 numbers refer to the rule numbers in the AAPCS64. */
1192
1193static void
1194aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1195 const_tree type,
1196 bool named ATTRIBUTE_UNUSED)
1197{
1198 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1199 int ncrn, nvrn, nregs;
1200 bool allocate_ncrn, allocate_nvrn;
1201
1202 /* We need to do this once per argument. */
1203 if (pcum->aapcs_arg_processed)
1204 return;
1205
1206 pcum->aapcs_arg_processed = true;
1207
1208 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1209 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1210 mode,
1211 type,
1212 &nregs);
1213
 1214 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1215 The following code thus handles passing by SIMD/FP registers first. */
1216
1217 nvrn = pcum->aapcs_nvrn;
1218
 1219 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
 1220 and homogeneous short-vector aggregates (HVA). */
1221 if (allocate_nvrn)
1222 {
1223 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1224 {
1225 pcum->aapcs_nextnvrn = nvrn + nregs;
1226 if (!aarch64_composite_type_p (type, mode))
1227 {
1228 gcc_assert (nregs == 1);
1229 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1230 }
1231 else
1232 {
1233 rtx par;
1234 int i;
1235 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1236 for (i = 0; i < nregs; i++)
1237 {
1238 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1239 V0_REGNUM + nvrn + i);
1240 tmp = gen_rtx_EXPR_LIST
1241 (VOIDmode, tmp,
1242 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1243 XVECEXP (par, 0, i) = tmp;
1244 }
1245 pcum->aapcs_reg = par;
1246 }
1247 return;
1248 }
1249 else
1250 {
1251 /* C.3 NSRN is set to 8. */
1252 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1253 goto on_stack;
1254 }
1255 }
1256
1257 ncrn = pcum->aapcs_ncrn;
1258 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1259 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1260
1261
 1262 /* C6 - C9, though the sign and zero extension semantics are
 1263 handled elsewhere. This is the case where the argument fits
 1264 entirely in general registers. */
1265 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1266 {
1267 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1268
1269 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1270
 1271 /* C.8: if the argument has an alignment of 16 then the NGRN is
1272 rounded up to the next even number. */
1273 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1274 {
1275 ++ncrn;
1276 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1277 }
1278 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1279 A reg is still generated for it, but the caller should be smart
1280 enough not to use it. */
1281 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1282 {
1283 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1284 }
1285 else
1286 {
1287 rtx par;
1288 int i;
1289
1290 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1291 for (i = 0; i < nregs; i++)
1292 {
1293 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1294 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1295 GEN_INT (i * UNITS_PER_WORD));
1296 XVECEXP (par, 0, i) = tmp;
1297 }
1298 pcum->aapcs_reg = par;
1299 }
1300
1301 pcum->aapcs_nextncrn = ncrn + nregs;
1302 return;
1303 }
1304
1305 /* C.11 */
1306 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1307
 1308 /* The argument is passed on the stack; record the needed number of words for
1309 this argument (we can re-use NREGS) and align the total size if
1310 necessary. */
1311on_stack:
1312 pcum->aapcs_stack_words = nregs;
1313 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1314 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1315 16 / UNITS_PER_WORD) + 1;
1316 return;
1317}
1318
1319/* Implement TARGET_FUNCTION_ARG. */
1320
1321static rtx
1322aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1323 const_tree type, bool named)
1324{
1325 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1326 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1327
1328 if (mode == VOIDmode)
1329 return NULL_RTX;
1330
1331 aarch64_layout_arg (pcum_v, mode, type, named);
1332 return pcum->aapcs_reg;
1333}
1334
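/* Initialise the cumulative argument-passing state in PCUM for a call
 that follows the AAPCS64 calling convention. */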
1335void
1336aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1337 const_tree fntype ATTRIBUTE_UNUSED,
1338 rtx libname ATTRIBUTE_UNUSED,
1339 const_tree fndecl ATTRIBUTE_UNUSED,
1340 unsigned n_named ATTRIBUTE_UNUSED)
1341{
1342 pcum->aapcs_ncrn = 0;
1343 pcum->aapcs_nvrn = 0;
1344 pcum->aapcs_nextncrn = 0;
1345 pcum->aapcs_nextnvrn = 0;
1346 pcum->pcs_variant = ARM_PCS_AAPCS64;
1347 pcum->aapcs_reg = NULL_RTX;
1348 pcum->aapcs_arg_processed = false;
1349 pcum->aapcs_stack_words = 0;
1350 pcum->aapcs_stack_size = 0;
1351
1352 return;
1353}
1354
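/* Advance the argument-passing state in PCUM_V past an argument of the
 given MODE, TYPE and NAMED-ness, updating the core and FP/SIMD register
 counts and the accumulated stack size. */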
1355static void
1356aarch64_function_arg_advance (cumulative_args_t pcum_v,
1357 enum machine_mode mode,
1358 const_tree type,
1359 bool named)
1360{
1361 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1362 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1363 {
1364 aarch64_layout_arg (pcum_v, mode, type, named);
1365 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1366 != (pcum->aapcs_stack_words != 0));
1367 pcum->aapcs_arg_processed = false;
1368 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1369 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1370 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1371 pcum->aapcs_stack_words = 0;
1372 pcum->aapcs_reg = NULL_RTX;
1373 }
1374}
1375
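/* Return true if REGNO is a general or FP/SIMD register used for
 passing function arguments. */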
1376bool
1377aarch64_function_arg_regno_p (unsigned regno)
1378{
1379 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1380 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1381}
1382
1383/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1384 PARM_BOUNDARY bits of alignment, but will be given anything up
1385 to STACK_BOUNDARY bits if the type requires it. This makes sure
1386 that both before and after the layout of each argument, the Next
1387 Stacked Argument Address (NSAA) will have a minimum alignment of
1388 8 bytes. */
1389
1390static unsigned int
1391aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1392{
1393 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1394
1395 if (alignment < PARM_BOUNDARY)
1396 alignment = PARM_BOUNDARY;
1397 if (alignment > STACK_BOUNDARY)
1398 alignment = STACK_BOUNDARY;
1399 return alignment;
1400}
1401
1402/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1403
1404 Return true if an argument passed on the stack should be padded upwards,
1405 i.e. if the least-significant byte of the stack slot has useful data.
1406
1407 Small aggregate types are placed in the lowest memory address.
1408
1409 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1410
1411bool
1412aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1413{
1414 /* On little-endian targets, the least significant byte of every stack
1415 argument is passed at the lowest byte address of the stack slot. */
1416 if (!BYTES_BIG_ENDIAN)
1417 return true;
1418
1419 /* Otherwise, integral types and floating point types are padded downward:
1420 the least significant byte of a stack argument is passed at the highest
1421 byte address of the stack slot. */
1422 if (type
1423 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type))
1424 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1425 return false;
1426
1427 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1428 return true;
1429}
1430
1431/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1432
 1433 It specifies padding for the last (and possibly the only)
 1434 element of a block move between registers and memory. Assuming
 1435 the block is in memory, padding upward means that the last
 1436 element is padded after its most significant byte, while with
 1437 downward padding the last element is padded on its least
 1438 significant byte side.
1439
1440 Small aggregates and small complex types are always padded
1441 upwards.
1442
1443 We don't need to worry about homogeneous floating-point or
1444 short-vector aggregates; their move is not affected by the
1445 padding direction determined here. Regardless of endianness,
1446 each element of such an aggregate is put in the least
1447 significant bits of a fp/simd register.
1448
1449 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1450 register has useful data, and return the opposite if the most
1451 significant byte does. */
1452
1453bool
1454aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1455 bool first ATTRIBUTE_UNUSED)
1456{
1457
1458 /* Small composite types are always padded upward. */
1459 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1460 {
1461 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1462 : GET_MODE_SIZE (mode));
1463 if (size < 2 * UNITS_PER_WORD)
1464 return true;
1465 }
1466
1467 /* Otherwise, use the default padding. */
1468 return !BYTES_BIG_ENDIAN;
1469}
1470
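/* Return the mode in which libgcc comparison helpers return their
 result. */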
1471static enum machine_mode
1472aarch64_libgcc_cmp_return_mode (void)
1473{
1474 return SImode;
1475}
1476
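/* Return true if the current function must establish a frame
 pointer. */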
1477static bool
1478aarch64_frame_pointer_required (void)
1479{
1480 /* If the function contains dynamic stack allocations, we need to
1481 use the frame pointer to access the static parts of the frame. */
1482 if (cfun->calls_alloca)
1483 return true;
1484
1485 /* We may have turned flag_omit_frame_pointer on in order to have this
1486 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1487 and we'll check it here.
1488 If we really did set flag_omit_frame_pointer normally, then we return false
1489 (no frame pointer required) in all cases. */
1490
1491 if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1492 return false;
1493 else if (flag_omit_leaf_frame_pointer)
1494 return !crtl->is_leaf;
1495 return true;
1496}
1497
1498/* Mark the registers that need to be saved by the callee and calculate
1499 the size of the callee-saved registers area and frame record (both FP
1500 and LR may be omitted). */
1501static void
1502aarch64_layout_frame (void)
1503{
1504 HOST_WIDE_INT offset = 0;
1505 int regno;
1506
1507 if (reload_completed && cfun->machine->frame.laid_out)
1508 return;
1509
1510 cfun->machine->frame.fp_lr_offset = 0;
1511
1512 /* First mark all the registers that really need to be saved... */
1513 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1514 cfun->machine->frame.reg_offset[regno] = -1;
1515
1516 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1517 cfun->machine->frame.reg_offset[regno] = -1;
1518
1519 /* ... that includes the eh data registers (if needed)... */
1520 if (crtl->calls_eh_return)
1521 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1522 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1523
1524 /* ... and any callee saved register that dataflow says is live. */
1525 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1526 if (df_regs_ever_live_p (regno)
1527 && !call_used_regs[regno])
1528 cfun->machine->frame.reg_offset[regno] = 0;
1529
1530 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1531 if (df_regs_ever_live_p (regno)
1532 && !call_used_regs[regno])
1533 cfun->machine->frame.reg_offset[regno] = 0;
1534
1535 if (frame_pointer_needed)
1536 {
1537 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1538 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1539 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1540 }
1541
1542 /* Now assign stack slots for them. */
1543 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1544 if (cfun->machine->frame.reg_offset[regno] != -1)
1545 {
1546 cfun->machine->frame.reg_offset[regno] = offset;
1547 offset += UNITS_PER_WORD;
1548 }
1549
1550 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1551 if (cfun->machine->frame.reg_offset[regno] != -1)
1552 {
1553 cfun->machine->frame.reg_offset[regno] = offset;
1554 offset += UNITS_PER_WORD;
1555 }
1556
1557 if (frame_pointer_needed)
1558 {
1559 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1560 offset += UNITS_PER_WORD;
1561 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1562 }
1563
1564 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1565 {
1566 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1567 offset += UNITS_PER_WORD;
1568 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1569 }
1570
1571 cfun->machine->frame.padding0 =
1572 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1573 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1574
1575 cfun->machine->frame.saved_regs_size = offset;
1576 cfun->machine->frame.laid_out = true;
1577}
1578
1579/* Make the last instruction frame-related and note that it performs
1580 the operation described by FRAME_PATTERN. */
1581
1582static void
1583aarch64_set_frame_expr (rtx frame_pattern)
1584{
1585 rtx insn;
1586
1587 insn = get_last_insn ();
1588 RTX_FRAME_RELATED_P (insn) = 1;
1589 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1590 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1591 frame_pattern,
1592 REG_NOTES (insn));
1593}
1594
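/* Return true if register REGNO is saved on entry to the current
 function. */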
1595static bool
1596aarch64_register_saved_on_entry (int regno)
1597{
1598 return cfun->machine->frame.reg_offset[regno] != -1;
1599}
1600
1601
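/* Save or restore (according to RESTORE) the callee-saved FP/SIMD
 registers at START_OFFSET from BASE_RTX, advancing by INCREMENT per
 register and using load/store-pair instructions for adjacent saved
 registers where possible. */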
1602static void
1603aarch64_save_or_restore_fprs (int start_offset, int increment,
1604 bool restore, rtx base_rtx)
1605
1606{
1607 unsigned regno;
1608 unsigned regno2;
1609 rtx insn;
1610 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1611
1612
1613 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1614 {
1615 if (aarch64_register_saved_on_entry (regno))
1616 {
1617 rtx mem;
1618 mem = gen_mem_ref (DFmode,
1619 plus_constant (Pmode,
1620 base_rtx,
1621 start_offset));
1622
1623 for (regno2 = regno + 1;
1624 regno2 <= V31_REGNUM
1625 && !aarch64_register_saved_on_entry (regno2);
1626 regno2++)
1627 {
1628 /* Empty loop. */
1629 }
1630 if (regno2 <= V31_REGNUM &&
1631 aarch64_register_saved_on_entry (regno2))
1632 {
1633 rtx mem2;
1634 /* Next highest register to be saved. */
1635 mem2 = gen_mem_ref (DFmode,
1636 plus_constant
1637 (Pmode,
1638 base_rtx,
1639 start_offset + increment));
1640 if (restore == false)
1641 {
1642 insn = emit_insn
1643 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1644 mem2, gen_rtx_REG (DFmode, regno2)));
1645
1646 }
1647 else
1648 {
1649 insn = emit_insn
1650 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1651 gen_rtx_REG (DFmode, regno2), mem2));
1652
1653 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno));
1654 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno2));
1655 }
1656
1657 /* The first part of a frame-related parallel insn
1658 is always assumed to be relevant to the frame
 1659 calculations; subsequent parts are only
1660 frame-related if explicitly marked. */
1661 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1662 1)) = 1;
1663 regno = regno2;
1664 start_offset += increment * 2;
1665 }
1666 else
1667 {
1668 if (restore == false)
1669 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1670 else
1671 {
1672 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1673 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1674 }
1675 start_offset += increment;
1676 }
1677 RTX_FRAME_RELATED_P (insn) = 1;
1678 }
1679 }
1680
1681}
1682
1683
 1684/* Save or restore (according to RESTORE) the callee-saved registers,
 1685 starting at OFFSET from the stack pointer. */
1686static void
1687aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1688 bool restore)
1689{
1690 rtx insn;
1691 rtx base_rtx = stack_pointer_rtx;
1692 HOST_WIDE_INT start_offset = offset;
1693 HOST_WIDE_INT increment = UNITS_PER_WORD;
1694 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1695 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1696 unsigned regno;
1697 unsigned regno2;
1698
1699 for (regno = R0_REGNUM; regno <= limit; regno++)
1700 {
1701 if (aarch64_register_saved_on_entry (regno))
1702 {
1703 rtx mem;
1704 mem = gen_mem_ref (Pmode,
1705 plus_constant (Pmode,
1706 base_rtx,
1707 start_offset));
1708
1709 for (regno2 = regno + 1;
1710 regno2 <= limit
1711 && !aarch64_register_saved_on_entry (regno2);
1712 regno2++)
1713 {
1714 /* Empty loop. */
1715 }
1716 if (regno2 <= limit &&
1717 aarch64_register_saved_on_entry (regno2))
1718 {
1719 rtx mem2;
1720 /* Next highest register to be saved. */
1721 mem2 = gen_mem_ref (Pmode,
1722 plus_constant
1723 (Pmode,
1724 base_rtx,
1725 start_offset + increment));
1726 if (restore == false)
1727 {
1728 insn = emit_insn
1729 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1730 mem2, gen_rtx_REG (DImode, regno2)));
1731
1732 }
1733 else
1734 {
1735 insn = emit_insn
1736 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1737 gen_rtx_REG (DImode, regno2), mem2));
1738
1739 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1740 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1741 }
1742
1743 /* The first part of a frame-related parallel insn
1744 is always assumed to be relevant to the frame
 1745 calculations; subsequent parts are only
1746 frame-related if explicitly marked. */
1747 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1748 1)) = 1;
1749 regno = regno2;
1750 start_offset += increment * 2;
1751 }
1752 else
1753 {
1754 if (restore == false)
1755 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1756 else
1757 {
1758 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1759 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1760 }
1761 start_offset += increment;
1762 }
1763 RTX_FRAME_RELATED_P (insn) = 1;
1764 }
1765 }
1766
1767 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1768
1769}
1770
1771/* AArch64 stack frames generated by this compiler look like:
1772
1773 +-------------------------------+
1774 | |
1775 | incoming stack arguments |
1776 | |
1777 +-------------------------------+ <-- arg_pointer_rtx
1778 | |
1779 | callee-allocated save area |
1780 | for register varargs |
1781 | |
1782 +-------------------------------+
1783 | |
1784 | local variables |
1785 | |
1786 +-------------------------------+ <-- frame_pointer_rtx
1787 | |
1788 | callee-saved registers |
1789 | |
1790 +-------------------------------+
1791 | LR' |
1792 +-------------------------------+
1793 | FP' |
1794 P +-------------------------------+ <-- hard_frame_pointer_rtx
1795 | dynamic allocation |
1796 +-------------------------------+
1797 | |
1798 | outgoing stack arguments |
1799 | |
1800 +-------------------------------+ <-- stack_pointer_rtx
1801
1802 Dynamic stack allocations such as alloca insert data at point P.
1803 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
1804 hard_frame_pointer_rtx unchanged. */
1805
1806/* Generate the prologue instructions for entry into a function.
 1807 Establish the stack frame by decreasing the stack pointer by a
 1808 properly calculated size and, if necessary, create a frame record
 1809 filled with the values of LR and the previous frame pointer. The
 1810 current FP is also set up if it is in use. */
1811
1812void
1813aarch64_expand_prologue (void)
1814{
1815 /* sub sp, sp, #<frame_size>
1816 stp {fp, lr}, [sp, #<frame_size> - 16]
1817 add fp, sp, #<frame_size> - hardfp_offset
1818 stp {cs_reg}, [fp, #-16] etc.
1819
1820 sub sp, sp, <final_adjustment_if_any>
1821 */
1822 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
1823 HOST_WIDE_INT frame_size, offset;
1824 HOST_WIDE_INT fp_offset; /* FP offset from SP */
1825 rtx insn;
1826
1827 aarch64_layout_frame ();
1828 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
1829 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
1830 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
1831 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
1832 + crtl->outgoing_args_size);
1833 offset = frame_size = AARCH64_ROUND_UP (frame_size,
1834 STACK_BOUNDARY / BITS_PER_UNIT);
1835
1836 if (flag_stack_usage_info)
1837 current_function_static_stack_size = frame_size;
1838
1839 fp_offset = (offset
1840 - original_frame_size
1841 - cfun->machine->frame.saved_regs_size);
1842
 1843 /* Store pairs and load pairs have a range of only -512 to 504. */
1844 if (offset >= 512)
1845 {
1846 /* When the frame has a large size, an initial decrease is done on
1847 the stack pointer to jump over the callee-allocated save area for
1848 register varargs, the local variable area and/or the callee-saved
1849 register area. This will allow the pre-index write-back
1850 store pair instructions to be used for setting up the stack frame
1851 efficiently. */
1852 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
1853 if (offset >= 512)
1854 offset = cfun->machine->frame.saved_regs_size;
1855
1856 frame_size -= (offset + crtl->outgoing_args_size);
1857 fp_offset = 0;
1858
1859 if (frame_size >= 0x1000000)
1860 {
1861 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
1862 emit_move_insn (op0, GEN_INT (-frame_size));
1863 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
1864 aarch64_set_frame_expr (gen_rtx_SET
1865 (Pmode, stack_pointer_rtx,
1866 gen_rtx_PLUS (Pmode,
1867 stack_pointer_rtx,
1868 GEN_INT (-frame_size))));
1869 }
1870 else if (frame_size > 0)
1871 {
1872 if ((frame_size & 0xfff) != frame_size)
1873 {
1874 insn = emit_insn (gen_add2_insn
1875 (stack_pointer_rtx,
1876 GEN_INT (-(frame_size
1877 & ~(HOST_WIDE_INT)0xfff))));
1878 RTX_FRAME_RELATED_P (insn) = 1;
1879 }
1880 if ((frame_size & 0xfff) != 0)
1881 {
1882 insn = emit_insn (gen_add2_insn
1883 (stack_pointer_rtx,
1884 GEN_INT (-(frame_size
1885 & (HOST_WIDE_INT)0xfff))));
1886 RTX_FRAME_RELATED_P (insn) = 1;
1887 }
1888 }
1889 }
1890 else
1891 frame_size = -1;
1892
1893 if (offset > 0)
1894 {
1895 /* Save the frame pointer and lr if the frame pointer is needed
1896 first. Make the frame pointer point to the location of the
1897 old frame pointer on the stack. */
1898 if (frame_pointer_needed)
1899 {
1900 rtx mem_fp, mem_lr;
1901
1902 if (fp_offset)
1903 {
1904 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
1905 GEN_INT (-offset)));
1906 RTX_FRAME_RELATED_P (insn) = 1;
1907 aarch64_set_frame_expr (gen_rtx_SET
1908 (Pmode, stack_pointer_rtx,
1909 gen_rtx_MINUS (Pmode,
1910 stack_pointer_rtx,
1911 GEN_INT (offset))));
1912 mem_fp = gen_frame_mem (DImode,
1913 plus_constant (Pmode,
1914 stack_pointer_rtx,
1915 fp_offset));
1916 mem_lr = gen_frame_mem (DImode,
1917 plus_constant (Pmode,
1918 stack_pointer_rtx,
1919 fp_offset
1920 + UNITS_PER_WORD));
1921 insn = emit_insn (gen_store_pairdi (mem_fp,
1922 hard_frame_pointer_rtx,
1923 mem_lr,
1924 gen_rtx_REG (DImode,
1925 LR_REGNUM)));
1926 }
1927 else
1928 {
1929 insn = emit_insn (gen_storewb_pairdi_di
1930 (stack_pointer_rtx, stack_pointer_rtx,
1931 hard_frame_pointer_rtx,
1932 gen_rtx_REG (DImode, LR_REGNUM),
1933 GEN_INT (-offset),
1934 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
1935 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
1936 }
1937
1938 /* The first part of a frame-related parallel insn is always
1939 assumed to be relevant to the frame calculations;
1940 subsequent parts are only frame-related if explicitly
1941 marked. */
1942 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1943 RTX_FRAME_RELATED_P (insn) = 1;
1944
1945 /* Set up frame pointer to point to the location of the
1946 previous frame pointer on the stack. */
1947 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
1948 stack_pointer_rtx,
1949 GEN_INT (fp_offset)));
1950 aarch64_set_frame_expr (gen_rtx_SET
1951 (Pmode, hard_frame_pointer_rtx,
1952 gen_rtx_PLUS (Pmode,
1953 stack_pointer_rtx,
1954 GEN_INT (fp_offset))));
1955 RTX_FRAME_RELATED_P (insn) = 1;
1956 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
1957 hard_frame_pointer_rtx));
1958 }
1959 else
1960 {
1961 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
1962 GEN_INT (-offset)));
1963 RTX_FRAME_RELATED_P (insn) = 1;
1964 }
1965
1966 aarch64_save_or_restore_callee_save_registers
1967 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
1968 }
1969
1970 /* When offset >= 512,
1971 sub sp, sp, #<outgoing_args_size> */
1972 if (frame_size > -1)
1973 {
1974 if (crtl->outgoing_args_size > 0)
1975 {
1976 insn = emit_insn (gen_add2_insn
1977 (stack_pointer_rtx,
1978 GEN_INT (- crtl->outgoing_args_size)));
1979 RTX_FRAME_RELATED_P (insn) = 1;
1980 }
1981 }
1982}
1983
1984/* Generate the epilogue instructions for returning from a function. */
1985void
1986aarch64_expand_epilogue (bool for_sibcall)
1987{
1988 HOST_WIDE_INT original_frame_size, frame_size, offset;
1989 HOST_WIDE_INT fp_offset;
1990 rtx insn;
1991 rtx cfa_reg;
1992
1993 aarch64_layout_frame ();
1994 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
1995 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
1996 + crtl->outgoing_args_size);
1997 offset = frame_size = AARCH64_ROUND_UP (frame_size,
1998 STACK_BOUNDARY / BITS_PER_UNIT);
1999
2000 fp_offset = (offset
2001 - original_frame_size
2002 - cfun->machine->frame.saved_regs_size);
2003
2004 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2005
2006 /* Store pairs and load pairs have a range of only -512 to 504. */
2007 if (offset >= 512)
2008 {
2009 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2010 if (offset >= 512)
2011 offset = cfun->machine->frame.saved_regs_size;
2012
2013 frame_size -= (offset + crtl->outgoing_args_size);
2014 fp_offset = 0;
2015 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2016 {
2017 insn = emit_insn (gen_add2_insn
2018 (stack_pointer_rtx,
2019 GEN_INT (crtl->outgoing_args_size)));
2020 RTX_FRAME_RELATED_P (insn) = 1;
2021 }
2022 }
2023 else
2024 frame_size = -1;
2025
2026 /* If there were outgoing arguments or we've done dynamic stack
2027 allocation, then restore the stack pointer from the frame
2028 pointer. This is at most one insn and more efficient than using
2029 GCC's internal mechanism. */
2030 if (frame_pointer_needed
2031 && (crtl->outgoing_args_size || cfun->calls_alloca))
2032 {
2033 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2034 hard_frame_pointer_rtx,
2035 GEN_INT (- fp_offset)));
2036 RTX_FRAME_RELATED_P (insn) = 1;
2037 /* As SP is set to (FP - fp_offset), according to the rules in
2038 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2039 from the value of SP from now on. */
2040 cfa_reg = stack_pointer_rtx;
2041 }
2042
2043 aarch64_save_or_restore_callee_save_registers
2044 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2045
2046 /* Restore the frame pointer and lr if the frame pointer is needed. */
2047 if (offset > 0)
2048 {
2049 if (frame_pointer_needed)
2050 {
2051 rtx mem_fp, mem_lr;
2052
2053 if (fp_offset)
2054 {
2055 mem_fp = gen_frame_mem (DImode,
2056 plus_constant (Pmode,
2057 stack_pointer_rtx,
2058 fp_offset));
2059 mem_lr = gen_frame_mem (DImode,
2060 plus_constant (Pmode,
2061 stack_pointer_rtx,
2062 fp_offset
2063 + UNITS_PER_WORD));
2064 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2065 mem_fp,
2066 gen_rtx_REG (DImode,
2067 LR_REGNUM),
2068 mem_lr));
2069 }
2070 else
2071 {
2072 insn = emit_insn (gen_loadwb_pairdi_di
2073 (stack_pointer_rtx,
2074 stack_pointer_rtx,
2075 hard_frame_pointer_rtx,
2076 gen_rtx_REG (DImode, LR_REGNUM),
2077 GEN_INT (offset),
2078 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2079 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2080 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2081 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2082 plus_constant (Pmode, cfa_reg,
2083 offset))));
2084 }
2085
2086 /* The first part of a frame-related parallel insn
2087 is always assumed to be relevant to the frame
2088 calculations; subsequent parts are only
2089 frame-related if explicitly marked. */
2090 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2091 RTX_FRAME_RELATED_P (insn) = 1;
2092 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2093 add_reg_note (insn, REG_CFA_RESTORE,
2094 gen_rtx_REG (DImode, LR_REGNUM));
2095
2096 if (fp_offset)
2097 {
2098 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2099 GEN_INT (offset)));
2100 RTX_FRAME_RELATED_P (insn) = 1;
2101 }
2102 }
2103 else
2104 {
2105 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2106 GEN_INT (offset)));
2107 RTX_FRAME_RELATED_P (insn) = 1;
2108 }
2109 }
2110
2111 /* Stack adjustment for exception handler. */
2112 if (crtl->calls_eh_return)
2113 {
2114 /* We need to unwind the stack by the offset computed by
2115 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2116 based on SP. Ideally we would update the SP and define the
2117 CFA along the lines of:
2118
2119 SP = SP + EH_RETURN_STACKADJ_RTX
2120 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2121
2122 However the dwarf emitter only understands a constant
2123 register offset.
2124
2125 The solution chosen here is to use the otherwise unused IP0
2126 as a temporary register to hold the current SP value. The
2127 CFA is described using IP0 then SP is modified. */
2128
2129 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2130
2131 insn = emit_move_insn (ip0, stack_pointer_rtx);
2132 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2133 RTX_FRAME_RELATED_P (insn) = 1;
2134
2135 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2136
2137 /* Ensure the assignment to IP0 does not get optimized away. */
2138 emit_use (ip0);
2139 }
2140
2141 if (frame_size > -1)
2142 {
2143 if (frame_size >= 0x1000000)
2144 {
2145 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2146 emit_move_insn (op0, GEN_INT (frame_size));
2147 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2148 aarch64_set_frame_expr (gen_rtx_SET
2149 (Pmode, stack_pointer_rtx,
2150 gen_rtx_PLUS (Pmode,
2151 stack_pointer_rtx,
2152 GEN_INT (frame_size))));
2153 }
2154 else if (frame_size > 0)
2155 {
2156 if ((frame_size & 0xfff) != 0)
2157 {
2158 insn = emit_insn (gen_add2_insn
2159 (stack_pointer_rtx,
2160 GEN_INT ((frame_size
2161 & (HOST_WIDE_INT) 0xfff))));
2162 RTX_FRAME_RELATED_P (insn) = 1;
2163 }
2164 if ((frame_size & 0xfff) != frame_size)
2165 {
2166 insn = emit_insn (gen_add2_insn
2167 (stack_pointer_rtx,
2168 GEN_INT ((frame_size
2169 & ~ (HOST_WIDE_INT) 0xfff))));
2170 RTX_FRAME_RELATED_P (insn) = 1;
2171 }
2172 }
2173
2174 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2175 gen_rtx_PLUS (Pmode,
2176 stack_pointer_rtx,
2177 GEN_INT (offset))));
2178 }
2179
2180 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2181 if (!for_sibcall)
2182 emit_jump_insn (ret_rtx);
2183}
2184
2185/* Return the place to copy the exception unwinding return address to.
2186 This will probably be a stack slot, but could (in theory) be the
2187 return register. */
2188rtx
2189aarch64_final_eh_return_addr (void)
2190{
2191 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2192 aarch64_layout_frame ();
2193 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2194 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2195 + crtl->outgoing_args_size);
2196 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2197 STACK_BOUNDARY / BITS_PER_UNIT);
2198 fp_offset = offset
2199 - original_frame_size
2200 - cfun->machine->frame.saved_regs_size;
2201
2202 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2203 return gen_rtx_REG (DImode, LR_REGNUM);
2204
2205 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2206 result in a store to save LR introduced by builtin_eh_return () being
2207 incorrectly deleted because the alias is not detected.
2208 So in the calculation of the address to copy the exception unwinding
2209 return address to, we note 2 cases.
2210 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2211 we return a SP-relative location since all the addresses are SP-relative
2212 in this case. This prevents the store from being optimized away.
2213 If the fp_offset is not 0, then the addresses will be FP-relative and
2214 therefore we return a FP-relative location. */
2215
2216 if (frame_pointer_needed)
2217 {
2218 if (fp_offset)
2219 return gen_frame_mem (DImode,
2220 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2221 else
2222 return gen_frame_mem (DImode,
2223 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2224 }
2225
2226 /* If FP is not needed, we calculate the location of LR, which would be
2227 at the top of the saved registers block. */
2228
2229 return gen_frame_mem (DImode,
2230 plus_constant (Pmode,
2231 stack_pointer_rtx,
2232 fp_offset
2233 + cfun->machine->frame.saved_regs_size
2234 - 2 * UNITS_PER_WORD));
2235}
2236
2237/* Output code to build up a constant in a register. */
2238static void
2239aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2240{
2241 if (aarch64_bitmask_imm (val, DImode))
2242 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2243 else
2244 {
2245 int i;
2246 int ncount = 0;
2247 int zcount = 0;
2248 HOST_WIDE_INT valp = val >> 16;
2249 HOST_WIDE_INT valm;
2250 HOST_WIDE_INT tval;
2251
2252 for (i = 16; i < 64; i += 16)
2253 {
2254 valm = (valp & 0xffff);
2255
2256 if (valm != 0)
2257 ++ zcount;
2258
2259 if (valm != 0xffff)
2260 ++ ncount;
2261
2262 valp >>= 16;
2263 }
2264
2265 /* zcount contains the number of additional MOVK instructions
2266 required if the constant is built up with an initial MOVZ instruction,
2267 while ncount is the number of MOVK instructions required if starting
2268 with a MOVN instruction. Choose the sequence that needs the fewest
2269 instructions, preferring MOVZ instructions when the two counts are
2270 the same. */
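      /* Illustrative examples (not taken from the testsuite):
	 0xffffffffffff1234 has ncount == 0 and zcount == 3, so it becomes
	 a single MOVN; 0x0000000000021234 becomes a MOVZ of 0x1234 followed
	 by one MOVK of 0x2 into bits 16-31.  */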
2271 if (ncount < zcount)
2272 {
2273 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2274 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2275 tval = 0xffff;
2276 }
2277 else
2278 {
2279 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2280 GEN_INT (val & 0xffff));
2281 tval = 0;
2282 }
2283
2284 val >>= 16;
2285
2286 for (i = 16; i < 64; i += 16)
2287 {
2288 if ((val & 0xffff) != tval)
2289 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2290 GEN_INT (i), GEN_INT (val & 0xffff)));
2291 val >>= 16;
2292 }
2293 }
2294}
2295
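/* Add DELTA to the register numbered REGNUM.  If the adjustment is too
   large for an immediate add, either build DELTA in register SCRATCHREG
   and add that, or add the upper bits of DELTA via SCRATCHREG shifted
   left by 12 and then add the low 12 bits as an immediate.  */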
2296static void
2297aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2298{
2299 HOST_WIDE_INT mdelta = delta;
2300 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2301 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2302
2303 if (mdelta < 0)
2304 mdelta = -mdelta;
2305
2306 if (mdelta >= 4096 * 4096)
2307 {
2308 aarch64_build_constant (scratchreg, delta);
2309 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2310 }
2311 else if (mdelta > 0)
2312 {
2313 if (mdelta >= 4096)
2314 {
2315 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2316 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2317 if (delta < 0)
2318 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2319 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2320 else
2321 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2322 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2323 }
2324 if (mdelta % 4096 != 0)
2325 {
2326 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2327 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2328 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2329 }
2330 }
2331}
2332
2333/* Output code to add DELTA to the first argument, and then jump
2334 to FUNCTION. Used for C++ multiple inheritance. */
2335static void
2336aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2337 HOST_WIDE_INT delta,
2338 HOST_WIDE_INT vcall_offset,
2339 tree function)
2340{
2341 /* The this pointer is always in x0. Note that this differs from
2342 Arm where the this pointer may be bumped to r1 if r0 is required
2343 to return a pointer to an aggregate. On AArch64 a result value
2344 pointer will be in x8. */
2345 int this_regno = R0_REGNUM;
2346 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2347
2348 reload_completed = 1;
2349 emit_note (NOTE_INSN_PROLOGUE_END);
2350
2351 if (vcall_offset == 0)
2352 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2353 else
2354 {
2355 gcc_assert ((vcall_offset & 0x7) == 0);
2356
2357 this_rtx = gen_rtx_REG (Pmode, this_regno);
2358 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2359 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2360
2361 addr = this_rtx;
2362 if (delta != 0)
2363 {
2364 if (delta >= -256 && delta < 256)
2365 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2366 plus_constant (Pmode, this_rtx, delta));
2367 else
2368 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2369 }
2370
2371 aarch64_emit_move (temp0, gen_rtx_MEM (Pmode, addr));
2372
2373 if (vcall_offset >= -256 && vcall_offset < 32768)
2374 addr = plus_constant (Pmode, temp0, vcall_offset);
2375 else
2376 {
2377 aarch64_build_constant (IP1_REGNUM, vcall_offset);
2378 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2379 }
2380
2381 aarch64_emit_move (temp1, gen_rtx_MEM (Pmode,addr));
2382 emit_insn (gen_add2_insn (this_rtx, temp1));
2383 }
2384
2385 /* Generate a tail call to the target function. */
2386 if (!TREE_USED (function))
2387 {
2388 assemble_external (function);
2389 TREE_USED (function) = 1;
2390 }
2391 funexp = XEXP (DECL_RTL (function), 0);
2392 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2393 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2394 SIBLING_CALL_P (insn) = 1;
2395
2396 insn = get_insns ();
2397 shorten_branches (insn);
2398 final_start_function (insn, file, 1);
2399 final (insn, file, 1);
2400 final_end_function ();
2401
2402 /* Stop pretending to be a post-reload pass. */
2403 reload_completed = 0;
2404}
2405
2406static int
2407aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2408{
2409 if (GET_CODE (*x) == SYMBOL_REF)
2410 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2411
2412 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2413 TLS offsets, not real symbol references. */
2414 if (GET_CODE (*x) == UNSPEC
2415 && XINT (*x, 1) == UNSPEC_TLS)
2416 return -1;
2417
2418 return 0;
2419}
2420
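/* Return true if X contains a reference to a thread-local symbol.  */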
2421static bool
2422aarch64_tls_referenced_p (rtx x)
2423{
2424 if (!TARGET_HAVE_TLS)
2425 return false;
2426
2427 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2428}
2429
2430
2431static int
2432aarch64_bitmasks_cmp (const void *i1, const void *i2)
2433{
2434 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2435 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2436
2437 if (*imm1 < *imm2)
2438 return -1;
2439 if (*imm1 > *imm2)
2440 return +1;
2441 return 0;
2442}
2443
2444
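/* Populate aarch64_bitmasks with every value that is representable as a
   logical-instruction bitmask immediate: for each element size E in
   {2, 4, ..., 64}, a run of S consecutive set bits (0 < S < E) rotated
   right by R (0 <= R < E) and replicated to fill 64 bits.  The table is
   then sorted so that aarch64_bitmask_imm can bsearch it.  */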
2445static void
2446aarch64_build_bitmask_table (void)
2447{
2448 unsigned HOST_WIDE_INT mask, imm;
2449 unsigned int log_e, e, s, r;
2450 unsigned int nimms = 0;
2451
2452 for (log_e = 1; log_e <= 6; log_e++)
2453 {
2454 e = 1 << log_e;
2455 if (e == 64)
2456 mask = ~(HOST_WIDE_INT) 0;
2457 else
2458 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2459 for (s = 1; s < e; s++)
2460 {
2461 for (r = 0; r < e; r++)
2462 {
2463 /* set s consecutive bits to 1 (s < 64) */
2464 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2465 /* rotate right by r */
2466 if (r != 0)
2467 imm = ((imm >> r) | (imm << (e - r))) & mask;
2468 /* replicate the constant depending on SIMD size */
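	      /* The cases below intentionally fall through, so the pattern
		 is doubled repeatedly until it fills 64 bits.  */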
2469 switch (log_e) {
2470 case 1: imm |= (imm << 2);
2471 case 2: imm |= (imm << 4);
2472 case 3: imm |= (imm << 8);
2473 case 4: imm |= (imm << 16);
2474 case 5: imm |= (imm << 32);
2475 case 6:
2476 break;
2477 default:
2478 gcc_unreachable ();
2479 }
2480 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2481 aarch64_bitmasks[nimms++] = imm;
2482 }
2483 }
2484 }
2485
2486 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2487 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2488 aarch64_bitmasks_cmp);
2489}
2490
2491
2492/* Return true if val can be encoded as a 12-bit unsigned immediate with
2493 a left shift of 0 or 12 bits. */
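/* For example, 0xabc and 0xabc000 can be encoded, but 0xabc001 and
   0x1abc000 cannot.  */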
2494bool
2495aarch64_uimm12_shift (HOST_WIDE_INT val)
2496{
2497 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2498 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2499 );
2500}
2501
2502
2503/* Return true if val is an immediate that can be loaded into a
2504 register by a MOVZ instruction. */
2505static bool
2506aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2507{
2508 if (GET_MODE_SIZE (mode) > 4)
2509 {
2510 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2511 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2512 return 1;
2513 }
2514 else
2515 {
2516 /* Ignore sign extension. */
2517 val &= (HOST_WIDE_INT) 0xffffffff;
2518 }
2519 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2520 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2521}
2522
2523
2524/* Return true if val is a valid bitmask immediate. */
2525bool
2526aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2527{
2528 if (GET_MODE_SIZE (mode) < 8)
2529 {
2530 /* Replicate bit pattern. */
2531 val &= (HOST_WIDE_INT) 0xffffffff;
2532 val |= val << 32;
2533 }
2534 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2535 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2536}
2537
2538
2539/* Return true if val is an immediate that can be loaded into a
2540 register in a single instruction. */
2541bool
2542aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2543{
2544 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2545 return 1;
2546 return aarch64_bitmask_imm (val, mode);
2547}
2548
2549static bool
2550aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2551{
2552 rtx base, offset;
2553 if (GET_CODE (x) == HIGH)
2554 return true;
2555
2556 split_const (x, &base, &offset);
2557 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2558 return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR) != SYMBOL_FORCE_TO_MEM);
2559
2560 return aarch64_tls_referenced_p (x);
2561}
2562
2563/* Return true if register REGNO is a valid index register.
2564 STRICT_P is true if REG_OK_STRICT is in effect. */
2565
2566bool
2567aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2568{
2569 if (!HARD_REGISTER_NUM_P (regno))
2570 {
2571 if (!strict_p)
2572 return true;
2573
2574 if (!reg_renumber)
2575 return false;
2576
2577 regno = reg_renumber[regno];
2578 }
2579 return GP_REGNUM_P (regno);
2580}
2581
2582/* Return true if register REGNO is a valid base register for mode MODE.
2583 STRICT_P is true if REG_OK_STRICT is in effect. */
2584
2585bool
2586aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2587{
2588 if (!HARD_REGISTER_NUM_P (regno))
2589 {
2590 if (!strict_p)
2591 return true;
2592
2593 if (!reg_renumber)
2594 return false;
2595
2596 regno = reg_renumber[regno];
2597 }
2598
2599 /* The fake registers will be eliminated to either the stack or
2600 hard frame pointer, both of which are usually valid base registers.
2601 Reload deals with the cases where the eliminated form isn't valid. */
2602 return (GP_REGNUM_P (regno)
2603 || regno == SP_REGNUM
2604 || regno == FRAME_POINTER_REGNUM
2605 || regno == ARG_POINTER_REGNUM);
2606}
2607
2608/* Return true if X is a valid base register for mode MODE.
2609 STRICT_P is true if REG_OK_STRICT is in effect. */
2610
2611static bool
2612aarch64_base_register_rtx_p (rtx x, bool strict_p)
2613{
2614 if (!strict_p && GET_CODE (x) == SUBREG)
2615 x = SUBREG_REG (x);
2616
2617 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2618}
2619
2620/* Return true if address offset is a valid index. If it is, fill in INFO
2621 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2622
2623static bool
2624aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2625 enum machine_mode mode, bool strict_p)
2626{
2627 enum aarch64_address_type type;
2628 rtx index;
2629 int shift;
2630
2631 /* (reg:P) */
2632 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2633 && GET_MODE (x) == Pmode)
2634 {
2635 type = ADDRESS_REG_REG;
2636 index = x;
2637 shift = 0;
2638 }
2639 /* (sign_extend:DI (reg:SI)) */
2640 else if ((GET_CODE (x) == SIGN_EXTEND
2641 || GET_CODE (x) == ZERO_EXTEND)
2642 && GET_MODE (x) == DImode
2643 && GET_MODE (XEXP (x, 0)) == SImode)
2644 {
2645 type = (GET_CODE (x) == SIGN_EXTEND)
2646 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2647 index = XEXP (x, 0);
2648 shift = 0;
2649 }
2650 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2651 else if (GET_CODE (x) == MULT
2652 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2653 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2654 && GET_MODE (XEXP (x, 0)) == DImode
2655 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2656 && CONST_INT_P (XEXP (x, 1)))
2657 {
2658 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2659 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2660 index = XEXP (XEXP (x, 0), 0);
2661 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2662 }
2663 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2664 else if (GET_CODE (x) == ASHIFT
2665 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2666 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2667 && GET_MODE (XEXP (x, 0)) == DImode
2668 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2669 && CONST_INT_P (XEXP (x, 1)))
2670 {
2671 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2672 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2673 index = XEXP (XEXP (x, 0), 0);
2674 shift = INTVAL (XEXP (x, 1));
2675 }
2676 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2677 else if ((GET_CODE (x) == SIGN_EXTRACT
2678 || GET_CODE (x) == ZERO_EXTRACT)
2679 && GET_MODE (x) == DImode
2680 && GET_CODE (XEXP (x, 0)) == MULT
2681 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2682 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2683 {
2684 type = (GET_CODE (x) == SIGN_EXTRACT)
2685 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2686 index = XEXP (XEXP (x, 0), 0);
2687 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2688 if (INTVAL (XEXP (x, 1)) != 32 + shift
2689 || INTVAL (XEXP (x, 2)) != 0)
2690 shift = -1;
2691 }
2692 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2693 (const_int 0xffffffff<<shift)) */
2694 else if (GET_CODE (x) == AND
2695 && GET_MODE (x) == DImode
2696 && GET_CODE (XEXP (x, 0)) == MULT
2697 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2698 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2699 && CONST_INT_P (XEXP (x, 1)))
2700 {
2701 type = ADDRESS_REG_UXTW;
2702 index = XEXP (XEXP (x, 0), 0);
2703 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2704 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2705 shift = -1;
2706 }
2707 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2708 else if ((GET_CODE (x) == SIGN_EXTRACT
2709 || GET_CODE (x) == ZERO_EXTRACT)
2710 && GET_MODE (x) == DImode
2711 && GET_CODE (XEXP (x, 0)) == ASHIFT
2712 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2713 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2714 {
2715 type = (GET_CODE (x) == SIGN_EXTRACT)
2716 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2717 index = XEXP (XEXP (x, 0), 0);
2718 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2719 if (INTVAL (XEXP (x, 1)) != 32 + shift
2720 || INTVAL (XEXP (x, 2)) != 0)
2721 shift = -1;
2722 }
2723 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2724 (const_int 0xffffffff<<shift)) */
2725 else if (GET_CODE (x) == AND
2726 && GET_MODE (x) == DImode
2727 && GET_CODE (XEXP (x, 0)) == ASHIFT
2728 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2729 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2730 && CONST_INT_P (XEXP (x, 1)))
2731 {
2732 type = ADDRESS_REG_UXTW;
2733 index = XEXP (XEXP (x, 0), 0);
2734 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2735 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2736 shift = -1;
2737 }
2738 /* (mult:P (reg:P) (const_int scale)) */
2739 else if (GET_CODE (x) == MULT
2740 && GET_MODE (x) == Pmode
2741 && GET_MODE (XEXP (x, 0)) == Pmode
2742 && CONST_INT_P (XEXP (x, 1)))
2743 {
2744 type = ADDRESS_REG_REG;
2745 index = XEXP (x, 0);
2746 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2747 }
2748 /* (ashift:P (reg:P) (const_int shift)) */
2749 else if (GET_CODE (x) == ASHIFT
2750 && GET_MODE (x) == Pmode
2751 && GET_MODE (XEXP (x, 0)) == Pmode
2752 && CONST_INT_P (XEXP (x, 1)))
2753 {
2754 type = ADDRESS_REG_REG;
2755 index = XEXP (x, 0);
2756 shift = INTVAL (XEXP (x, 1));
2757 }
2758 else
2759 return false;
2760
2761 if (GET_CODE (index) == SUBREG)
2762 index = SUBREG_REG (index);
2763
2764 if ((shift == 0 ||
2765 (shift > 0 && shift <= 3
2766 && (1 << shift) == GET_MODE_SIZE (mode)))
2767 && REG_P (index)
2768 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2769 {
2770 info->type = type;
2771 info->offset = index;
2772 info->shift = shift;
2773 return true;
2774 }
2775
2776 return false;
2777}
2778
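/* Return true if OFFSET is a signed 7-bit value once scaled by the size
   of MODE, i.e. within the immediate range of a load/store pair.  */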
2779static inline bool
2780offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2781{
2782 return (offset >= -64 * GET_MODE_SIZE (mode)
2783 && offset < 64 * GET_MODE_SIZE (mode)
2784 && offset % GET_MODE_SIZE (mode) == 0);
2785}
2786
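/* Return true if OFFSET is within the 9-bit signed, unscaled immediate
   range (as used by the single-register LDUR/STUR forms).  */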
2787static inline bool
2788offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
2789 HOST_WIDE_INT offset)
2790{
2791 return offset >= -256 && offset < 256;
2792}
2793
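/* Return true if OFFSET is an unsigned 12-bit value once scaled by the
   size of MODE, i.e. the standard LDR/STR unsigned-offset form.  */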
2794static inline bool
2795offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2796{
2797 return (offset >= 0
2798 && offset < 4096 * GET_MODE_SIZE (mode)
2799 && offset % GET_MODE_SIZE (mode) == 0);
2800}
2801
2802/* Return true if X is a valid address for machine mode MODE. If it is,
2803 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
2804 effect. OUTER_CODE is PARALLEL for a load/store pair. */
2805
2806static bool
2807aarch64_classify_address (struct aarch64_address_info *info,
2808 rtx x, enum machine_mode mode,
2809 RTX_CODE outer_code, bool strict_p)
2810{
2811 enum rtx_code code = GET_CODE (x);
2812 rtx op0, op1;
2813 bool allow_reg_index_p =
2814 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
2815
2816 /* Don't support anything other than POST_INC or REG addressing for
2817 AdvSIMD. */
2818 if (aarch64_vector_mode_p (mode)
2819 && (code != POST_INC && code != REG))
2820 return false;
2821
2822 switch (code)
2823 {
2824 case REG:
2825 case SUBREG:
2826 info->type = ADDRESS_REG_IMM;
2827 info->base = x;
2828 info->offset = const0_rtx;
2829 return aarch64_base_register_rtx_p (x, strict_p);
2830
2831 case PLUS:
2832 op0 = XEXP (x, 0);
2833 op1 = XEXP (x, 1);
2834 if (GET_MODE_SIZE (mode) != 0
2835 && CONST_INT_P (op1)
2836 && aarch64_base_register_rtx_p (op0, strict_p))
2837 {
2838 HOST_WIDE_INT offset = INTVAL (op1);
2839
2840 info->type = ADDRESS_REG_IMM;
2841 info->base = op0;
2842 info->offset = op1;
2843
2844 /* TImode and TFmode values are allowed in both pairs of X
2845 registers and individual Q registers. The available
2846 address modes are:
2847 X,X: 7-bit signed scaled offset
2848 Q: 9-bit signed offset
2849 We conservatively require an offset representable in either mode.
2850 */
2851 if (mode == TImode || mode == TFmode)
2852 return (offset_7bit_signed_scaled_p (mode, offset)
2853 && offset_9bit_signed_unscaled_p (mode, offset));
2854
2855 if (outer_code == PARALLEL)
2856 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2857 && offset_7bit_signed_scaled_p (mode, offset));
2858 else
2859 return (offset_9bit_signed_unscaled_p (mode, offset)
2860 || offset_12bit_unsigned_scaled_p (mode, offset));
2861 }
2862
2863 if (allow_reg_index_p)
2864 {
2865 /* Look for base + (scaled/extended) index register. */
2866 if (aarch64_base_register_rtx_p (op0, strict_p)
2867 && aarch64_classify_index (info, op1, mode, strict_p))
2868 {
2869 info->base = op0;
2870 return true;
2871 }
2872 if (aarch64_base_register_rtx_p (op1, strict_p)
2873 && aarch64_classify_index (info, op0, mode, strict_p))
2874 {
2875 info->base = op1;
2876 return true;
2877 }
2878 }
2879
2880 return false;
2881
2882 case POST_INC:
2883 case POST_DEC:
2884 case PRE_INC:
2885 case PRE_DEC:
2886 info->type = ADDRESS_REG_WB;
2887 info->base = XEXP (x, 0);
2888 info->offset = NULL_RTX;
2889 return aarch64_base_register_rtx_p (info->base, strict_p);
2890
2891 case POST_MODIFY:
2892 case PRE_MODIFY:
2893 info->type = ADDRESS_REG_WB;
2894 info->base = XEXP (x, 0);
2895 if (GET_CODE (XEXP (x, 1)) == PLUS
2896 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
2897 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
2898 && aarch64_base_register_rtx_p (info->base, strict_p))
2899 {
2900 HOST_WIDE_INT offset;
2901 info->offset = XEXP (XEXP (x, 1), 1);
2902 offset = INTVAL (info->offset);
2903
2904 /* TImode and TFmode values are allowed in both pairs of X
2905 registers and individual Q registers. The available
2906 address modes are:
2907 X,X: 7-bit signed scaled offset
2908 Q: 9-bit signed offset
2909 We conservatively require an offset representable in either mode.
2910 */
2911 if (mode == TImode || mode == TFmode)
2912 return (offset_7bit_signed_scaled_p (mode, offset)
2913 && offset_9bit_signed_unscaled_p (mode, offset));
2914
2915 if (outer_code == PARALLEL)
2916 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2917 && offset_7bit_signed_scaled_p (mode, offset));
2918 else
2919 return offset_9bit_signed_unscaled_p (mode, offset);
2920 }
2921 return false;
2922
2923 case CONST:
2924 case SYMBOL_REF:
2925 case LABEL_REF:
2926 /* load literal: pc-relative constant pool entry. */
2927 info->type = ADDRESS_SYMBOLIC;
2928 if (outer_code != PARALLEL)
2929 {
2930 rtx sym, addend;
2931
2932 split_const (x, &sym, &addend);
2933 return (GET_CODE (sym) == LABEL_REF
2934 || (GET_CODE (sym) == SYMBOL_REF
2935 && CONSTANT_POOL_ADDRESS_P (sym)));
2936 }
2937 return false;
2938
2939 case LO_SUM:
2940 info->type = ADDRESS_LO_SUM;
2941 info->base = XEXP (x, 0);
2942 info->offset = XEXP (x, 1);
2943 if (allow_reg_index_p
2944 && aarch64_base_register_rtx_p (info->base, strict_p))
2945 {
2946 rtx sym, offs;
2947 split_const (info->offset, &sym, &offs);
2948 if (GET_CODE (sym) == SYMBOL_REF
2949 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
2950 == SYMBOL_SMALL_ABSOLUTE))
2951 {
2952 /* The symbol and offset must be aligned to the access size. */
2953 unsigned int align;
2954 unsigned int ref_size;
2955
2956 if (CONSTANT_POOL_ADDRESS_P (sym))
2957 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
2958 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
2959 {
2960 tree exp = SYMBOL_REF_DECL (sym);
2961 align = TYPE_ALIGN (TREE_TYPE (exp));
2962 align = CONSTANT_ALIGNMENT (exp, align);
2963 }
2964 else if (SYMBOL_REF_DECL (sym))
2965 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
2966 else
2967 align = BITS_PER_UNIT;
2968
2969 ref_size = GET_MODE_SIZE (mode);
2970 if (ref_size == 0)
2971 ref_size = GET_MODE_SIZE (DImode);
2972
2973 return ((INTVAL (offs) & (ref_size - 1)) == 0
2974 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
2975 }
2976 }
2977 return false;
2978
2979 default:
2980 return false;
2981 }
2982}
2983
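/* Return true if X is a symbolic or label address, possibly plus a
   constant offset.  */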
2984bool
2985aarch64_symbolic_address_p (rtx x)
2986{
2987 rtx offset;
2988
2989 split_const (x, &x, &offset);
2990 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
2991}
2992
2993/* Classify the base of symbolic expression X, given that X appears in
2994 context CONTEXT. */
2995static enum aarch64_symbol_type
2996aarch64_classify_symbolic_expression (rtx x, enum aarch64_symbol_context context)
2997{
2998 rtx offset;
2999 split_const (x, &x, &offset);
3000 return aarch64_classify_symbol (x, context);
3001}
3002
3003
3004/* Return TRUE if X is a legitimate address for accessing memory in
3005 mode MODE. */
3006static bool
3007aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3008{
3009 struct aarch64_address_info addr;
3010
3011 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3012}
3013
3014/* Return TRUE if X is a legitimate address for accessing memory in
3015 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3016 pair operation. */
3017bool
3018aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3019 RTX_CODE outer_code, bool strict_p)
3020{
3021 struct aarch64_address_info addr;
3022
3023 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3024}
3025
3026/* Return TRUE if rtx X is immediate constant 0.0 */
3027bool
3028aarch64_const_double_zero_rtx_p (rtx x)
3029{
3030 REAL_VALUE_TYPE r;
3031
3032 if (GET_MODE (x) == VOIDmode)
3033 return false;
3034
3035 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3036 if (REAL_VALUE_MINUS_ZERO (r))
3037 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3038 return REAL_VALUES_EQUAL (r, dconst0);
3039}
3040
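/* Select the CC mode to use for a comparison of X against Y with rtx
   code CODE; the chosen mode records how much of the flags state the
   comparison actually relies on.  */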
3041enum machine_mode
3042aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3043{
3044 /* All floating point compares return CCFP if it is an equality
3045 comparison, and CCFPE otherwise. */
3046 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3047 {
3048 switch (code)
3049 {
3050 case EQ:
3051 case NE:
3052 case UNORDERED:
3053 case ORDERED:
3054 case UNLT:
3055 case UNLE:
3056 case UNGT:
3057 case UNGE:
3058 case UNEQ:
3059 case LTGT:
3060 return CCFPmode;
3061
3062 case LT:
3063 case LE:
3064 case GT:
3065 case GE:
3066 return CCFPEmode;
3067
3068 default:
3069 gcc_unreachable ();
3070 }
3071 }
3072
3073 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3074 && y == const0_rtx
3075 && (code == EQ || code == NE || code == LT || code == GE)
3076 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS))
3077 return CC_NZmode;
3078
3079 /* A compare with a shifted operand. Because of canonicalization,
3080 the comparison will have to be swapped when we emit the assembly
3081 code. */
3082 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3083 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3084 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3085 || GET_CODE (x) == LSHIFTRT
3086 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3087 return CC_SWPmode;
3088
3089 /* A compare of a mode narrower than SI mode against zero can be done
3090 by extending the value in the comparison. */
3091 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3092 && y == const0_rtx)
3093 /* Only use sign-extension if we really need it. */
3094 return ((code == GT || code == GE || code == LE || code == LT)
3095 ? CC_SESWPmode : CC_ZESWPmode);
3096
3097 /* For everything else, return CCmode. */
3098 return CCmode;
3099}
3100
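/* Map the comparison rtx X onto an AARCH64_* condition code, taking
   into account any operand swap or extension implied by the CC mode of
   its first operand.  */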
3101static unsigned
3102aarch64_get_condition_code (rtx x)
3103{
3104 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3105 enum rtx_code comp_code = GET_CODE (x);
3106
3107 if (GET_MODE_CLASS (mode) != MODE_CC)
3108 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3109
3110 switch (mode)
3111 {
3112 case CCFPmode:
3113 case CCFPEmode:
3114 switch (comp_code)
3115 {
3116 case GE: return AARCH64_GE;
3117 case GT: return AARCH64_GT;
3118 case LE: return AARCH64_LS;
3119 case LT: return AARCH64_MI;
3120 case NE: return AARCH64_NE;
3121 case EQ: return AARCH64_EQ;
3122 case ORDERED: return AARCH64_VC;
3123 case UNORDERED: return AARCH64_VS;
3124 case UNLT: return AARCH64_LT;
3125 case UNLE: return AARCH64_LE;
3126 case UNGT: return AARCH64_HI;
3127 case UNGE: return AARCH64_PL;
3128 default: gcc_unreachable ();
3129 }
3130 break;
3131
3132 case CCmode:
3133 switch (comp_code)
3134 {
3135 case NE: return AARCH64_NE;
3136 case EQ: return AARCH64_EQ;
3137 case GE: return AARCH64_GE;
3138 case GT: return AARCH64_GT;
3139 case LE: return AARCH64_LE;
3140 case LT: return AARCH64_LT;
3141 case GEU: return AARCH64_CS;
3142 case GTU: return AARCH64_HI;
3143 case LEU: return AARCH64_LS;
3144 case LTU: return AARCH64_CC;
3145 default: gcc_unreachable ();
3146 }
3147 break;
3148
3149 case CC_SWPmode:
3150 case CC_ZESWPmode:
3151 case CC_SESWPmode:
3152 switch (comp_code)
3153 {
3154 case NE: return AARCH64_NE;
3155 case EQ: return AARCH64_EQ;
3156 case GE: return AARCH64_LE;
3157 case GT: return AARCH64_LT;
3158 case LE: return AARCH64_GE;
3159 case LT: return AARCH64_GT;
3160 case GEU: return AARCH64_LS;
3161 case GTU: return AARCH64_CC;
3162 case LEU: return AARCH64_CS;
3163 case LTU: return AARCH64_HI;
3164 default: gcc_unreachable ();
3165 }
3166 break;
3167
3168 case CC_NZmode:
3169 switch (comp_code)
3170 {
3171 case NE: return AARCH64_NE;
3172 case EQ: return AARCH64_EQ;
3173 case GE: return AARCH64_PL;
3174 case LT: return AARCH64_MI;
3175 default: gcc_unreachable ();
3176 }
3177 break;
3178
3179 default:
3180 gcc_unreachable ();
3181 break;
3182 }
3183}
3184
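/* Return the number of set bits in VALUE, using the classic
   clear-the-lowest-set-bit loop (one iteration per set bit).  */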
3185static unsigned
3186bit_count (unsigned HOST_WIDE_INT value)
3187{
3188 unsigned count = 0;
3189
3190 while (value)
3191 {
3192 count++;
3193 value &= value - 1;
3194 }
3195
3196 return count;
3197}
3198
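/* Print operand X to file F, modified by the operand code CODE (the
   letter following '%' in an output template).  */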
3199void
3200aarch64_print_operand (FILE *f, rtx x, char code)
3201{
3202 switch (code)
3203 {
3204 case 'e':
3205 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3206 {
3207 int n;
3208
3209 if (GET_CODE (x) != CONST_INT
3210 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3211 {
3212 output_operand_lossage ("invalid operand for '%%%c'", code);
3213 return;
3214 }
3215
3216 switch (n)
3217 {
3218 case 3:
3219 fputc ('b', f);
3220 break;
3221 case 4:
3222 fputc ('h', f);
3223 break;
3224 case 5:
3225 fputc ('w', f);
3226 break;
3227 default:
3228 output_operand_lossage ("invalid operand for '%%%c'", code);
3229 return;
3230 }
3231 }
3232 break;
3233
3234 case 'p':
3235 {
3236 int n;
3237
3238 /* Print N such that 2^N == X. */
3239 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3240 {
3241 output_operand_lossage ("invalid operand for '%%%c'", code);
3242 return;
3243 }
3244
3245 asm_fprintf (f, "%d", n);
3246 }
3247 break;
3248
3249 case 'P':
3250 /* Print the number of non-zero bits in X (a const_int). */
3251 if (GET_CODE (x) != CONST_INT)
3252 {
3253 output_operand_lossage ("invalid operand for '%%%c'", code);
3254 return;
3255 }
3256
3257 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3258 break;
3259
3260 case 'H':
3261 /* Print the higher numbered register of a pair (TImode) of regs. */
3262 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3263 {
3264 output_operand_lossage ("invalid operand for '%%%c'", code);
3265 return;
3266 }
3267
3268 asm_fprintf (f, "%r", REGNO (x) + 1);
3269 break;
3270
3271 case 'Q':
3272 /* Print the least significant register of a pair (TImode) of regs. */
3273 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3274 {
3275 output_operand_lossage ("invalid operand for '%%%c'", code);
3276 return;
3277 }
3278 asm_fprintf (f, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
3279 break;
3280
3281 case 'R':
3282 /* Print the most significant register of a pair (TImode) of regs. */
3283 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3284 {
3285 output_operand_lossage ("invalid operand for '%%%c'", code);
3286 return;
3287 }
3288 asm_fprintf (f, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
3289 break;
3290
3291 case 'm':
3292 /* Print a condition (eq, ne, etc). */
3293
3294 /* CONST_TRUE_RTX means always -- that's the default. */
3295 if (x == const_true_rtx)
3296 return;
3297
3298 if (!COMPARISON_P (x))
3299 {
3300 output_operand_lossage ("invalid operand for '%%%c'", code);
3301 return;
3302 }
3303
3304 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3305 break;
3306
3307 case 'M':
3308 /* Print the inverse of a condition (eq <-> ne, etc). */
3309
3310 /* CONST_TRUE_RTX means never -- that's the default. */
3311 if (x == const_true_rtx)
3312 {
3313 fputs ("nv", f);
3314 return;
3315 }
3316
3317 if (!COMPARISON_P (x))
3318 {
3319 output_operand_lossage ("invalid operand for '%%%c'", code);
3320 return;
3321 }
3322
3323 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3324 (aarch64_get_condition_code (x))], f);
3325 break;
3326
3327 case 'b':
3328 case 'h':
3329 case 's':
3330 case 'd':
3331 case 'q':
3332 /* Print a scalar FP/SIMD register name. */
3333 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3334 {
3335 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3336 return;
3337 }
3338 asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code, REGNO (x) - V0_REGNUM);
3339 break;
3340
3341 case 'S':
3342 case 'T':
3343 case 'U':
3344 case 'V':
3345 /* Print the first FP/SIMD register name in a list. */
3346 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3347 {
3348 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3349 return;
3350 }
3351 asm_fprintf (f, "%sv%d", REGISTER_PREFIX,
3352 REGNO (x) - V0_REGNUM + (code - 'S'));
3353 break;
3354
3355 case 'w':
3356 case 'x':
3357 /* Print a general register name or the zero register (32-bit or
3358 64-bit). */
3359 if (x == const0_rtx)
3360 {
3361 asm_fprintf (f, "%s%czr", REGISTER_PREFIX, code);
3362 break;
3363 }
3364
3365 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3366 {
3367 asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code,
3368 REGNO (x) - R0_REGNUM);
3369 break;
3370 }
3371
3372 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3373 {
3374 asm_fprintf (f, "%s%ssp", REGISTER_PREFIX, code == 'w' ? "w" : "");
3375 break;
3376 }
3377
3378 /* Fall through */
3379
3380 case 0:
3381 /* Print a normal operand. If it is a general register, then we
3382 assume DImode. */
3383 if (x == NULL)
3384 {
3385 output_operand_lossage ("missing operand");
3386 return;
3387 }
3388
3389 switch (GET_CODE (x))
3390 {
3391 case REG:
3392 asm_fprintf (f, "%r", REGNO (x));
3393 break;
3394
3395 case MEM:
3396 aarch64_memory_reference_mode = GET_MODE (x);
3397 output_address (XEXP (x, 0));
3398 break;
3399
3400 case LABEL_REF:
3401 case SYMBOL_REF:
3402 output_addr_const (asm_out_file, x);
3403 break;
3404
3405 case CONST_INT:
3406 asm_fprintf (f, "%wd", INTVAL (x));
3407 break;
3408
3409 case CONST_VECTOR:
3410 gcc_assert (aarch64_const_vec_all_same_int_p (x, HOST_WIDE_INT_MIN,
3411 HOST_WIDE_INT_MAX));
3412 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3413 break;
3414
3415 default:
3416 output_operand_lossage ("invalid operand");
3417 return;
3418 }
3419 break;
3420
3421 case 'A':
3422 if (GET_CODE (x) == HIGH)
3423 x = XEXP (x, 0);
3424
3425 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3426 {
3427 case SYMBOL_SMALL_GOT:
3428 asm_fprintf (asm_out_file, ":got:");
3429 break;
3430
3431 case SYMBOL_SMALL_TLSGD:
3432 asm_fprintf (asm_out_file, ":tlsgd:");
3433 break;
3434
3435 case SYMBOL_SMALL_TLSDESC:
3436 asm_fprintf (asm_out_file, ":tlsdesc:");
3437 break;
3438
3439 case SYMBOL_SMALL_GOTTPREL:
3440 asm_fprintf (asm_out_file, ":gottprel:");
3441 break;
3442
3443 case SYMBOL_SMALL_TPREL:
3444 asm_fprintf (asm_out_file, ":tprel:");
3445 break;
3446
3447 default:
3448 break;
3449 }
3450 output_addr_const (asm_out_file, x);
3451 break;
3452
3453 case 'L':
3454 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3455 {
3456 case SYMBOL_SMALL_GOT:
3457 asm_fprintf (asm_out_file, ":lo12:");
3458 break;
3459
3460 case SYMBOL_SMALL_TLSGD:
3461 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3462 break;
3463
3464 case SYMBOL_SMALL_TLSDESC:
3465 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3466 break;
3467
3468 case SYMBOL_SMALL_GOTTPREL:
3469 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3470 break;
3471
3472 case SYMBOL_SMALL_TPREL:
3473 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3474 break;
3475
3476 default:
3477 break;
3478 }
3479 output_addr_const (asm_out_file, x);
3480 break;
3481
3482 case 'G':
3483
3484 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3485 {
3486 case SYMBOL_SMALL_TPREL:
3487 asm_fprintf (asm_out_file, ":tprel_hi12:");
3488 break;
3489 default:
3490 break;
3491 }
3492 output_addr_const (asm_out_file, x);
3493 break;
3494
3495 default:
3496 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3497 return;
3498 }
3499}
3500
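/* Print memory address X, for the access mode recorded earlier in
   aarch64_memory_reference_mode, using AArch64 assembly syntax.  */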
3501void
3502aarch64_print_operand_address (FILE *f, rtx x)
3503{
3504 struct aarch64_address_info addr;
3505
3506 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3507 MEM, true))
3508 switch (addr.type)
3509 {
3510 case ADDRESS_REG_IMM:
3511 if (addr.offset == const0_rtx)
3512 asm_fprintf (f, "[%r]", REGNO (addr.base));
3513 else
3514 asm_fprintf (f, "[%r,%wd]", REGNO (addr.base),
3515 INTVAL (addr.offset));
3516 return;
3517
3518 case ADDRESS_REG_REG:
3519 if (addr.shift == 0)
3520 asm_fprintf (f, "[%r,%r]", REGNO (addr.base),
3521 REGNO (addr.offset));
3522 else
3523 asm_fprintf (f, "[%r,%r,lsl %u]", REGNO (addr.base),
3524 REGNO (addr.offset), addr.shift);
3525 return;
3526
3527 case ADDRESS_REG_UXTW:
3528 if (addr.shift == 0)
3529 asm_fprintf (f, "[%r,w%d,uxtw]", REGNO (addr.base),
3530 REGNO (addr.offset) - R0_REGNUM);
3531 else
3532 asm_fprintf (f, "[%r,w%d,uxtw %u]", REGNO (addr.base),
3533 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3534 return;
3535
3536 case ADDRESS_REG_SXTW:
3537 if (addr.shift == 0)
3538 asm_fprintf (f, "[%r,w%d,sxtw]", REGNO (addr.base),
3539 REGNO (addr.offset) - R0_REGNUM);
3540 else
3541 asm_fprintf (f, "[%r,w%d,sxtw %u]", REGNO (addr.base),
3542 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3543 return;
3544
3545 case ADDRESS_REG_WB:
3546 switch (GET_CODE (x))
3547 {
3548 case PRE_INC:
3549 asm_fprintf (f, "[%r,%d]!", REGNO (addr.base),
3550 GET_MODE_SIZE (aarch64_memory_reference_mode));
3551 return;
3552 case POST_INC:
3553 asm_fprintf (f, "[%r],%d", REGNO (addr.base),
3554 GET_MODE_SIZE (aarch64_memory_reference_mode));
3555 return;
3556 case PRE_DEC:
3557 asm_fprintf (f, "[%r,-%d]!", REGNO (addr.base),
3558 GET_MODE_SIZE (aarch64_memory_reference_mode));
3559 return;
3560 case POST_DEC:
3561 asm_fprintf (f, "[%r],-%d", REGNO (addr.base),
3562 GET_MODE_SIZE (aarch64_memory_reference_mode));
3563 return;
3564 case PRE_MODIFY:
3565 asm_fprintf (f, "[%r,%wd]!", REGNO (addr.base),
3566 INTVAL (addr.offset));
3567 return;
3568 case POST_MODIFY:
3569 asm_fprintf (f, "[%r],%wd", REGNO (addr.base),
3570 INTVAL (addr.offset));
3571 return;
3572 default:
3573 break;
3574 }
3575 break;
3576
3577 case ADDRESS_LO_SUM:
3578 asm_fprintf (f, "[%r,#:lo12:", REGNO (addr.base));
3579 output_addr_const (f, addr.offset);
3580 asm_fprintf (f, "]");
3581 return;
3582
3583 case ADDRESS_SYMBOLIC:
3584 break;
3585 }
3586
3587 output_addr_const (f, x);
3588}
3589
3590void
3591aarch64_function_profiler (FILE *f ATTRIBUTE_UNUSED,
3592 int labelno ATTRIBUTE_UNUSED)
3593{
3594 sorry ("function profiling");
3595}
3596
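/* Return true if X (or any sub-expression of X) mentions a label,
   ignoring the label references wrapped inside UNSPEC_TLS operands.  */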
3597bool
3598aarch64_label_mentioned_p (rtx x)
3599{
3600 const char *fmt;
3601 int i;
3602
3603 if (GET_CODE (x) == LABEL_REF)
3604 return true;
3605
3606 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3607 referencing instruction, but they are constant offsets, not
3608 symbols. */
3609 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3610 return false;
3611
3612 fmt = GET_RTX_FORMAT (GET_CODE (x));
3613 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3614 {
3615 if (fmt[i] == 'E')
3616 {
3617 int j;
3618
3619 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3620 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3621 return 1;
3622 }
3623 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3624 return 1;
3625 }
3626
3627 return 0;
3628}
3629
3630/* Implement REGNO_REG_CLASS. */
3631
3632enum reg_class
3633aarch64_regno_regclass (unsigned regno)
3634{
3635 if (GP_REGNUM_P (regno))
3636 return CORE_REGS;
3637
3638 if (regno == SP_REGNUM)
3639 return STACK_REG;
3640
3641 if (regno == FRAME_POINTER_REGNUM
3642 || regno == ARG_POINTER_REGNUM)
3643 return CORE_REGS;
3644
3645 if (FP_REGNUM_P (regno))
3646 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3647
3648 return NO_REGS;
3649}
3650
3651/* Try a machine-dependent way of reloading an illegitimate address
3652 operand. If we find one, push the reload and return the new rtx. */
3653
3654rtx
3655aarch64_legitimize_reload_address (rtx *x_p,
3656 enum machine_mode mode,
3657 int opnum, int type,
3658 int ind_levels ATTRIBUTE_UNUSED)
3659{
3660 rtx x = *x_p;
3661
3662 /* Do not allow mem (plus (reg, const)) if vector mode. */
3663 if (aarch64_vector_mode_p (mode)
3664 && GET_CODE (x) == PLUS
3665 && REG_P (XEXP (x, 0))
3666 && CONST_INT_P (XEXP (x, 1)))
3667 {
3668 rtx orig_rtx = x;
3669 x = copy_rtx (x);
3670 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3671 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3672 opnum, (enum reload_type) type);
3673 return x;
3674 }
3675
3676 /* We must recognize output that we have already generated ourselves. */
3677 if (GET_CODE (x) == PLUS
3678 && GET_CODE (XEXP (x, 0)) == PLUS
3679 && REG_P (XEXP (XEXP (x, 0), 0))
3680 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3681 && CONST_INT_P (XEXP (x, 1)))
3682 {
3683 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3684 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3685 opnum, (enum reload_type) type);
3686 return x;
3687 }
3688
3689 /* We wish to handle large displacements off a base register by splitting
3690 the addend across an add and the mem insn. This can cut the number of
3691 extra insns needed from 3 to 1. It is only useful for load/store of a
3692 single register with a 12-bit offset field. */
3693 if (GET_CODE (x) == PLUS
3694 && REG_P (XEXP (x, 0))
3695 && CONST_INT_P (XEXP (x, 1))
3696 && HARD_REGISTER_P (XEXP (x, 0))
3697 && mode != TImode
3698 && mode != TFmode
3699 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
3700 {
3701 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3702 HOST_WIDE_INT low = val & 0xfff;
3703 HOST_WIDE_INT high = val - low;
3704 HOST_WIDE_INT offs;
3705 rtx cst;
3706
3707 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
3708 BLKmode alignment. */
3709 if (GET_MODE_SIZE (mode) == 0)
3710 return NULL_RTX;
3711
3712 offs = low % GET_MODE_SIZE (mode);
3713
3714 /* Align misaligned offset by adjusting high part to compensate. */
3715 if (offs != 0)
3716 {
3717 if (aarch64_uimm12_shift (high + offs))
3718 {
3719 /* Align down. */
3720 low = low - offs;
3721 high = high + offs;
3722 }
3723 else
3724 {
3725 /* Align up. */
3726 offs = GET_MODE_SIZE (mode) - offs;
3727 low = low + offs;
3728 high = high + (low & 0x1000) - offs;
3729 low &= 0xfff;
3730 }
3731 }
3732
3733 /* Check for overflow. */
3734 if (high + low != val)
3735 return NULL_RTX;
3736
3737 cst = GEN_INT (high);
3738 if (!aarch64_uimm12_shift (high))
3739 cst = force_const_mem (Pmode, cst);
3740
3741 /* Reload high part into base reg, leaving the low part
3742 in the mem instruction. */
3743 x = gen_rtx_PLUS (Pmode,
3744 gen_rtx_PLUS (Pmode, XEXP (x, 0), cst),
3745 GEN_INT (low));
3746
3747 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3748 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
3749 opnum, (enum reload_type) type);
3750 return x;
3751 }
3752
3753 return NULL_RTX;
3754}
3755
3756
3757static reg_class_t
3758aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
3759 reg_class_t rclass,
3760 enum machine_mode mode,
3761 secondary_reload_info *sri)
3762{
3763 /* Address expressions of the form PLUS (SP, large_offset) need two
3764 scratch registers, one for the constant, and one for holding a
3765 copy of SP, since SP cannot be used on the RHS of an add-reg
3766 instruction. */
3767 if (mode == DImode
3768 && GET_CODE (x) == PLUS
3769 && XEXP (x, 0) == stack_pointer_rtx
3770 && CONST_INT_P (XEXP (x, 1))
3771 && !aarch64_uimm12_shift (INTVAL (XEXP (x, 1))))
3772 {
3773 sri->icode = CODE_FOR_reload_sp_immediate;
3774 return NO_REGS;
3775 }
3776
3777 /* Without the TARGET_SIMD instructions we cannot move a Q register
3778 to a Q register directly. We need a scratch. */
3779 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
3780 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
3781 && reg_class_subset_p (rclass, FP_REGS))
3782 {
3783 if (mode == TFmode)
3784 sri->icode = CODE_FOR_aarch64_reload_movtf;
3785 else if (mode == TImode)
3786 sri->icode = CODE_FOR_aarch64_reload_movti;
3787 return NO_REGS;
3788 }
3789
3790 /* A TFmode or TImode memory access should be handled via an FP register
3791 because AArch64 has richer addressing modes for LDR/STR instructions
3792 than LDP/STP instructions. */
3793 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
3794 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
3795 return FP_REGS;
3796
3797 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
3798 return CORE_REGS;
3799
3800 return NO_REGS;
3801}
3802
3803static bool
3804aarch64_can_eliminate (const int from, const int to)
3805{
3806 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
3807 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
3808
3809 if (frame_pointer_needed)
3810 {
3811 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3812 return true;
3813 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3814 return false;
3815 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3816 && !cfun->calls_alloca)
3817 return true;
3818 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3819 return true;
3820 return false;
3821 }
3822 else
3823 {
3824 /* If we decided that we didn't need a frame pointer but then used
3825 LR in the function, then we do need a frame pointer after all, so
3826 prevent this elimination to ensure a frame pointer is used. */
3827
3828 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3829 && df_regs_ever_live_p (LR_REGNUM))
3830 return false;
3831 }
3832 return true;
3833}
3834
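/* Implement INITIAL_ELIMINATION_OFFSET.  Return the offset to add to a
   FROM-register based address to obtain the equivalent TO-register
   based address, given the frame laid out by aarch64_layout_frame.  */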
3835HOST_WIDE_INT
3836aarch64_initial_elimination_offset (unsigned from, unsigned to)
3837{
3838 HOST_WIDE_INT frame_size;
3839 HOST_WIDE_INT offset;
3840
3841 aarch64_layout_frame ();
3842 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
3843 + crtl->outgoing_args_size
3844 + cfun->machine->saved_varargs_size);
3845
3846 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
3847 offset = frame_size;
3848
3849 if (to == HARD_FRAME_POINTER_REGNUM)
3850 {
3851 if (from == ARG_POINTER_REGNUM)
3852 return offset - crtl->outgoing_args_size;
3853
3854 if (from == FRAME_POINTER_REGNUM)
3855 return cfun->machine->frame.saved_regs_size;
3856 }
3857
3858 if (to == STACK_POINTER_REGNUM)
3859 {
3860 if (from == FRAME_POINTER_REGNUM)
3861 {
3862 HOST_WIDE_INT elim = crtl->outgoing_args_size
3863 + cfun->machine->frame.saved_regs_size
3864 - cfun->machine->frame.fp_lr_offset;
3865 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
3866 return elim;
3867 }
3868 }
3869
3870 return offset;
3871}
3872
3873
3874/* Implement RETURN_ADDR_RTX. We do not support moving back to a
3875 previous frame. */
3876
3877rtx
3878aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
3879{
3880 if (count != 0)
3881 return const0_rtx;
3882 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
3883}
3884
3885
3886static void
3887aarch64_asm_trampoline_template (FILE *f)
3888{
3889 asm_fprintf (f, "\tldr\t%r, .+16\n", IP1_REGNUM);
3890 asm_fprintf (f, "\tldr\t%r, .+20\n", STATIC_CHAIN_REGNUM);
3891 asm_fprintf (f, "\tbr\t%r\n", IP1_REGNUM);
3892 assemble_aligned_integer (4, const0_rtx);
3893 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3894 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3895}
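/* The template above occupies 32 bytes: three 4-byte instructions, 4
   bytes of padding so that the literal slots are 8-byte aligned, and
   two 8-byte data slots.  aarch64_trampoline_init below copies the
   code part and then stores the target function address at offset 16
   (read by the first LDR) and the static chain value at offset 24
   (read by the second LDR).  */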
3896
3897unsigned
3898aarch64_trampoline_size (void)
3899{
3900 return 32; /* 3 insns + padding + 2 dwords. */
3901}
3902
3903static void
3904aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3905{
3906 rtx fnaddr, mem, a_tramp;
3907
3908 /* Don't need to copy the trailing D-words, we fill those in below. */
3909 emit_block_move (m_tramp, assemble_trampoline_template (),
3910 GEN_INT (TRAMPOLINE_SIZE - 16), BLOCK_OP_NORMAL);
3911 mem = adjust_address (m_tramp, DImode, 16);
3912 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3913 emit_move_insn (mem, fnaddr);
3914
3915 mem = adjust_address (m_tramp, DImode, 24);
3916 emit_move_insn (mem, chain_value);
3917
3918 /* XXX We should really define a "clear_cache" pattern and use
3919 gen_clear_cache(). */
3920 a_tramp = XEXP (m_tramp, 0);
3921 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3922 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3923 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3924}
3925
3926static unsigned char
3927aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
3928{
3929 switch (regclass)
3930 {
3931 case CORE_REGS:
3932 case POINTER_REGS:
3933 case GENERAL_REGS:
3934 case ALL_REGS:
3935 case FP_REGS:
3936 case FP_LO_REGS:
3937 return
3938 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
3939 (GET_MODE_SIZE (mode) + 7) / 8;
3940 case STACK_REG:
3941 return 1;
3942
3943 case NO_REGS:
3944 return 0;
3945
3946 default:
3947 break;
3948 }
3949 gcc_unreachable ();
3950}
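/* For example, a 16-byte vector mode such as V4SImode fits in a single
   FP (Q) register and so needs one register, whereas a 16-byte
   non-vector mode such as TImode is counted as two 8-byte registers.  */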
3951
3952static reg_class_t
3953aarch64_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t regclass)
3954{
3955 return ((regclass == POINTER_REGS || regclass == STACK_REG)
3956 ? GENERAL_REGS : regclass);
3957}
3958
3959void
3960aarch64_asm_output_labelref (FILE* f, const char *name)
3961{
3962 asm_fprintf (f, "%U%s", name);
3963}
3964
3965static void
3966aarch64_elf_asm_constructor (rtx symbol, int priority)
3967{
3968 if (priority == DEFAULT_INIT_PRIORITY)
3969 default_ctor_section_asm_out_constructor (symbol, priority);
3970 else
3971 {
3972 section *s;
3973 char buf[18];
3974 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
3975 s = get_section (buf, SECTION_WRITE, NULL);
3976 switch_to_section (s);
3977 assemble_align (POINTER_SIZE);
3978 fputs ("\t.dword\t", asm_out_file);
3979 output_addr_const (asm_out_file, symbol);
3980 fputc ('\n', asm_out_file);
3981 }
3982}
3983
3984static void
3985aarch64_elf_asm_destructor (rtx symbol, int priority)
3986{
3987 if (priority == DEFAULT_INIT_PRIORITY)
3988 default_dtor_section_asm_out_destructor (symbol, priority);
3989 else
3990 {
3991 section *s;
3992 char buf[18];
3993 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
3994 s = get_section (buf, SECTION_WRITE, NULL);
3995 switch_to_section (s);
3996 assemble_align (POINTER_SIZE);
3997 fputs ("\t.dword\t", asm_out_file);
3998 output_addr_const (asm_out_file, symbol);
3999 fputc ('\n', asm_out_file);
4000 }
4001}
4002
4003const char*
4004aarch64_output_casesi (rtx *operands)
4005{
4006 char buf[100];
4007 char label[100];
4008 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
4009 int index;
4010 static const char *const patterns[4][2] =
4011 {
4012 {
4013 "ldrb\t%w3, [%0,%w1,uxtw]",
4014 "add\t%3, %4, %w3, sxtb #2"
4015 },
4016 {
4017 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4018 "add\t%3, %4, %w3, sxth #2"
4019 },
4020 {
4021 "ldr\t%w3, [%0,%w1,uxtw #2]",
4022 "add\t%3, %4, %w3, sxtw #2"
4023 },
4024 /* We assume that DImode is only generated when not optimizing and
4025 that we don't really need 64-bit address offsets. That would
4026 imply an object file with 8GB of code in a single function! */
4027 {
4028 "ldr\t%w3, [%0,%w1,uxtw #2]",
4029 "add\t%3, %4, %w3, sxtw #2"
4030 }
4031 };
4032
4033 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4034
4035 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4036
4037 gcc_assert (index >= 0 && index <= 3);
4038
4039 /* Need to implement table size reduction, by changing the code below. */
4040 output_asm_insn (patterns[index][0], operands);
4041 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4042 snprintf (buf, sizeof (buf),
4043 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4044 output_asm_insn (buf, operands);
4045 output_asm_insn (patterns[index][1], operands);
4046 output_asm_insn ("br\t%3", operands);
4047 assemble_label (asm_out_file, label);
4048 return "";
4049}
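/* For a 2-byte (HImode) dispatch table the sequence emitted above has
   the form

	ldrh	%w3, [%0, %w1, uxtw #1]
	adr	%4, .LrtxN
	add	%3, %4, %w3, sxth #2
	br	%3
   .LrtxN:

   i.e. the table entry selected by index operand 1 is loaded, sign
   extended, scaled by 4, added to the address of the label emitted
   after the branch, and then used as the jump target.  */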
4050
4051
4052/* Return size in bits of an arithmetic operand which is shifted/scaled and
4053 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4054 operator. */
4055
4056int
4057aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4058{
4059 if (shift >= 0 && shift <= 3)
4060 {
4061 int size;
4062 for (size = 8; size <= 32; size *= 2)
4063 {
4064 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4065 if (mask == bits << shift)
4066 return size;
4067 }
4068 }
4069 return 0;
4070}
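/* For instance, aarch64_uxt_size (1, 0x1fe) returns 8 because
   0xff << 1 == 0x1fe, i.e. the mask selects a zero-extended byte
   shifted left by one, and aarch64_uxt_size (0, 0xffff) returns 16;
   any mask that is not an 8-, 16- or 32-bit mask at the given shift
   yields 0.  */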
4071
4072static bool
4073aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4074 const_rtx x ATTRIBUTE_UNUSED)
4075{
4076 /* We can't use blocks for constants when we're using a per-function
4077 constant pool. */
4078 return false;
4079}
4080
4081static section *
4082aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4083 rtx x ATTRIBUTE_UNUSED,
4084 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4085{
4086 /* Force all constant pool entries into the current function section. */
4087 return function_section (current_function_decl);
4088}
4089
4090
4091/* Costs. */
4092
4093/* Helper function for rtx cost calculation. Strip a shift expression
4094 from X. Returns the inner operand if successful, or the original
4095 expression on failure. */
4096static rtx
4097aarch64_strip_shift (rtx x)
4098{
4099 rtx op = x;
4100
4101 if ((GET_CODE (op) == ASHIFT
4102 || GET_CODE (op) == ASHIFTRT
4103 || GET_CODE (op) == LSHIFTRT)
4104 && CONST_INT_P (XEXP (op, 1)))
4105 return XEXP (op, 0);
4106
4107 if (GET_CODE (op) == MULT
4108 && CONST_INT_P (XEXP (op, 1))
4109 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4110 return XEXP (op, 0);
4111
4112 return x;
4113}
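/* For example, both (ashift (reg X) (const_int 3)) and
   (mult (reg X) (const_int 8)) strip down to (reg X), the latter
   because a multiplication by a power of two is how a left shift is
   canonicalized in some RTL contexts.  */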
4114
4115/* Helper function for rtx cost calculation. Strip a shift or extend
4116 expression from X. Returns the inner operand if successful, or the
4117 original expression on failure. We deal with a number of possible
4118 canonicalization variations here. */
4119static rtx
4120aarch64_strip_shift_or_extend (rtx x)
4121{
4122 rtx op = x;
4123
4124 /* Zero and sign extraction of a widened value. */
4125 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4126 && XEXP (op, 2) == const0_rtx
4127 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4128 XEXP (op, 1)))
4129 return XEXP (XEXP (op, 0), 0);
4130
4131 /* It can also be represented (for zero-extend) as an AND with an
4132 immediate. */
4133 if (GET_CODE (op) == AND
4134 && GET_CODE (XEXP (op, 0)) == MULT
4135 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4136 && CONST_INT_P (XEXP (op, 1))
4137 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4138 INTVAL (XEXP (op, 1))) != 0)
4139 return XEXP (XEXP (op, 0), 0);
4140
4141 /* Now handle extended register, as this may also have an optional
4142 left shift by 1..4. */
4143 if (GET_CODE (op) == ASHIFT
4144 && CONST_INT_P (XEXP (op, 1))
4145 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4146 op = XEXP (op, 0);
4147
4148 if (GET_CODE (op) == ZERO_EXTEND
4149 || GET_CODE (op) == SIGN_EXTEND)
4150 op = XEXP (op, 0);
4151
4152 if (op != x)
4153 return op;
4154
4155 return aarch64_strip_shift (x);
4156}
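/* For example, (ashift (zero_extend:DI (reg:SI X)) (const_int 2)), the
   RTL form of an extended-register operand shifted left by two, strips
   down to (reg:SI X); so does the equivalent AND-of-MULT form that is
   recognized through aarch64_uxt_size above.  */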
4157
4158/* Calculate the cost of calculating X, storing it in *COST. Result
4159 is true if the total cost of the operation has now been calculated. */
4160static bool
4161aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4162 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4163{
4164 rtx op0, op1;
4165 const struct cpu_rtx_cost_table *extra_cost
4166 = aarch64_tune_params->insn_extra_cost;
4167
4168 switch (code)
4169 {
4170 case SET:
4171 op0 = SET_DEST (x);
4172 op1 = SET_SRC (x);
4173
4174 switch (GET_CODE (op0))
4175 {
4176 case MEM:
4177 if (speed)
4178 *cost += extra_cost->memory_store;
4179
4180 if (op1 != const0_rtx)
4181 *cost += rtx_cost (op1, SET, 1, speed);
4182 return true;
4183
4184 case SUBREG:
4185 if (! REG_P (SUBREG_REG (op0)))
4186 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4187 /* Fall through. */
4188 case REG:
4189 /* Cost is just the cost of the RHS of the set. */
4190 *cost += rtx_cost (op1, SET, 1, true);
4191 return true;
4192
4193 case ZERO_EXTRACT: /* Bit-field insertion. */
4194 case SIGN_EXTRACT:
4195 /* Strip any redundant widening of the RHS to meet the width of
4196 the target. */
4197 if (GET_CODE (op1) == SUBREG)
4198 op1 = SUBREG_REG (op1);
4199 if ((GET_CODE (op1) == ZERO_EXTEND
4200 || GET_CODE (op1) == SIGN_EXTEND)
4201 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4202 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4203 >= INTVAL (XEXP (op0, 1))))
4204 op1 = XEXP (op1, 0);
4205 *cost += rtx_cost (op1, SET, 1, speed);
4206 return true;
4207
4208 default:
4209 break;
4210 }
4211 return false;
4212
4213 case MEM:
4214 if (speed)
4215 *cost += extra_cost->memory_load;
4216
4217 return true;
4218
4219 case NEG:
4220 op0 = CONST0_RTX (GET_MODE (x));
4221 op1 = XEXP (x, 0);
4222 goto cost_minus;
4223
4224 case COMPARE:
4225 op0 = XEXP (x, 0);
4226 op1 = XEXP (x, 1);
4227
4228 if (op1 == const0_rtx
4229 && GET_CODE (op0) == AND)
4230 {
4231 x = op0;
4232 goto cost_logic;
4233 }
4234
4235 /* Comparisons can work if the order is swapped.
4236 Canonicalization puts the more complex operation first, but
4237 we want it in op1. */
4238 if (! (REG_P (op0)
4239 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4240 {
4241 op0 = XEXP (x, 1);
4242 op1 = XEXP (x, 0);
4243 }
4244 goto cost_minus;
4245
4246 case MINUS:
4247 op0 = XEXP (x, 0);
4248 op1 = XEXP (x, 1);
4249
4250 cost_minus:
4251 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4252 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4253 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4254 {
4255 if (op0 != const0_rtx)
4256 *cost += rtx_cost (op0, MINUS, 0, speed);
4257
4258 if (CONST_INT_P (op1))
4259 {
4260 if (!aarch64_uimm12_shift (INTVAL (op1)))
4261 *cost += rtx_cost (op1, MINUS, 1, speed);
4262 }
4263 else
4264 {
4265 op1 = aarch64_strip_shift_or_extend (op1);
4266 *cost += rtx_cost (op1, MINUS, 1, speed);
4267 }
4268 return true;
4269 }
4270
4271 return false;
4272
4273 case PLUS:
4274 op0 = XEXP (x, 0);
4275 op1 = XEXP (x, 1);
4276
4277 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4278 {
4279 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4280 {
4281 *cost += rtx_cost (op0, PLUS, 0, speed);
4282 }
4283 else
4284 {
4285 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4286
4287 if (new_op0 == op0
4288 && GET_CODE (op0) == MULT)
4289 {
4290 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4291 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4292 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4293 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4294 {
4295 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4296 speed)
4297 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4298 speed)
4299 + rtx_cost (op1, PLUS, 1, speed));
4300 if (speed)
4301 *cost += extra_cost->int_multiply_extend_add;
4302 return true;
4303 }
4304 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4305 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4306 + rtx_cost (op1, PLUS, 1, speed));
4307
4308 if (speed)
4309 *cost += extra_cost->int_multiply_add;
 return true;
4310 }
4311
4312 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4313 + rtx_cost (op1, PLUS, 1, speed));
4314 }
4315 return true;
4316 }
4317
4318 return false;
4319
4320 case IOR:
4321 case XOR:
4322 case AND:
4323 cost_logic:
4324 op0 = XEXP (x, 0);
4325 op1 = XEXP (x, 1);
4326
4327 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4328 {
4329 if (CONST_INT_P (op1)
4330 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4331 {
4332 *cost += rtx_cost (op0, AND, 0, speed);
4333 }
4334 else
4335 {
4336 if (GET_CODE (op0) == NOT)
4337 op0 = XEXP (op0, 0);
4338 op0 = aarch64_strip_shift (op0);
4339 *cost += (rtx_cost (op0, AND, 0, speed)
4340 + rtx_cost (op1, AND, 1, speed));
4341 }
4342 return true;
4343 }
4344 return false;
4345
4346 case ZERO_EXTEND:
4347 if ((GET_MODE (x) == DImode
4348 && GET_MODE (XEXP (x, 0)) == SImode)
4349 || GET_CODE (XEXP (x, 0)) == MEM)
4350 {
4351 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4352 return true;
4353 }
4354 return false;
4355
4356 case SIGN_EXTEND:
4357 if (GET_CODE (XEXP (x, 0)) == MEM)
4358 {
4359 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4360 return true;
4361 }
4362 return false;
4363
4364 case ROTATE:
4365 if (!CONST_INT_P (XEXP (x, 1)))
4366 *cost += COSTS_N_INSNS (2);
4367 /* Fall through. */
4368 case ROTATERT:
4369 case LSHIFTRT:
4370 case ASHIFT:
4371 case ASHIFTRT:
4372
4373 /* Shifting by a register often takes an extra cycle. */
4374 if (speed && !CONST_INT_P (XEXP (x, 1)))
4375 *cost += extra_cost->register_shift;
4376
4377 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4378 return true;
4379
4380 case HIGH:
4381 if (!CONSTANT_P (XEXP (x, 0)))
4382 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4383 return true;
4384
4385 case LO_SUM:
4386 if (!CONSTANT_P (XEXP (x, 1)))
4387 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4388 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4389 return true;
4390
4391 case ZERO_EXTRACT:
4392 case SIGN_EXTRACT:
4393 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4394 return true;
4395
4396 case MULT:
4397 op0 = XEXP (x, 0);
4398 op1 = XEXP (x, 1);
4399
4400 *cost = COSTS_N_INSNS (1);
4401 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4402 {
4403 if (CONST_INT_P (op1)
4404 && exact_log2 (INTVAL (op1)) > 0)
4405 {
4406 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4407 return true;
4408 }
4409
4410 if ((GET_CODE (op0) == ZERO_EXTEND
4411 && GET_CODE (op1) == ZERO_EXTEND)
4412 || (GET_CODE (op0) == SIGN_EXTEND
4413 && GET_CODE (op1) == SIGN_EXTEND))
4414 {
4415 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4416 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4417 if (speed)
4418 *cost += extra_cost->int_multiply_extend;
4419 return true;
4420 }
4421
4422 if (speed)
4423 *cost += extra_cost->int_multiply;
4424 }
4425 else if (speed)
4426 {
4427 if (GET_MODE (x) == DFmode)
4428 *cost += extra_cost->double_multiply;
4429 else if (GET_MODE (x) == SFmode)
4430 *cost += extra_cost->float_multiply;
4431 }
4432
4433 return false; /* All arguments need to be in registers. */
4434
4435 case MOD:
4436 case UMOD:
4437 *cost = COSTS_N_INSNS (2);
4438 if (speed)
4439 {
4440 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4441 *cost += (extra_cost->int_multiply_add
4442 + extra_cost->int_divide);
4443 else if (GET_MODE (x) == DFmode)
4444 *cost += (extra_cost->double_multiply
4445 + extra_cost->double_divide);
4446 else if (GET_MODE (x) == SFmode)
4447 *cost += (extra_cost->float_multiply
4448 + extra_cost->float_divide);
4449 }
4450 return false; /* All arguments need to be in registers. */
4451
4452 case DIV:
4453 case UDIV:
4454 *cost = COSTS_N_INSNS (1);
4455 if (speed)
4456 {
4457 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4458 *cost += extra_cost->int_divide;
4459 else if (GET_MODE (x) == DFmode)
4460 *cost += extra_cost->double_divide;
4461 else if (GET_MODE (x) == SFmode)
4462 *cost += extra_cost->float_divide;
4463 }
4464 return false; /* All arguments need to be in registers. */
4465
4466 default:
4467 break;
4468 }
4469 return false;
4470}
4471
4472static int
4473aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4474 enum machine_mode mode ATTRIBUTE_UNUSED,
4475 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4476{
4477 enum rtx_code c = GET_CODE (x);
4478 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4479
4480 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4481 return addr_cost->pre_modify;
4482
4483 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4484 return addr_cost->post_modify;
4485
4486 if (c == PLUS)
4487 {
4488 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4489 return addr_cost->imm_offset;
4490 else if (GET_CODE (XEXP (x, 0)) == MULT
4491 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4492 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4493 return addr_cost->register_extend;
4494
4495 return addr_cost->register_offset;
4496 }
4497 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4498 return addr_cost->imm_offset;
4499
4500 return 0;
4501}
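/* Thus an address of the form (plus (reg) (const_int 16)) is costed as
   imm_offset, (plus (mult (reg) (const_int 4)) (reg)) as
   register_extend, and a plain (plus (reg) (reg)) as register_offset.  */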
4502
4503static int
4504aarch64_register_move_cost (enum machine_mode mode,
4505 reg_class_t from, reg_class_t to)
4506{
4507 const struct cpu_regmove_cost *regmove_cost
4508 = aarch64_tune_params->regmove_cost;
4509
4510 if (from == GENERAL_REGS && to == GENERAL_REGS)
4511 return regmove_cost->GP2GP;
4512 else if (from == GENERAL_REGS)
4513 return regmove_cost->GP2FP;
4514 else if (to == GENERAL_REGS)
4515 return regmove_cost->FP2GP;
4516
4517 /* When AdvSIMD instructions are disabled it is not possible to move
4518 a 128-bit value directly between Q registers. This is handled in
4519 secondary reload. A general register is used as a scratch to move
4520 the upper DI value and the lower DI value is moved directly,
4521 hence the cost is the sum of three moves. */
4522
4523 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
4524 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4525
4526 return regmove_cost->FP2FP;
4527}
4528
4529static int
4530aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4531 reg_class_t rclass ATTRIBUTE_UNUSED,
4532 bool in ATTRIBUTE_UNUSED)
4533{
4534 return aarch64_tune_params->memmov_cost;
4535}
4536
4537static void initialize_aarch64_code_model (void);
4538
4539/* Parse the architecture extension string. */
4540
4541static void
4542aarch64_parse_extension (char *str)
4543{
4544 /* The extension string is parsed left to right. */
4545 const struct aarch64_option_extension *opt = NULL;
4546
4547 /* Flag to say whether we are adding or removing an extension. */
4548 int adding_ext = -1;
4549
4550 while (str != NULL && *str != 0)
4551 {
4552 char *ext;
4553 size_t len;
4554
4555 str++;
4556 ext = strchr (str, '+');
4557
4558 if (ext != NULL)
4559 len = ext - str;
4560 else
4561 len = strlen (str);
4562
4563 if (len >= 2 && strncmp (str, "no", 2) == 0)
4564 {
4565 adding_ext = 0;
4566 len -= 2;
4567 str += 2;
4568 }
4569 else if (len > 0)
4570 adding_ext = 1;
4571
4572 if (len == 0)
4573 {
4574 error ("missing feature modifier after %qs", "+no");
4575 return;
4576 }
4577
4578 /* Scan over the extensions table trying to find an exact match. */
4579 for (opt = all_extensions; opt->name != NULL; opt++)
4580 {
4581 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
4582 {
4583 /* Add or remove the extension. */
4584 if (adding_ext)
4585 aarch64_isa_flags |= opt->flags_on;
4586 else
4587 aarch64_isa_flags &= ~(opt->flags_off);
4588 break;
4589 }
4590 }
4591
4592 if (opt->name == NULL)
4593 {
4594 /* Extension not found in list. */
4595 error ("unknown feature modifier %qs", str);
4596 return;
4597 }
4598
4599 str = ext;
4600 };
4601
4602 return;
4603}
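/* As an illustration, a feature string such as "+fp+nosimd" (assuming
   "fp" and "simd" are entries in all_extensions) first ORs in the
   flags for "fp" and then clears the flags for "simd"; an unknown
   name or a bare "+no" is reported with error ().  */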
4604
4605/* Parse the ARCH string. */
4606
4607static void
4608aarch64_parse_arch (void)
4609{
4610 char *ext;
4611 const struct processor *arch;
4612 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
4613 size_t len;
4614
4615 strcpy (str, aarch64_arch_string);
4616
4617 ext = strchr (str, '+');
4618
4619 if (ext != NULL)
4620 len = ext - str;
4621 else
4622 len = strlen (str);
4623
4624 if (len == 0)
4625 {
4626 error ("missing arch name in -march=%qs", str);
4627 return;
4628 }
4629
4630 /* Loop through the list of supported ARCHs to find a match. */
4631 for (arch = all_architectures; arch->name != NULL; arch++)
4632 {
4633 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
4634 {
4635 selected_arch = arch;
4636 aarch64_isa_flags = selected_arch->flags;
4637 selected_cpu = &all_cores[selected_arch->core];
4638
4639 if (ext != NULL)
4640 {
4641 /* ARCH string contains at least one extension. */
4642 aarch64_parse_extension (ext);
4643 }
4644
4645 return;
4646 }
4647 }
4648
4649 /* ARCH name not found in list. */
4650 error ("unknown value %qs for -march", str);
4651 return;
4652}
4653
4654/* Parse the CPU string. */
4655
4656static void
4657aarch64_parse_cpu (void)
4658{
4659 char *ext;
4660 const struct processor *cpu;
4661 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
4662 size_t len;
4663
4664 strcpy (str, aarch64_cpu_string);
4665
4666 ext = strchr (str, '+');
4667
4668 if (ext != NULL)
4669 len = ext - str;
4670 else
4671 len = strlen (str);
4672
4673 if (len == 0)
4674 {
4675 error ("missing cpu name in -mcpu=%qs", str);
4676 return;
4677 }
4678
4679 /* Loop through the list of supported CPUs to find a match. */
4680 for (cpu = all_cores; cpu->name != NULL; cpu++)
4681 {
4682 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
4683 {
4684 selected_cpu = cpu;
4685 aarch64_isa_flags = selected_cpu->flags;
4686
4687 if (ext != NULL)
4688 {
4689 /* CPU string contains at least one extension. */
4690 aarch64_parse_extension (ext);
4691 }
4692
4693 return;
4694 }
4695 }
4696
4697 /* CPU name not found in list. */
4698 error ("unknown value %qs for -mcpu", str);
4699 return;
4700}
4701
4702/* Parse the TUNE string. */
4703
4704static void
4705aarch64_parse_tune (void)
4706{
4707 const struct processor *cpu;
4708 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
4709 strcpy (str, aarch64_tune_string);
4710
4711 /* Loop through the list of supported CPUs to find a match. */
4712 for (cpu = all_cores; cpu->name != NULL; cpu++)
4713 {
4714 if (strcmp (cpu->name, str) == 0)
4715 {
4716 selected_tune = cpu;
4717 return;
4718 }
4719 }
4720
4721 /* CPU name not found in list. */
4722 error ("unknown value %qs for -mtune", str);
4723 return;
4724}
4725
4726
4727/* Implement TARGET_OPTION_OVERRIDE. */
4728
4729static void
4730aarch64_override_options (void)
4731{
4732 /* march wins over mcpu, so when march is defined, mcpu takes the same value,
4733 otherwise march remains undefined. mtune can be used with either march or
4734 mcpu. */
4735
4736 if (aarch64_arch_string)
4737 {
4738 aarch64_parse_arch ();
4739 aarch64_cpu_string = NULL;
4740 }
4741
4742 if (aarch64_cpu_string)
4743 {
4744 aarch64_parse_cpu ();
4745 selected_arch = NULL;
4746 }
4747
4748 if (aarch64_tune_string)
4749 {
4750 aarch64_parse_tune ();
4751 }
4752
4753 initialize_aarch64_code_model ();
4754
4755 aarch64_build_bitmask_table ();
4756
4757 /* This target defaults to strict volatile bitfields. */
4758 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
4759 flag_strict_volatile_bitfields = 1;
4760
4761 /* If the user did not specify a processor, choose the default
4762 one for them. This will be the CPU set during configuration using
4763 --with-cpu, otherwise it is "generic". */
4764 if (!selected_cpu)
4765 {
4766 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
4767 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
4768 }
4769
4770 gcc_assert (selected_cpu);
4771
4772 /* The selected cpu may be an architecture, so look up tuning by core ID. */
4773 if (!selected_tune)
4774 selected_tune = &all_cores[selected_cpu->core];
4775
4776 aarch64_tune_flags = selected_tune->flags;
4777 aarch64_tune = selected_tune->core;
4778 aarch64_tune_params = selected_tune->tune;
4779
4780 aarch64_override_options_after_change ();
4781}
4782
4783/* Implement targetm.override_options_after_change. */
4784
4785static void
4786aarch64_override_options_after_change (void)
4787{
4788 faked_omit_frame_pointer = false;
4789
4790 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
4791 that aarch64_frame_pointer_required will be called. We need to remember
4792 whether flag_omit_frame_pointer was turned on normally or just faked. */
4793
4794 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
4795 {
4796 flag_omit_frame_pointer = true;
4797 faked_omit_frame_pointer = true;
4798 }
4799}
4800
4801static struct machine_function *
4802aarch64_init_machine_status (void)
4803{
4804 struct machine_function *machine;
4805 machine = ggc_alloc_cleared_machine_function ();
4806 return machine;
4807}
4808
4809void
4810aarch64_init_expanders (void)
4811{
4812 init_machine_status = aarch64_init_machine_status;
4813}
4814
4815/* Select the code model to use, diagnosing combinations that are not
 supported. */
4816static void
4817initialize_aarch64_code_model (void)
4818{
4819 if (flag_pic)
4820 {
4821 switch (aarch64_cmodel_var)
4822 {
4823 case AARCH64_CMODEL_TINY:
4824 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
4825 break;
4826 case AARCH64_CMODEL_SMALL:
4827 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
4828 break;
4829 case AARCH64_CMODEL_LARGE:
4830 sorry ("code model %qs with -f%s", "large",
4831 flag_pic > 1 ? "PIC" : "pic");
4832 default:
4833 gcc_unreachable ();
4834 }
4835 }
4836 else
4837 aarch64_cmodel = aarch64_cmodel_var;
4838}
4839
4840/* Return true if SYMBOL_REF X binds locally. */
4841
4842static bool
4843aarch64_symbol_binds_local_p (const_rtx x)
4844{
4845 return (SYMBOL_REF_DECL (x)
4846 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
4847 : SYMBOL_REF_LOCAL_P (x));
4848}
4849
4850/* Return true if SYMBOL_REF X is thread-local. */
4851static bool
4852aarch64_tls_symbol_p (rtx x)
4853{
4854 if (! TARGET_HAVE_TLS)
4855 return false;
4856
4857 if (GET_CODE (x) != SYMBOL_REF)
4858 return false;
4859
4860 return SYMBOL_REF_TLS_MODEL (x) != 0;
4861}
4862
4863/* Classify a TLS symbol into one of the TLS kinds. */
4864enum aarch64_symbol_type
4865aarch64_classify_tls_symbol (rtx x)
4866{
4867 enum tls_model tls_kind = tls_symbolic_operand_type (x);
4868
4869 switch (tls_kind)
4870 {
4871 case TLS_MODEL_GLOBAL_DYNAMIC:
4872 case TLS_MODEL_LOCAL_DYNAMIC:
4873 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
4874
4875 case TLS_MODEL_INITIAL_EXEC:
4876 return SYMBOL_SMALL_GOTTPREL;
4877
4878 case TLS_MODEL_LOCAL_EXEC:
4879 return SYMBOL_SMALL_TPREL;
4880
4881 case TLS_MODEL_EMULATED:
4882 case TLS_MODEL_NONE:
4883 return SYMBOL_FORCE_TO_MEM;
4884
4885 default:
4886 gcc_unreachable ();
4887 }
4888}
4889
4890/* Return the method that should be used to access SYMBOL_REF or
4891 LABEL_REF X in context CONTEXT. */
4892enum aarch64_symbol_type
4893aarch64_classify_symbol (rtx x,
4894 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
4895{
4896 if (GET_CODE (x) == LABEL_REF)
4897 {
4898 switch (aarch64_cmodel)
4899 {
4900 case AARCH64_CMODEL_LARGE:
4901 return SYMBOL_FORCE_TO_MEM;
4902
4903 case AARCH64_CMODEL_TINY_PIC:
4904 case AARCH64_CMODEL_TINY:
4905 case AARCH64_CMODEL_SMALL_PIC:
4906 case AARCH64_CMODEL_SMALL:
4907 return SYMBOL_SMALL_ABSOLUTE;
4908
4909 default:
4910 gcc_unreachable ();
4911 }
4912 }
4913
4914 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4915
4916 switch (aarch64_cmodel)
4917 {
4918 case AARCH64_CMODEL_LARGE:
4919 return SYMBOL_FORCE_TO_MEM;
4920
4921 case AARCH64_CMODEL_TINY:
4922 case AARCH64_CMODEL_SMALL:
4923
4924 /* This is needed to get DFmode and TImode constants to be loaded off
4925 the constant pool. It is necessary to dump TImode values into
4926 the constant pool because we don't handle TImode constant loads
4927 properly yet and hence need to use the constant pool. */
4928 if (CONSTANT_POOL_ADDRESS_P (x))
4929 return SYMBOL_FORCE_TO_MEM;
4930
4931 if (aarch64_tls_symbol_p (x))
4932 return aarch64_classify_tls_symbol (x);
4933
4934 if (SYMBOL_REF_WEAK (x))
4935 return SYMBOL_FORCE_TO_MEM;
4936
4937 return SYMBOL_SMALL_ABSOLUTE;
4938
4939 case AARCH64_CMODEL_TINY_PIC:
4940 case AARCH64_CMODEL_SMALL_PIC:
4941
4942 if (CONSTANT_POOL_ADDRESS_P (x))
4943 return SYMBOL_FORCE_TO_MEM;
4944
4945 if (aarch64_tls_symbol_p (x))
4946 return aarch64_classify_tls_symbol (x);
4947
4948 if (!aarch64_symbol_binds_local_p (x))
4949 return SYMBOL_SMALL_GOT;
4950
4951 return SYMBOL_SMALL_ABSOLUTE;
4952
4953 default:
4954 gcc_unreachable ();
4955 }
4956 /* By default push everything into the constant pool. */
4957 return SYMBOL_FORCE_TO_MEM;
4958}
4959
4960/* Return true if X is a symbolic constant that can be used in context
4961 CONTEXT. If it is, store the type of the symbol in *SYMBOL_TYPE. */
4962
4963bool
4964aarch64_symbolic_constant_p (rtx x, enum aarch64_symbol_context context,
4965 enum aarch64_symbol_type *symbol_type)
4966{
4967 rtx offset;
4968 split_const (x, &x, &offset);
4969 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
4970 *symbol_type = aarch64_classify_symbol (x, context);
4971 else
4972 return false;
4973
4974 /* No checking of offset at this point. */
4975 return true;
4976}
4977
4978bool
4979aarch64_constant_address_p (rtx x)
4980{
4981 return (CONSTANT_P (x) && memory_address_p (DImode, x));
4982}
4983
4984bool
4985aarch64_legitimate_pic_operand_p (rtx x)
4986{
4987 if (GET_CODE (x) == SYMBOL_REF
4988 || (GET_CODE (x) == CONST
4989 && GET_CODE (XEXP (x, 0)) == PLUS
4990 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4991 return false;
4992
4993 return true;
4994}
4995
4996static bool
4997aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
4998{
4999 /* Do not allow vector struct mode constants. We could support
5000 0 and -1 easily, but they need support in aarch64-simd.md. */
5001 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5002 return false;
5003
5004 /* This could probably go away because
5005 we now decompose CONST_INTs according to expand_mov_immediate. */
5006 if ((GET_CODE (x) == CONST_VECTOR
5007 && aarch64_simd_valid_immediate (x, mode, false,
5008 NULL, NULL, NULL, NULL, NULL) != -1)
5009 || CONST_INT_P (x))
5010 return !targetm.cannot_force_const_mem (mode, x);
5011
5012 if (GET_CODE (x) == HIGH
5013 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5014 return true;
5015
5016 return aarch64_constant_address_p (x);
5017}
5018
5019rtx
5020aarch64_load_tp (rtx target)
5021{
5022 if (!target
5023 || GET_MODE (target) != Pmode
5024 || !register_operand (target, Pmode))
5025 target = gen_reg_rtx (Pmode);
5026
5027 /* Can return in any reg. */
5028 emit_insn (gen_aarch64_load_tp_hard (target));
5029 return target;
5030}
5031
5032/* On AAPCS systems, this is the "struct __va_list". */
5033static GTY(()) tree va_list_type;
5034
5035/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5036 Return the type to use as __builtin_va_list.
5037
5038 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5039
5040 struct __va_list
5041 {
5042 void *__stack;
5043 void *__gr_top;
5044 void *__vr_top;
5045 int __gr_offs;
5046 int __vr_offs;
5047 }; */
5048
5049static tree
5050aarch64_build_builtin_va_list (void)
5051{
5052 tree va_list_name;
5053 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5054
5055 /* Create the type. */
5056 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5057 /* Give it the required name. */
5058 va_list_name = build_decl (BUILTINS_LOCATION,
5059 TYPE_DECL,
5060 get_identifier ("__va_list"),
5061 va_list_type);
5062 DECL_ARTIFICIAL (va_list_name) = 1;
5063 TYPE_NAME (va_list_type) = va_list_name;
5064 TYPE_STUB_DECL (va_list_type) = va_list_name;
5065
5066 /* Create the fields. */
5067 f_stack = build_decl (BUILTINS_LOCATION,
5068 FIELD_DECL, get_identifier ("__stack"),
5069 ptr_type_node);
5070 f_grtop = build_decl (BUILTINS_LOCATION,
5071 FIELD_DECL, get_identifier ("__gr_top"),
5072 ptr_type_node);
5073 f_vrtop = build_decl (BUILTINS_LOCATION,
5074 FIELD_DECL, get_identifier ("__vr_top"),
5075 ptr_type_node);
5076 f_groff = build_decl (BUILTINS_LOCATION,
5077 FIELD_DECL, get_identifier ("__gr_offs"),
5078 integer_type_node);
5079 f_vroff = build_decl (BUILTINS_LOCATION,
5080 FIELD_DECL, get_identifier ("__vr_offs"),
5081 integer_type_node);
5082
5083 DECL_ARTIFICIAL (f_stack) = 1;
5084 DECL_ARTIFICIAL (f_grtop) = 1;
5085 DECL_ARTIFICIAL (f_vrtop) = 1;
5086 DECL_ARTIFICIAL (f_groff) = 1;
5087 DECL_ARTIFICIAL (f_vroff) = 1;
5088
5089 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5090 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5091 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5092 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5093 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5094
5095 TYPE_FIELDS (va_list_type) = f_stack;
5096 DECL_CHAIN (f_stack) = f_grtop;
5097 DECL_CHAIN (f_grtop) = f_vrtop;
5098 DECL_CHAIN (f_vrtop) = f_groff;
5099 DECL_CHAIN (f_groff) = f_vroff;
5100
5101 /* Compute its layout. */
5102 layout_type (va_list_type);
5103
5104 return va_list_type;
5105}
5106
5107/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5108static void
5109aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5110{
5111 const CUMULATIVE_ARGS *cum;
5112 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5113 tree stack, grtop, vrtop, groff, vroff;
5114 tree t;
5115 int gr_save_area_size;
5116 int vr_save_area_size;
5117 int vr_offset;
5118
5119 cum = &crtl->args.info;
5120 gr_save_area_size
5121 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5122 vr_save_area_size
5123 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5124
5125 if (TARGET_GENERAL_REGS_ONLY)
5126 {
5127 if (cum->aapcs_nvrn > 0)
5128 sorry ("%qs and floating point or vector arguments",
5129 "-mgeneral-regs-only");
5130 vr_save_area_size = 0;
5131 }
5132
5133 f_stack = TYPE_FIELDS (va_list_type_node);
5134 f_grtop = DECL_CHAIN (f_stack);
5135 f_vrtop = DECL_CHAIN (f_grtop);
5136 f_groff = DECL_CHAIN (f_vrtop);
5137 f_vroff = DECL_CHAIN (f_groff);
5138
5139 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5140 NULL_TREE);
5141 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5142 NULL_TREE);
5143 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5144 NULL_TREE);
5145 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5146 NULL_TREE);
5147 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5148 NULL_TREE);
5149
5150 /* Emit code to initialize STACK, which points to the next varargs stack
5151 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5152 by named arguments. STACK is 8-byte aligned. */
5153 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5154 if (cum->aapcs_stack_size > 0)
5155 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5156 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5157 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5158
5159 /* Emit code to initialize GRTOP, the top of the GR save area.
5160 virtual_incoming_args_rtx should have been 16 byte aligned. */
5161 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5162 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5163 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5164
5165 /* Emit code to initialize VRTOP, the top of the VR save area.
5166 This address is gr_save_area_bytes below GRTOP, rounded
5167 down to the next 16-byte boundary. */
5168 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5169 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5170 STACK_BOUNDARY / BITS_PER_UNIT);
5171
5172 if (vr_offset)
5173 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5174 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5175 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5176
5177 /* Emit code to initialize GROFF, the offset from GRTOP of the
5178 next GPR argument. */
5179 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5180 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5181 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5182
5183 /* Likewise emit code to initialize VROFF, the offset from FTOP
5184 of the next VR argument. */
5185 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5186 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5187 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5188}
5189
5190/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5191
5192static tree
5193aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5194 gimple_seq *post_p ATTRIBUTE_UNUSED)
5195{
5196 tree addr;
5197 bool indirect_p;
5198 bool is_ha; /* is HFA or HVA. */
5199 bool dw_align; /* double-word align. */
5200 enum machine_mode ag_mode = VOIDmode;
5201 int nregs;
5202 enum machine_mode mode;
5203
5204 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5205 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5206 HOST_WIDE_INT size, rsize, adjust, align;
5207 tree t, u, cond1, cond2;
5208
5209 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5210 if (indirect_p)
5211 type = build_pointer_type (type);
5212
5213 mode = TYPE_MODE (type);
5214
5215 f_stack = TYPE_FIELDS (va_list_type_node);
5216 f_grtop = DECL_CHAIN (f_stack);
5217 f_vrtop = DECL_CHAIN (f_grtop);
5218 f_groff = DECL_CHAIN (f_vrtop);
5219 f_vroff = DECL_CHAIN (f_groff);
5220
5221 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5222 f_stack, NULL_TREE);
5223 size = int_size_in_bytes (type);
5224 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5225
5226 dw_align = false;
5227 adjust = 0;
5228 if (aarch64_vfp_is_call_or_return_candidate (mode,
5229 type,
5230 &ag_mode,
5231 &nregs,
5232 &is_ha))
5233 {
5234 /* TYPE passed in fp/simd registers. */
5235 if (TARGET_GENERAL_REGS_ONLY)
5236 sorry ("%qs and floating point or vector arguments",
5237 "-mgeneral-regs-only");
5238
5239 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5240 unshare_expr (valist), f_vrtop, NULL_TREE);
5241 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5242 unshare_expr (valist), f_vroff, NULL_TREE);
5243
5244 rsize = nregs * UNITS_PER_VREG;
5245
5246 if (is_ha)
5247 {
5248 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5249 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5250 }
5251 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5252 && size < UNITS_PER_VREG)
5253 {
5254 adjust = UNITS_PER_VREG - size;
5255 }
5256 }
5257 else
5258 {
5259 /* TYPE passed in general registers. */
5260 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5261 unshare_expr (valist), f_grtop, NULL_TREE);
5262 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5263 unshare_expr (valist), f_groff, NULL_TREE);
5264 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5265 nregs = rsize / UNITS_PER_WORD;
5266
5267 if (align > 8)
5268 dw_align = true;
5269
5270 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5271 && size < UNITS_PER_WORD)
5272 {
5273 adjust = UNITS_PER_WORD - size;
5274 }
5275 }
5276
5277 /* Get a local temporary for the field value. */
5278 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5279
5280 /* Emit code to branch if off >= 0. */
5281 t = build2 (GE_EXPR, boolean_type_node, off,
5282 build_int_cst (TREE_TYPE (off), 0));
5283 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5284
5285 if (dw_align)
5286 {
5287 /* Emit: offs = (offs + 15) & -16. */
5288 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5289 build_int_cst (TREE_TYPE (off), 15));
5290 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5291 build_int_cst (TREE_TYPE (off), -16));
5292 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5293 }
5294 else
5295 roundup = NULL;
5296
5297 /* Update ap.__[g|v]r_offs */
5298 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5299 build_int_cst (TREE_TYPE (off), rsize));
5300 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5301
5302 /* String up. */
5303 if (roundup)
5304 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5305
5306 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5307 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5308 build_int_cst (TREE_TYPE (f_off), 0));
5309 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5310
5311 /* String up: make sure the assignment happens before the use. */
5312 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5313 COND_EXPR_ELSE (cond1) = t;
5314
5315 /* Prepare the trees handling the argument that is passed on the stack;
5316 the top-level node will be stored in ON_STACK. */
5317 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5318 if (align > 8)
5319 {
5320 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5321 t = fold_convert (intDI_type_node, arg);
5322 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5323 build_int_cst (TREE_TYPE (t), 15));
5324 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5325 build_int_cst (TREE_TYPE (t), -16));
5326 t = fold_convert (TREE_TYPE (arg), t);
5327 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5328 }
5329 else
5330 roundup = NULL;
5331 /* Advance ap.__stack */
5332 t = fold_convert (intDI_type_node, arg);
5333 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5334 build_int_cst (TREE_TYPE (t), size + 7));
5335 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5336 build_int_cst (TREE_TYPE (t), -8));
5337 t = fold_convert (TREE_TYPE (arg), t);
5338 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5339 /* String up roundup and advance. */
5340 if (roundup)
5341 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5342 /* String up with arg */
5343 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5344 /* Big-endianness related address adjustment. */
5345 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5346 && size < UNITS_PER_WORD)
5347 {
5348 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5349 size_int (UNITS_PER_WORD - size));
5350 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5351 }
5352
5353 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5354 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5355
5356 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5357 t = off;
5358 if (adjust)
5359 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5360 build_int_cst (TREE_TYPE (off), adjust));
5361
5362 t = fold_convert (sizetype, t);
5363 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5364
5365 if (is_ha)
5366 {
5367 /* type ha; // treat as "struct {ftype field[n];}"
5368 ... [computing offs]
5369 for (i = 0; i <nregs; ++i, offs += 16)
5370 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5371 return ha; */
5372 int i;
5373 tree tmp_ha, field_t, field_ptr_t;
5374
5375 /* Declare a local variable. */
5376 tmp_ha = create_tmp_var_raw (type, "ha");
5377 gimple_add_tmp_var (tmp_ha);
5378
5379 /* Establish the base type. */
5380 switch (ag_mode)
5381 {
5382 case SFmode:
5383 field_t = float_type_node;
5384 field_ptr_t = float_ptr_type_node;
5385 break;
5386 case DFmode:
5387 field_t = double_type_node;
5388 field_ptr_t = double_ptr_type_node;
5389 break;
5390 case TFmode:
5391 field_t = long_double_type_node;
5392 field_ptr_t = long_double_ptr_type_node;
5393 break;
5394/* The half precision and quad precision are not fully supported yet. Enable
5395 the following code after the support is complete. Need to find the correct
5396 type node for __fp16 *. */
5397#if 0
5398 case HFmode:
5399 field_t = float_type_node;
5400 field_ptr_t = float_ptr_type_node;
5401 break;
5402#endif
5403 case V2SImode:
5404 case V4SImode:
5405 {
5406 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5407 field_t = build_vector_type_for_mode (innertype, ag_mode);
5408 field_ptr_t = build_pointer_type (field_t);
5409 }
5410 break;
5411 default:
5412 gcc_assert (0);
5413 }
5414
5415 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
5416 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5417 addr = t;
5418 t = fold_convert (field_ptr_t, addr);
5419 t = build2 (MODIFY_EXPR, field_t,
5420 build1 (INDIRECT_REF, field_t, tmp_ha),
5421 build1 (INDIRECT_REF, field_t, t));
5422
5423 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5424 for (i = 1; i < nregs; ++i)
5425 {
5426 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5427 u = fold_convert (field_ptr_t, addr);
5428 u = build2 (MODIFY_EXPR, field_t,
5429 build2 (MEM_REF, field_t, tmp_ha,
5430 build_int_cst (field_ptr_t,
5431 (i *
5432 int_size_in_bytes (field_t)))),
5433 build1 (INDIRECT_REF, field_t, u));
5434 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5435 }
5436
5437 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5438 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5439 }
5440
5441 COND_EXPR_ELSE (cond2) = t;
5442 addr = fold_convert (build_pointer_type (type), cond1);
5443 addr = build_va_arg_indirect_ref (addr);
5444
5445 if (indirect_p)
5446 addr = build_va_arg_indirect_ref (addr);
5447
5448 return addr;
5449}
5450
5451/* Implement TARGET_SETUP_INCOMING_VARARGS. */
5452
5453static void
5454aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5455 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5456 int no_rtl)
5457{
5458 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5459 CUMULATIVE_ARGS local_cum;
5460 int gr_saved, vr_saved;
5461
5462 /* The caller has advanced CUM up to, but not beyond, the last named
5463 argument. Advance a local copy of CUM past the last "real" named
5464 argument, to find out how many registers are left over. */
5465 local_cum = *cum;
5466 aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
5467
5468 /* Find out how many registers we need to save. */
5469 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5470 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5471
5472 if (TARGET_GENERAL_REGS_ONLY)
5473 {
5474 if (local_cum.aapcs_nvrn > 0)
5475 sorry ("%qs and floating point or vector arguments",
5476 "-mgeneral-regs-only");
5477 vr_saved = 0;
5478 }
5479
5480 if (!no_rtl)
5481 {
5482 if (gr_saved > 0)
5483 {
5484 rtx ptr, mem;
5485
5486 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5487 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5488 - gr_saved * UNITS_PER_WORD);
5489 mem = gen_frame_mem (BLKmode, ptr);
5490 set_mem_alias_set (mem, get_varargs_alias_set ());
5491
5492 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5493 mem, gr_saved);
5494 }
5495 if (vr_saved > 0)
5496 {
5497 /* We can't use move_block_from_reg, because it will use
5498 the wrong mode, storing D regs only. */
5499 enum machine_mode mode = TImode;
5500 int off, i;
5501
5502 /* Set OFF to the offset from virtual_incoming_args_rtx of
5503 the first vector register. The VR save area lies below
5504 the GR one, and is aligned to 16 bytes. */
5505 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5506 STACK_BOUNDARY / BITS_PER_UNIT);
5507 off -= vr_saved * UNITS_PER_VREG;
5508
5509 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5510 {
5511 rtx ptr, mem;
5512
5513 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5514 mem = gen_frame_mem (mode, ptr);
5515 set_mem_alias_set (mem, get_varargs_alias_set ());
5516 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5517 off += UNITS_PER_VREG;
5518 }
5519 }
5520 }
5521
5522 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5523 any complication of having crtl->args.pretend_args_size changed. */
5524 cfun->machine->saved_varargs_size
5525 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5526 STACK_BOUNDARY / BITS_PER_UNIT)
5527 + vr_saved * UNITS_PER_VREG);
5528}
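/* For example, for "int f (int a, ...)" the local copy of CUM is
   advanced past the one named integer argument, so gr_saved is
   NUM_ARG_REGS - 1 and vr_saved is NUM_FP_ARG_REGS, and (unless
   -mgeneral-regs-only is in force) seven X registers and eight V
   registers are dumped into the save areas laid out above.  */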
5529
5530static void
5531aarch64_conditional_register_usage (void)
5532{
5533 int i;
5534 if (!TARGET_FLOAT)
5535 {
5536 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5537 {
5538 fixed_regs[i] = 1;
5539 call_used_regs[i] = 1;
5540 }
5541 }
5542}
5543
5544/* Walk down the type tree of TYPE counting consecutive base elements.
5545 If *MODEP is VOIDmode, then set it to the first valid floating point
5546 type. If a non-floating point type is found, or if a floating point
5547 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5548 otherwise return the count in the sub-tree. */
5549static int
5550aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5551{
5552 enum machine_mode mode;
5553 HOST_WIDE_INT size;
5554
5555 switch (TREE_CODE (type))
5556 {
5557 case REAL_TYPE:
5558 mode = TYPE_MODE (type);
5559 if (mode != DFmode && mode != SFmode && mode != TFmode)
5560 return -1;
5561
5562 if (*modep == VOIDmode)
5563 *modep = mode;
5564
5565 if (*modep == mode)
5566 return 1;
5567
5568 break;
5569
5570 case COMPLEX_TYPE:
5571 mode = TYPE_MODE (TREE_TYPE (type));
5572 if (mode != DFmode && mode != SFmode && mode != TFmode)
5573 return -1;
5574
5575 if (*modep == VOIDmode)
5576 *modep = mode;
5577
5578 if (*modep == mode)
5579 return 2;
5580
5581 break;
5582
5583 case VECTOR_TYPE:
5584 /* Use V2SImode and V4SImode as representatives of all 64-bit
5585 and 128-bit vector types. */
5586 size = int_size_in_bytes (type);
5587 switch (size)
5588 {
5589 case 8:
5590 mode = V2SImode;
5591 break;
5592 case 16:
5593 mode = V4SImode;
5594 break;
5595 default:
5596 return -1;
5597 }
5598
5599 if (*modep == VOIDmode)
5600 *modep = mode;
5601
5602 /* Vector modes are considered to be opaque: two vectors are
5603 equivalent for the purposes of being homogeneous aggregates
5604 if they are the same size. */
5605 if (*modep == mode)
5606 return 1;
5607
5608 break;
5609
5610 case ARRAY_TYPE:
5611 {
5612 int count;
5613 tree index = TYPE_DOMAIN (type);
5614
5615 /* Can't handle incomplete types. */
5616 if (!COMPLETE_TYPE_P (type))
5617 return -1;
5618
5619 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5620 if (count == -1
5621 || !index
5622 || !TYPE_MAX_VALUE (index)
5623 || !host_integerp (TYPE_MAX_VALUE (index), 1)
5624 || !TYPE_MIN_VALUE (index)
5625 || !host_integerp (TYPE_MIN_VALUE (index), 1)
5626 || count < 0)
5627 return -1;
5628
5629 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
5630 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
5631
5632 /* There must be no padding. */
5633 if (!host_integerp (TYPE_SIZE (type), 1)
5634 || (tree_low_cst (TYPE_SIZE (type), 1)
5635 != count * GET_MODE_BITSIZE (*modep)))
5636 return -1;
5637
5638 return count;
5639 }
5640
5641 case RECORD_TYPE:
5642 {
5643 int count = 0;
5644 int sub_count;
5645 tree field;
5646
5647 /* Can't handle incomplete types. */
5648 if (!COMPLETE_TYPE_P (type))
5649 return -1;
5650
5651 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5652 {
5653 if (TREE_CODE (field) != FIELD_DECL)
5654 continue;
5655
5656 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5657 if (sub_count < 0)
5658 return -1;
5659 count += sub_count;
5660 }
5661
5662 /* There must be no padding. */
5663 if (!host_integerp (TYPE_SIZE (type), 1)
5664 || (tree_low_cst (TYPE_SIZE (type), 1)
5665 != count * GET_MODE_BITSIZE (*modep)))
5666 return -1;
5667
5668 return count;
5669 }
5670
5671 case UNION_TYPE:
5672 case QUAL_UNION_TYPE:
5673 {
5674 /* These aren't very interesting except in a degenerate case. */
5675 int count = 0;
5676 int sub_count;
5677 tree field;
5678
5679 /* Can't handle incomplete types. */
5680 if (!COMPLETE_TYPE_P (type))
5681 return -1;
5682
5683 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5684 {
5685 if (TREE_CODE (field) != FIELD_DECL)
5686 continue;
5687
5688 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5689 if (sub_count < 0)
5690 return -1;
5691 count = count > sub_count ? count : sub_count;
5692 }
5693
5694 /* There must be no padding. */
5695 if (!host_integerp (TYPE_SIZE (type), 1)
5696 || (tree_low_cst (TYPE_SIZE (type), 1)
5697 != count * GET_MODE_BITSIZE (*modep)))
5698 return -1;
5699
5700 return count;
5701 }
5702
5703 default:
5704 break;
5705 }
5706
5707 return -1;
5708}
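/* Examples of this classification: "struct { double x, y; }" yields 2
   with *MODEP set to DFmode, "float v[3]" yields 3 with *MODEP set to
   SFmode, and "struct { float f; double d; }" yields -1 because the
   element modes do not agree.  */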
5709
5710/* Return TRUE if the type, as described by TYPE and MODE, is a composite
5711 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
5712 array types. The C99 floating-point complex types are also considered
5713 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
5714 types, which are GCC extensions and out of the scope of AAPCS64, are
5715 treated as composite types here as well.
5716
5717 Note that MODE itself is not sufficient in determining whether a type
5718 is such a composite type or not. This is because
5719 stor-layout.c:compute_record_mode may have already changed the MODE
5720 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
5721 structure with only one field may have its MODE set to the mode of the
5722 field. Also an integer mode whose size matches the size of the
5723 RECORD_TYPE type may be used to substitute the original mode
5724 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
5725 solely relied on. */
5726
5727static bool
5728aarch64_composite_type_p (const_tree type,
5729 enum machine_mode mode)
5730{
5731 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
5732 return true;
5733
5734 if (mode == BLKmode
5735 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
5736 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
5737 return true;
5738
5739 return false;
5740}
5741
5742/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
5743 type as described in AAPCS64 \S 4.1.2.
5744
5745 See the comment above aarch64_composite_type_p for the notes on MODE. */
5746
5747static bool
5748aarch64_short_vector_p (const_tree type,
5749 enum machine_mode mode)
5750{
5751 HOST_WIDE_INT size = -1;
5752
5753 if (type && TREE_CODE (type) == VECTOR_TYPE)
5754 size = int_size_in_bytes (type);
5755 else if (!aarch64_composite_type_p (type, mode)
5756 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5757 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
5758 size = GET_MODE_SIZE (mode);
5759
5760 return (size == 8 || size == 16) ? true : false;
5761}
5762
5763/* Return TRUE if an argument, whose type is described by TYPE and MODE,
5764 shall be passed or returned in simd/fp register(s) (providing these
5765 parameter passing registers are available).
5766
5767 Upon successful return, *COUNT returns the number of needed registers,
5768 *BASE_MODE returns the mode of the individual register and when IS_HA
5769 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
5770 floating-point aggregate or a homogeneous short-vector aggregate. */
5771
5772static bool
5773aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
5774 const_tree type,
5775 enum machine_mode *base_mode,
5776 int *count,
5777 bool *is_ha)
5778{
5779 enum machine_mode new_mode = VOIDmode;
5780 bool composite_p = aarch64_composite_type_p (type, mode);
5781
5782 if (is_ha != NULL) *is_ha = false;
5783
5784 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
5785 || aarch64_short_vector_p (type, mode))
5786 {
5787 *count = 1;
5788 new_mode = mode;
5789 }
5790 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5791 {
5792 if (is_ha != NULL) *is_ha = true;
5793 *count = 2;
5794 new_mode = GET_MODE_INNER (mode);
5795 }
5796 else if (type && composite_p)
5797 {
5798 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5799
5800 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
5801 {
5802 if (is_ha != NULL) *is_ha = true;
5803 *count = ag_count;
5804 }
5805 else
5806 return false;
5807 }
5808 else
5809 return false;
5810
5811 *base_mode = new_mode;
5812 return true;
5813}
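/* Under this scheme a "double _Complex" argument is a candidate with
   *COUNT == 2 and *BASE_MODE == DFmode, and a structure of four floats
   is a homogeneous aggregate with *COUNT == 4 and *BASE_MODE == SFmode,
   while aggregates with more than HA_MAX_NUM_FLDS elements or with
   mixed element modes are rejected.  */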
5814
5815/* Implement TARGET_STRUCT_VALUE_RTX. */
5816
5817static rtx
5818aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
5819 int incoming ATTRIBUTE_UNUSED)
5820{
5821 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
5822}
5823
5824/* Implements target hook vector_mode_supported_p. */
5825static bool
5826aarch64_vector_mode_supported_p (enum machine_mode mode)
5827{
5828 if (TARGET_SIMD
5829 && (mode == V4SImode || mode == V8HImode
5830 || mode == V16QImode || mode == V2DImode
5831 || mode == V2SImode || mode == V4HImode
5832 || mode == V8QImode || mode == V2SFmode
5833 || mode == V4SFmode || mode == V2DFmode))
5834 return true;
5835
5836 return false;
5837}
5838
5839/* Return quad mode as the preferred SIMD mode. */
5840static enum machine_mode
5841aarch64_preferred_simd_mode (enum machine_mode mode)
5842{
5843 if (TARGET_SIMD)
5844 switch (mode)
5845 {
5846 case DFmode:
5847 return V2DFmode;
5848 case SFmode:
5849 return V4SFmode;
5850 case SImode:
5851 return V4SImode;
5852 case HImode:
5853 return V8HImode;
5854 case QImode:
5855 return V16QImode;
5856 case DImode:
5857 return V2DImode;
5859
5860 default:;
5861 }
5862 return word_mode;
5863}
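
/* For illustration: asking for the preferred SIMD mode of SFmode yields
   V4SFmode, so a hypothetical loop such as

     void scale (float *a, float k, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] *= k;
     }

   can be vectorized with 128-bit (4 x float) operations when
   TARGET_SIMD is enabled.  */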
5864
5865/* A table to help perform AArch64-specific name mangling for AdvSIMD
5866 vector types in order to conform to the AAPCS64 (see "Procedure
5867 Call Standard for the ARM 64-bit Architecture", Appendix A). To
5868 qualify for emission with the mangled names defined in that document,
5869 a vector type must not only be of the correct mode but also be
5870 composed of AdvSIMD vector element types (e.g.
5871 __builtin_aarch64_simd_qi); these types are registered by
5872 aarch64_init_simd_builtins (). In other words, vector types defined
5873 in other ways e.g. via vector_size attribute will get default
5874 mangled names. */
5875typedef struct
5876{
5877 enum machine_mode mode;
5878 const char *element_type_name;
5879 const char *mangled_name;
5880} aarch64_simd_mangle_map_entry;
5881
5882static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
5883 /* 64-bit containerized types. */
5884 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
5885 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
5886 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
5887 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
5888 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
5889 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
5890 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
5891 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
5892 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
5893 /* 128-bit containerized types. */
5894 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
5895 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
5896 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
5897 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
5898 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
5899 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
5900 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
5901 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
5902 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
5903 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
5904 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
5905 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
5906 { VOIDmode, NULL, NULL }
5907};
5908
5909/* Implement TARGET_MANGLE_TYPE. */
5910
5911const char *
5912aarch64_mangle_type (const_tree type)
5913{
5914 /* The AArch64 ABI documents say that "__va_list" has to be
5915 mangled as if it is in the "std" namespace. */
5916 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
5917 return "St9__va_list";
5918
5919 /* Check the mode of the vector type, and the name of the vector
5920 element type, against the table. */
5921 if (TREE_CODE (type) == VECTOR_TYPE)
5922 {
5923 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
5924
5925 while (pos->mode != VOIDmode)
5926 {
5927 tree elt_type = TREE_TYPE (type);
5928
5929 if (pos->mode == TYPE_MODE (type)
5930 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
5931 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
5932 pos->element_type_name))
5933 return pos->mangled_name;
5934
5935 pos++;
5936 }
5937 }
5938
5939 /* Use the default mangling. */
5940 return NULL;
5941}
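
/* For illustration (the function name is hypothetical): given the table
   above, a C++ declaration

     void f (int32x4_t);

   is emitted as _Z1f11__Int32x4_t, and a parameter of the AArch64
   __builtin_va_list type is mangled as if it were std::__va_list,
   i.e. using St9__va_list.  */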
5942
5943/* Return the AdvSIMD element size letter ('b', 'h', 's' or 'd') for SIZE bits. */
5944static unsigned char
5945sizetochar (int size)
5946{
5947 switch (size)
5948 {
5949 case 64: return 'd';
5950 case 32: return 's';
5951 case 16: return 'h';
5952 case 8 : return 'b';
5953 default: gcc_unreachable ();
5954 }
5955}
5956
5957static int
5958aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
5959 rtx *modconst, int *elementwidth,
5960 unsigned char *elementchar,
5961 int *mvn, int *shift)
5962{
5963#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
5964 matches = 1; \
5965 for (i = 0; i < idx; i += (STRIDE)) \
5966 if (!(TEST)) \
5967 matches = 0; \
5968 if (matches) \
5969 { \
5970 immtype = (CLASS); \
5971 elsize = (ELSIZE); \
5972 elchar = sizetochar (elsize); \
5973 eshift = (SHIFT); \
5974 emvn = (NEG); \
5975 break; \
5976 }
5977
5978 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
5979 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
5980 unsigned char bytes[16];
5981 unsigned char elchar = 0;
5982 int immtype = -1, matches;
5983 unsigned int invmask = inverse ? 0xff : 0;
5984 int eshift, emvn;
5985
5986 /* TODO: Vectors of float constants. */
5987 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5988 return -1;
5989
5990 /* Splat vector constant out into a byte vector. */
5991 for (i = 0; i < n_elts; i++)
5992 {
5993 rtx el = CONST_VECTOR_ELT (op, i);
5994 unsigned HOST_WIDE_INT elpart;
5995 unsigned int part, parts;
5996
5997 if (GET_CODE (el) == CONST_INT)
5998 {
5999 elpart = INTVAL (el);
6000 parts = 1;
6001 }
6002 else if (GET_CODE (el) == CONST_DOUBLE)
6003 {
6004 elpart = CONST_DOUBLE_LOW (el);
6005 parts = 2;
6006 }
6007 else
6008 gcc_unreachable ();
6009
6010 for (part = 0; part < parts; part++)
6011 {
6012 unsigned int byte;
6013 for (byte = 0; byte < innersize; byte++)
6014 {
6015 bytes[idx++] = (elpart & 0xff) ^ invmask;
6016 elpart >>= BITS_PER_UNIT;
6017 }
6018 if (GET_CODE (el) == CONST_DOUBLE)
6019 elpart = CONST_DOUBLE_HIGH (el);
6020 }
6021 }
6022
6023 /* Sanity check. */
6024 gcc_assert (idx == GET_MODE_SIZE (mode));
6025
6026 do
6027 {
6028 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6029 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6030
6031 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6032 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6033
6034 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6035 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6036
6037 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6038 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6039
6040 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6041
6042 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6043
6044 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6045 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6046
6047 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6048 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6049
6050 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6051 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6052
6053 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6054 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6055
6056 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6057
6058 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6059
6060 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6061 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6062
6063 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6064 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6065
6066 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6067 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 0, 0);
6068
6069 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6070 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 0, 1);
6071
6072 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6073
6074 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6075 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6076 }
6077 while (0);
6078
6079 /* TODO: Currently the assembler cannot handle types 12 to 15.
6080 And there is no way to specify cmode through the compiler.
6081 Disable them till there is support in the assembler. */
6082 if (immtype == -1
6083 || (immtype >= 12 && immtype <= 15)
6084 || immtype == 18)
6085 return -1;
6086
6087
6088 if (elementwidth)
6089 *elementwidth = elsize;
6090
6091 if (elementchar)
6092 *elementchar = elchar;
6093
6094 if (mvn)
6095 *mvn = emvn;
6096
6097 if (shift)
6098 *shift = eshift;
6099
6100 if (modconst)
6101 {
6102 unsigned HOST_WIDE_INT imm = 0;
6103
6104 /* Un-invert bytes of recognized vector, if necessary. */
6105 if (invmask != 0)
6106 for (i = 0; i < idx; i++)
6107 bytes[i] ^= invmask;
6108
6109 if (immtype == 17)
6110 {
6111 /* FIXME: Broken on 32-bit H_W_I hosts. */
6112 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6113
6114 for (i = 0; i < 8; i++)
6115 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6116 << (i * BITS_PER_UNIT);
6117
6118 *modconst = GEN_INT (imm);
6119 }
6120 else
6121 {
6122 unsigned HOST_WIDE_INT imm = 0;
6123
6124 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6125 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6126
6127 /* Construct 'abcdefgh' because the assembler cannot handle
6128 generic constants. */
6129 gcc_assert (shift != NULL && mvn != NULL);
6130 if (*mvn)
6131 imm = ~imm;
6132 imm = (imm >> *shift) & 0xff;
6133 *modconst = GEN_INT (imm);
6134 }
6135 }
6136
6137 return immtype;
6138#undef CHECK
6139}
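
/* For illustration: a V4SImode constant with every element equal to
   0x45 matches the first CHECK above (immtype 0, element size 32,
   shift 0, no MVN) and can be emitted as a single immediate move,
   roughly

     movi    v0.4s, #0x45

   (the exact assembler syntax is indicative only), whereas a vector of
   unrelated values fails every CHECK and the function returns -1.  */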
6140
6141/* Return TRUE if rtx X is legal for use as an AdvSIMD MOVI instruction
6142 (or, implicitly, MVNI) immediate. Write back width per element
6143 to *ELEMENTWIDTH (or zero for float elements), and a modified constant
6144 (whatever should be output for a MOVI instruction) in *MODCONST. */
6145int
6146aarch64_simd_immediate_valid_for_move (rtx op, enum machine_mode mode,
6147 rtx *modconst, int *elementwidth,
6148 unsigned char *elementchar,
6149 int *mvn, int *shift)
6150{
6151 rtx tmpconst;
6152 int tmpwidth;
6153 unsigned char tmpwidthc;
6154 int tmpmvn = 0, tmpshift = 0;
6155 int retval = aarch64_simd_valid_immediate (op, mode, 0, &tmpconst,
6156 &tmpwidth, &tmpwidthc,
6157 &tmpmvn, &tmpshift);
6158
6159 if (retval == -1)
6160 return 0;
6161
6162 if (modconst)
6163 *modconst = tmpconst;
6164
6165 if (elementwidth)
6166 *elementwidth = tmpwidth;
6167
6168 if (elementchar)
6169 *elementchar = tmpwidthc;
6170
6171 if (mvn)
6172 *mvn = tmpmvn;
6173
6174 if (shift)
6175 *shift = tmpshift;
6176
6177 return 1;
6178}
6179
6180static bool
6181aarch64_const_vec_all_same_int_p (rtx x,
6182 HOST_WIDE_INT minval,
6183 HOST_WIDE_INT maxval)
6184{
6185 HOST_WIDE_INT firstval;
6186 int count, i;
6187
6188 if (GET_CODE (x) != CONST_VECTOR
6189 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6190 return false;
6191
6192 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6193 if (firstval < minval || firstval > maxval)
6194 return false;
6195
6196 count = CONST_VECTOR_NUNITS (x);
6197 for (i = 1; i < count; i++)
6198 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6199 return false;
6200
6201 return true;
6202}
6203
6204/* Check if immediate shift constants are within range. */
6205bool
6206aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6207{
6208 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6209 if (left)
6210 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6211 else
6212 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6213}
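
/* For illustration: for V4SImode the element width is 32 bits, so a
   vector left-shift immediate must replicate a value in [0, 31] and a
   vector right-shift immediate a value in [1, 32], matching the
   immediate ranges of the AdvSIMD SHL and SSHR/USHR instructions.  */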
6214
6215bool
6216aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6217{
6218 int nunits;
6219 int i;
6220
6221 if (GET_CODE (x) != CONST_VECTOR)
6222 return false;
6223
6224 nunits = GET_MODE_NUNITS (mode);
6225
6226 for (i = 0; i < nunits; i++)
6227 if (INTVAL (CONST_VECTOR_ELT (x, i)) != 0)
6228 return false;
6229
6230 return true;
6231}
6232
6233bool
6234aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6235{
6236 HOST_WIDE_INT imm = INTVAL (x);
6237 int i;
6238
6239 for (i = 0; i < 8; i++)
6240 {
6241 unsigned int byte = imm & 0xff;
6242 if (byte != 0xff && byte != 0)
6243 return false;
6244 imm >>= 8;
6245 }
6246
6247 return true;
6248}
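
/* For illustration: 0xff00ffff00ff0000 is accepted because every byte
   is either 0x00 or 0xff, while 0x0102030405060708 is rejected; this is
   the class of 64-bit scalar values that an AdvSIMD byte-mask immediate
   move can materialize directly.  */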
6249
6250/* Return a const_int vector of VAL. */
6251rtx
6252aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6253{
6254 int nunits = GET_MODE_NUNITS (mode);
6255 rtvec v = rtvec_alloc (nunits);
6256 int i;
6257
6258 for (i = 0; i < nunits; i++)
6259 RTVEC_ELT (v, i) = GEN_INT (val);
6260
6261 return gen_rtx_CONST_VECTOR (mode, v);
6262}
6263
6264/* Construct and return a PARALLEL rtx selecting the lanes of either the
   high (HIGH == true) or the low half of a vector of mode MODE. */
6265rtx
6266aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6267{
6268 int nunits = GET_MODE_NUNITS (mode);
6269 rtvec v = rtvec_alloc (nunits / 2);
6270 int base = high ? nunits / 2 : 0;
6271 rtx t1;
6272 int i;
6273
6274 for (i = 0; i < nunits / 2; i++)
6275 RTVEC_ELT (v, i) = GEN_INT (base + i);
6276
6277 t1 = gen_rtx_PARALLEL (mode, v);
6278 return t1;
6279}
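
/* For illustration: for V4SImode, passing HIGH == false builds
   (parallel [(const_int 0) (const_int 1)]) and HIGH == true builds
   (parallel [(const_int 2) (const_int 3)]), i.e. lane selectors for the
   low and high halves of the vector.  */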
6280
6281/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6282 HIGH (exclusive). */
6283void
6284aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6285{
6286 HOST_WIDE_INT lane;
6287 gcc_assert (GET_CODE (operand) == CONST_INT);
6288 lane = INTVAL (operand);
6289
6290 if (lane < low || lane >= high)
6291 error ("lane out of range");
6292}
6293
6294void
6295aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6296{
6297 gcc_assert (GET_CODE (operand) == CONST_INT);
6298 HOST_WIDE_INT lane = INTVAL (operand);
6299
6300 if (lane < low || lane >= high)
6301 error ("constant out of range");
6302}
6303
6304/* Emit code to reinterpret one AdvSIMD type as another,
6305 without altering bits. */
6306void
6307aarch64_simd_reinterpret (rtx dest, rtx src)
6308{
6309 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6310}
6311
6312/* Emit code to place an AdvSIMD pair result in memory locations (with equal
6313 registers). */
6314void
6315aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6316 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6317 rtx op1)
6318{
6319 rtx mem = gen_rtx_MEM (mode, destaddr);
6320 rtx tmp1 = gen_reg_rtx (mode);
6321 rtx tmp2 = gen_reg_rtx (mode);
6322
6323 emit_insn (intfn (tmp1, op1, tmp2));
6324
6325 emit_move_insn (mem, tmp1);
6326 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6327 emit_move_insn (mem, tmp2);
6328}
6329
6330/* Return TRUE if OP is a valid vector addressing mode. */
6331bool
6332aarch64_simd_mem_operand_p (rtx op)
6333{
6334 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6335 || GET_CODE (XEXP (op, 0)) == REG);
6336}
6337
6338/* Set up OPERANDS for a register copy from SRC to DEST, taking care
6339 not to early-clobber SRC registers in the process.
6340
6341 We assume that the operands described by SRC and DEST represent a
6342 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6343 number of components into which the copy has been decomposed. */
6344void
6345aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6346 rtx *src, unsigned int count)
6347{
6348 unsigned int i;
6349
6350 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6351 || REGNO (operands[0]) < REGNO (operands[1]))
6352 {
6353 for (i = 0; i < count; i++)
6354 {
6355 operands[2 * i] = dest[i];
6356 operands[2 * i + 1] = src[i];
6357 }
6358 }
6359 else
6360 {
6361 for (i = 0; i < count; i++)
6362 {
6363 operands[2 * i] = dest[count - i - 1];
6364 operands[2 * i + 1] = src[count - i - 1];
6365 }
6366 }
6367}
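
/* For illustration (register names are indicative only): for a
   two-register copy with DEST = {d1, d2} and SRC = {d2, d3} the forward
   order is safe (d2 is read before it is written), but for
   DEST = {d2, d3} and SRC = {d1, d2} the loop above reverses the order,
   copying d2 to d3 before d1 to d2, so no source register is clobbered
   before it is read.  */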
6368
6369/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6370 one of VSTRUCT modes: OI, CI or XI. */
6371int
6372aarch64_simd_attr_length_move (rtx insn)
6373{
6374 rtx reg, mem, addr;
6375 int load;
6376 enum machine_mode mode;
6377
6378 extract_insn_cached (insn);
6379
6380 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6381 {
6382 mode = GET_MODE (recog_data.operand[0]);
6383 switch (mode)
6384 {
6385 case OImode:
6386 return 8;
6387 case CImode:
6388 return 12;
6389 case XImode:
6390 return 16;
6391 default:
6392 gcc_unreachable ();
6393 }
6394 }
6395 return 4;
6396}
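
/* For illustration: a register-to-register OImode move (two 128-bit
   registers) splits into two instructions, hence 8 bytes; CImode needs
   three (12 bytes) and XImode four (16 bytes).  Loads and stores of
   these modes are handled by a single structure load/store instruction,
   hence the default length of 4 bytes.  */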
6397
6398/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6399 alignment of a vector to 128 bits. */
6400static HOST_WIDE_INT
6401aarch64_simd_vector_alignment (const_tree type)
6402{
6403 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
6404 return MIN (align, 128);
6405}
6406
6407/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6408static bool
6409aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6410{
6411 if (is_packed)
6412 return false;
6413
6414 /* We guarantee alignment for vectors up to 128 bits. */
6415 if (tree_int_cst_compare (TYPE_SIZE (type),
6416 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6417 return false;
6418
6419 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6420 return true;
6421}
6422
6423static unsigned HOST_WIDE_INT
6424aarch64_shift_truncation_mask (enum machine_mode mode)
6425{
6426 return
6427 (aarch64_vector_mode_supported_p (mode)
6428 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
6429}
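
/* For illustration: for scalar shifts the mask is
   GET_MODE_BITSIZE (mode) - 1, i.e. 31 for SImode and 63 for DImode,
   reflecting that variable shifts only use the low bits of the shift
   amount; for AdvSIMD vector and structure modes the function returns 0
   because no such truncation is guaranteed.  */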
6430
6431#ifndef TLS_SECTION_ASM_FLAG
6432#define TLS_SECTION_ASM_FLAG 'T'
6433#endif
6434
6435void
6436aarch64_elf_asm_named_section (const char *name, unsigned int flags,
6437 tree decl ATTRIBUTE_UNUSED)
6438{
6439 char flagchars[10], *f = flagchars;
6440
6441 /* If we have already declared this section, we can use an
6442 abbreviated form to switch back to it -- unless this section is
6443 part of a COMDAT group, in which case GAS requires the full
6444 declaration every time. */
6445 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6446 && (flags & SECTION_DECLARED))
6447 {
6448 fprintf (asm_out_file, "\t.section\t%s\n", name);
6449 return;
6450 }
6451
6452 if (!(flags & SECTION_DEBUG))
6453 *f++ = 'a';
6454 if (flags & SECTION_WRITE)
6455 *f++ = 'w';
6456 if (flags & SECTION_CODE)
6457 *f++ = 'x';
6458 if (flags & SECTION_SMALL)
6459 *f++ = 's';
6460 if (flags & SECTION_MERGE)
6461 *f++ = 'M';
6462 if (flags & SECTION_STRINGS)
6463 *f++ = 'S';
6464 if (flags & SECTION_TLS)
6465 *f++ = TLS_SECTION_ASM_FLAG;
6466 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6467 *f++ = 'G';
6468 *f = '\0';
6469
6470 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
6471
6472 if (!(flags & SECTION_NOTYPE))
6473 {
6474 const char *type;
6475 const char *format;
6476
6477 if (flags & SECTION_BSS)
6478 type = "nobits";
6479 else
6480 type = "progbits";
6481
6482#ifdef TYPE_OPERAND_FMT
6483 format = "," TYPE_OPERAND_FMT;
6484#else
6485 format = ",@%s";
6486#endif
6487
6488 fprintf (asm_out_file, format, type);
6489
6490 if (flags & SECTION_ENTSIZE)
6491 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
6492 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6493 {
6494 if (TREE_CODE (decl) == IDENTIFIER_NODE)
6495 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
6496 else
6497 fprintf (asm_out_file, ",%s,comdat",
6498 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
6499 }
6500 }
6501
6502 putc ('\n', asm_out_file);
6503}
6504
6505/* Select a format to encode pointers in exception handling data. */
6506int
6507aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
6508{
6509 int type;
6510 switch (aarch64_cmodel)
6511 {
6512 case AARCH64_CMODEL_TINY:
6513 case AARCH64_CMODEL_TINY_PIC:
6514 case AARCH64_CMODEL_SMALL:
6515 case AARCH64_CMODEL_SMALL_PIC:
6516 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
6517 for everything. */
6518 type = DW_EH_PE_sdata4;
6519 break;
6520 default:
6521 /* No assumptions here. 8-byte relocs required. */
6522 type = DW_EH_PE_sdata8;
6523 break;
6524 }
6525 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
6526}
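
/* For illustration: under the tiny and small code models a global
   pointer in EH data is encoded as
   DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4, while the
   remaining (large) models fall back to the 8-byte sdata8 encoding.  */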
6527
6528/* Emit load exclusive. */
6529
6530static void
6531aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
6532 rtx mem, rtx model_rtx)
6533{
6534 rtx (*gen) (rtx, rtx, rtx);
6535
6536 switch (mode)
6537 {
6538 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
6539 case HImode: gen = gen_aarch64_load_exclusivehi; break;
6540 case SImode: gen = gen_aarch64_load_exclusivesi; break;
6541 case DImode: gen = gen_aarch64_load_exclusivedi; break;
6542 default:
6543 gcc_unreachable ();
6544 }
6545
6546 emit_insn (gen (rval, mem, model_rtx));
6547}
6548
6549/* Emit store exclusive. */
6550
6551static void
6552aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
6553 rtx rval, rtx mem, rtx model_rtx)
6554{
6555 rtx (*gen) (rtx, rtx, rtx, rtx);
6556
6557 switch (mode)
6558 {
6559 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
6560 case HImode: gen = gen_aarch64_store_exclusivehi; break;
6561 case SImode: gen = gen_aarch64_store_exclusivesi; break;
6562 case DImode: gen = gen_aarch64_store_exclusivedi; break;
6563 default:
6564 gcc_unreachable ();
6565 }
6566
6567 emit_insn (gen (bval, rval, mem, model_rtx));
6568}
6569
6570/* Emit INSN as a jump instruction and mark it as unlikely to be taken. */
6571
6572static void
6573aarch64_emit_unlikely_jump (rtx insn)
6574{
6575 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
6576
6577 insn = emit_jump_insn (insn);
6578 add_reg_note (insn, REG_BR_PROB, very_unlikely);
6579}
6580
6581/* Expand a compare and swap pattern. */
6582
6583void
6584aarch64_expand_compare_and_swap (rtx operands[])
6585{
6586 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
6587 enum machine_mode mode, cmp_mode;
6588 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
6589
6590 bval = operands[0];
6591 rval = operands[1];
6592 mem = operands[2];
6593 oldval = operands[3];
6594 newval = operands[4];
6595 is_weak = operands[5];
6596 mod_s = operands[6];
6597 mod_f = operands[7];
6598 mode = GET_MODE (mem);
6599 cmp_mode = mode;
6600
6601 /* Normally the succ memory model must be stronger than fail, but in the
6602 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
6603 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
6604
6605 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
6606 && INTVAL (mod_s) == MEMMODEL_RELEASE)
6607 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
6608
6609 switch (mode)
6610 {
6611 case QImode:
6612 case HImode:
6613 /* For short modes, we're going to perform the comparison in SImode,
6614 so do the zero-extension now. */
6615 cmp_mode = SImode;
6616 rval = gen_reg_rtx (SImode);
6617 oldval = convert_modes (SImode, mode, oldval, true);
6618 /* Fall through. */
6619
6620 case SImode:
6621 case DImode:
6622 /* Force the value into a register if needed. */
6623 if (!aarch64_plus_operand (oldval, mode))
6624 oldval = force_reg (cmp_mode, oldval);
6625 break;
6626
6627 default:
6628 gcc_unreachable ();
6629 }
6630
6631 switch (mode)
6632 {
6633 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
6634 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
6635 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
6636 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
6637 default:
6638 gcc_unreachable ();
6639 }
6640
6641 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
6642
6643 if (mode == QImode || mode == HImode)
6644 emit_move_insn (operands[1], gen_lowpart (mode, rval));
6645
6646 x = gen_rtx_REG (CCmode, CC_REGNUM);
6647 x = gen_rtx_EQ (SImode, x, const0_rtx);
6648 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
6649}
6650
6651/* Split a compare and swap pattern. */
6652
6653void
6654aarch64_split_compare_and_swap (rtx operands[])
6655{
6656 rtx rval, mem, oldval, newval, scratch;
6657 enum machine_mode mode;
6658 enum memmodel mod_s;
6659 bool is_weak;
6660 rtx label1, label2, x, cond;
6661
6662 rval = operands[0];
6663 mem = operands[1];
6664 oldval = operands[2];
6665 newval = operands[3];
6666 is_weak = (operands[4] != const0_rtx);
6667 mod_s = (enum memmodel) INTVAL (operands[5]);
6668 scratch = operands[7];
6669 mode = GET_MODE (mem);
6670
6671 label1 = NULL_RTX;
6672 if (!is_weak)
6673 {
6674 label1 = gen_label_rtx ();
6675 emit_label (label1);
6676 }
6677 label2 = gen_label_rtx ();
6678
6679 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
6680
6681 cond = aarch64_gen_compare_reg (NE, rval, oldval);
6682 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
6683 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
6684 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
6685 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
6686
6687 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
6688
6689 if (!is_weak)
6690 {
6691 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
6692 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
6693 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
6694 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
6695 }
6696 else
6697 {
6698 cond = gen_rtx_REG (CCmode, CC_REGNUM);
6699 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
6700 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
6701 }
6702
6703 emit_label (label2);
6704}
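
/* For illustration (registers and mnemonics are indicative only): a
   strong SImode compare-and-swap splits into a loop roughly of the form

     .Lretry:
       ldaxr   w0, [x1]        // load-exclusive, per the memory model
       cmp     w0, w2
       b.ne    .Ldone          // comparison failed
       stlxr   w3, w4, [x1]    // store-exclusive, per the memory model
       cbnz    w3, .Lretry     // retry if the exclusive store failed
     .Ldone:

   A weak compare-and-swap omits the retry branch and instead leaves the
   store-exclusive status in the condition flags.  */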
6705
6706/* Split an atomic operation. */
6707
6708void
6709aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
6710 rtx value, rtx model_rtx, rtx cond)
6711{
6712 enum machine_mode mode = GET_MODE (mem);
6713 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
6714 rtx label, x;
6715
6716 label = gen_label_rtx ();
6717 emit_label (label);
6718
6719 if (new_out)
6720 new_out = gen_lowpart (wmode, new_out);
6721 if (old_out)
6722 old_out = gen_lowpart (wmode, old_out);
6723 else
6724 old_out = new_out;
6725 value = simplify_gen_subreg (wmode, value, mode, 0);
6726
6727 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
6728
6729 switch (code)
6730 {
6731 case SET:
6732 new_out = value;
6733 break;
6734
6735 case NOT:
6736 x = gen_rtx_AND (wmode, old_out, value);
6737 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
6738 x = gen_rtx_NOT (wmode, new_out);
6739 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
6740 break;
6741
6742 case MINUS:
6743 if (CONST_INT_P (value))
6744 {
6745 value = GEN_INT (-INTVAL (value));
6746 code = PLUS;
6747 }
6748 /* Fall through. */
6749
6750 default:
6751 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
6752 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
6753 break;
6754 }
6755
6756 aarch64_emit_store_exclusive (mode, cond, mem,
6757 gen_lowpart (mode, new_out), model_rtx);
6758
6759 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
6760 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
6761 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
6762 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
6763}
6764
6765static void
6766aarch64_start_file (void)
6767{
6768 if (selected_arch)
6769 asm_fprintf (asm_out_file, "\t.arch %s\n", selected_arch->name);
6770 else if (selected_cpu)
6771 asm_fprintf (asm_out_file, "\t.cpu %s\n", selected_cpu->name);
6772 default_file_start ();
6773}
6774
6775/* Target hook for c_mode_for_suffix. */
6776static enum machine_mode
6777aarch64_c_mode_for_suffix (char suffix)
6778{
6779 if (suffix == 'q')
6780 return TFmode;
6781
6782 return VOIDmode;
6783}
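
/* For illustration: this maps constants written with a 'q' suffix,
   e.g. 1.0q, onto the 128-bit TFmode representation used for __float128
   on AArch64.  */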
6784
6785#undef TARGET_ADDRESS_COST
6786#define TARGET_ADDRESS_COST aarch64_address_cost
6787
6788/* This hook determines whether unnamed bitfields affect the alignment
6789 of the containing structure. The hook returns true if the structure
6790 should inherit the alignment requirements of an unnamed bitfield's
6791 type. */
6792#undef TARGET_ALIGN_ANON_BITFIELD
6793#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
6794
6795#undef TARGET_ASM_ALIGNED_DI_OP
6796#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
6797
6798#undef TARGET_ASM_ALIGNED_HI_OP
6799#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
6800
6801#undef TARGET_ASM_ALIGNED_SI_OP
6802#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
6803
6804#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
6805#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
6806 hook_bool_const_tree_hwi_hwi_const_tree_true
6807
6808#undef TARGET_ASM_FILE_START
6809#define TARGET_ASM_FILE_START aarch64_start_file
6810
6811#undef TARGET_ASM_OUTPUT_MI_THUNK
6812#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
6813
6814#undef TARGET_ASM_SELECT_RTX_SECTION
6815#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
6816
6817#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
6818#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
6819
6820#undef TARGET_BUILD_BUILTIN_VA_LIST
6821#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
6822
6823#undef TARGET_CALLEE_COPIES
6824#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
6825
6826#undef TARGET_CAN_ELIMINATE
6827#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
6828
6829#undef TARGET_CANNOT_FORCE_CONST_MEM
6830#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
6831
6832#undef TARGET_CONDITIONAL_REGISTER_USAGE
6833#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
6834
6835/* Only the least significant bit is used for initialization guard
6836 variables. */
6837#undef TARGET_CXX_GUARD_MASK_BIT
6838#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
6839
6840#undef TARGET_C_MODE_FOR_SUFFIX
6841#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
6842
6843#ifdef TARGET_BIG_ENDIAN_DEFAULT
6844#undef TARGET_DEFAULT_TARGET_FLAGS
6845#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
6846#endif
6847
6848#undef TARGET_CLASS_MAX_NREGS
6849#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
6850
6851#undef TARGET_BUILTIN_DECL
6852#define TARGET_BUILTIN_DECL aarch64_builtin_decl
6853
6854#undef TARGET_EXPAND_BUILTIN
6855#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
6856
6857#undef TARGET_EXPAND_BUILTIN_VA_START
6858#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
6859
6860#undef TARGET_FUNCTION_ARG
6861#define TARGET_FUNCTION_ARG aarch64_function_arg
6862
6863#undef TARGET_FUNCTION_ARG_ADVANCE
6864#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
6865
6866#undef TARGET_FUNCTION_ARG_BOUNDARY
6867#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
6868
6869#undef TARGET_FUNCTION_OK_FOR_SIBCALL
6870#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
6871
6872#undef TARGET_FUNCTION_VALUE
6873#define TARGET_FUNCTION_VALUE aarch64_function_value
6874
6875#undef TARGET_FUNCTION_VALUE_REGNO_P
6876#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
6877
6878#undef TARGET_FRAME_POINTER_REQUIRED
6879#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
6880
6881#undef TARGET_GIMPLIFY_VA_ARG_EXPR
6882#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
6883
6884#undef TARGET_INIT_BUILTINS
6885#define TARGET_INIT_BUILTINS aarch64_init_builtins
6886
6887#undef TARGET_LEGITIMATE_ADDRESS_P
6888#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
6889
6890#undef TARGET_LEGITIMATE_CONSTANT_P
6891#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
6892
6893#undef TARGET_LIBGCC_CMP_RETURN_MODE
6894#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
6895
6896#undef TARGET_MANGLE_TYPE
6897#define TARGET_MANGLE_TYPE aarch64_mangle_type
6898
6899#undef TARGET_MEMORY_MOVE_COST
6900#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
6901
6902#undef TARGET_MUST_PASS_IN_STACK
6903#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
6904
6905/* This target hook should return true if accesses to volatile bitfields
6906 should use the narrowest mode possible. It should return false if these
6907 accesses should use the bitfield container type. */
6908#undef TARGET_NARROW_VOLATILE_BITFIELD
6909#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
6910
6911#undef TARGET_OPTION_OVERRIDE
6912#define TARGET_OPTION_OVERRIDE aarch64_override_options
6913
6914#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
6915#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
6916 aarch64_override_options_after_change
6917
6918#undef TARGET_PASS_BY_REFERENCE
6919#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
6920
6921#undef TARGET_PREFERRED_RELOAD_CLASS
6922#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
6923
6924#undef TARGET_SECONDARY_RELOAD
6925#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
6926
6927#undef TARGET_SHIFT_TRUNCATION_MASK
6928#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
6929
6930#undef TARGET_SETUP_INCOMING_VARARGS
6931#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
6932
6933#undef TARGET_STRUCT_VALUE_RTX
6934#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
6935
6936#undef TARGET_REGISTER_MOVE_COST
6937#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
6938
6939#undef TARGET_RETURN_IN_MEMORY
6940#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
6941
6942#undef TARGET_RETURN_IN_MSB
6943#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
6944
6945#undef TARGET_RTX_COSTS
6946#define TARGET_RTX_COSTS aarch64_rtx_costs
6947
6948#undef TARGET_TRAMPOLINE_INIT
6949#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
6950
6951#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
6952#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
6953
6954#undef TARGET_VECTOR_MODE_SUPPORTED_P
6955#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
6956
6957#undef TARGET_ARRAY_MODE_SUPPORTED_P
6958#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
6959
6960#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
6961#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
6962
6963#undef TARGET_VECTORIZE_BUILTINS
6964#define TARGET_VECTORIZE_BUILTINS
6965
6966#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
6967#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
6968 aarch64_builtin_vectorized_function
6969
6970/* Section anchor support. */
6971
6972#undef TARGET_MIN_ANCHOR_OFFSET
6973#define TARGET_MIN_ANCHOR_OFFSET -256
6974
6975/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
6976 byte offset; we can do much more for larger data types, but have no way
6977 to determine the size of the access. We assume accesses are aligned. */
6978#undef TARGET_MAX_ANCHOR_OFFSET
6979#define TARGET_MAX_ANCHOR_OFFSET 4095
6980
6981#undef TARGET_VECTOR_ALIGNMENT
6982#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
6983
6984#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
6985#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
6986 aarch64_simd_vector_alignment_reachable
6987
6988struct gcc_target targetm = TARGET_INITIALIZER;
6989
6990#include "gt-aarch64.h"