1/* Machine description for AArch64 architecture.
2 Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
25#include "insn-codes.h"
26#include "rtl.h"
27#include "insn-attr.h"
28#include "tree.h"
29#include "regs.h"
30#include "df.h"
31#include "hard-reg-set.h"
32#include "output.h"
33#include "expr.h"
34#include "reload.h"
35#include "toplev.h"
36#include "target.h"
37#include "target-def.h"
38#include "targhooks.h"
39#include "ggc.h"
40#include "function.h"
41#include "tm_p.h"
42#include "recog.h"
43#include "langhooks.h"
44#include "diagnostic-core.h"
45#include "gimple.h"
46#include "optabs.h"
47#include "dwarf2.h"
48
49/* Classifies an address.
50
51 ADDRESS_REG_IMM
52 A simple base register plus immediate offset.
53
54 ADDRESS_REG_WB
55 A base register indexed by immediate offset with writeback.
56
57 ADDRESS_REG_REG
58 A base register indexed by (optionally scaled) register.
59
60 ADDRESS_REG_UXTW
61 A base register indexed by (optionally scaled) zero-extended register.
62
63 ADDRESS_REG_SXTW
64 A base register indexed by (optionally scaled) sign-extended register.
65
66 ADDRESS_LO_SUM
67 A LO_SUM rtx with a base register and "LO12" symbol relocation.
68
 69   ADDRESS_SYMBOLIC
70 A constant symbolic address, in pc-relative literal pool. */
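/* Illustrative RTL shapes for the classes above (not an exhaustive list):
   ADDRESS_REG_IMM:   (plus (reg) (const_int 16))
   ADDRESS_REG_WB:    (pre_modify (reg) (plus (reg) (const_int 16)))
   ADDRESS_REG_REG:   (plus (reg) (reg)), possibly with a scaled index
   ADDRESS_REG_UXTW:  (plus (reg) (zero_extend (reg)))
   ADDRESS_REG_SXTW:  (plus (reg) (sign_extend (reg)))
   ADDRESS_LO_SUM:    (lo_sum (reg) (symbol_ref))
   ADDRESS_SYMBOLIC:  (symbol_ref) or (label_ref)  */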
71
72enum aarch64_address_type {
73 ADDRESS_REG_IMM,
74 ADDRESS_REG_WB,
75 ADDRESS_REG_REG,
76 ADDRESS_REG_UXTW,
77 ADDRESS_REG_SXTW,
78 ADDRESS_LO_SUM,
79 ADDRESS_SYMBOLIC
80};
81
82struct aarch64_address_info {
83 enum aarch64_address_type type;
84 rtx base;
85 rtx offset;
86 int shift;
87 enum aarch64_symbol_type symbol_type;
88};
89
90/* The current code model. */
91enum aarch64_code_model aarch64_cmodel;
92
93#ifdef HAVE_AS_TLS
94#undef TARGET_HAVE_TLS
95#define TARGET_HAVE_TLS 1
96#endif
97
98static bool aarch64_composite_type_p (const_tree, enum machine_mode);
99static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
100 const_tree,
101 enum machine_mode *, int *,
102 bool *);
103static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
104static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
105static void aarch64_override_options_after_change (void);
106static int aarch64_simd_valid_immediate (rtx, enum machine_mode, int, rtx *,
107 int *, unsigned char *, int *, int *);
108static bool aarch64_vector_mode_supported_p (enum machine_mode);
109static unsigned bit_count (unsigned HOST_WIDE_INT);
110static bool aarch64_const_vec_all_same_int_p (rtx,
111 HOST_WIDE_INT, HOST_WIDE_INT);
112
113/* The processor for which instructions should be scheduled. */
114enum aarch64_processor aarch64_tune = generic;
115
116/* The current tuning set. */
117const struct tune_params *aarch64_tune_params;
118
119/* Mask to specify which instructions we are allowed to generate. */
120unsigned long aarch64_isa_flags = 0;
121
122/* Mask to specify which instruction scheduling options should be used. */
123unsigned long aarch64_tune_flags = 0;
124
125/* Tuning parameters. */
126
127#if HAVE_DESIGNATED_INITIALIZERS
128#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
129#else
130#define NAMED_PARAM(NAME, VAL) (VAL)
131#endif
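/* For example, with designated initializers
   NAMED_PARAM (memory_load, COSTS_N_INSNS (1)) expands to
   ".memory_load = COSTS_N_INSNS (1)"; without them it expands to just
   "COSTS_N_INSNS (1)", so the initializers below must then appear in the
   same order as the fields of the corresponding cost structure.  */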
132
133#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
134__extension__
135#endif
136static const struct cpu_rtx_cost_table generic_rtx_cost_table =
137{
138 NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
139 NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
140 NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
141 NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
142 NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
143 NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
144 NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
145 NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
146 NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
147 NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
148 NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
149 NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
150};
151
152#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
153__extension__
154#endif
155static const struct cpu_addrcost_table generic_addrcost_table =
156{
157 NAMED_PARAM (pre_modify, 0),
158 NAMED_PARAM (post_modify, 0),
159 NAMED_PARAM (register_offset, 0),
160 NAMED_PARAM (register_extend, 0),
161 NAMED_PARAM (imm_offset, 0)
162};
163
164#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
165__extension__
166#endif
167static const struct cpu_regmove_cost generic_regmove_cost =
168{
169 NAMED_PARAM (GP2GP, 1),
170 NAMED_PARAM (GP2FP, 2),
171 NAMED_PARAM (FP2GP, 2),
172 /* We currently do not provide direct support for TFmode Q->Q move.
173 Therefore we need to raise the cost above 2 in order to have
174 reload handle the situation. */
175 NAMED_PARAM (FP2FP, 4)
176};
177
178#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
179__extension__
180#endif
181static const struct tune_params generic_tunings =
182{
183 &generic_rtx_cost_table,
184 &generic_addrcost_table,
185 &generic_regmove_cost,
186 NAMED_PARAM (memmov_cost, 4)
187};
188
189/* A processor implementing AArch64. */
190struct processor
191{
192 const char *const name;
193 enum aarch64_processor core;
194 const char *arch;
195 const unsigned long flags;
196 const struct tune_params *const tune;
197};
198
199/* Processor cores implementing AArch64. */
200static const struct processor all_cores[] =
201{
202#define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
203 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
204#include "aarch64-cores.def"
205#undef AARCH64_CORE
206 {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
207 {NULL, aarch64_none, NULL, 0, NULL}
208};
209
210/* Architectures implementing AArch64. */
211static const struct processor all_architectures[] =
212{
213#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
214 {NAME, CORE, #ARCH, FLAGS, NULL},
215#include "aarch64-arches.def"
216#undef AARCH64_ARCH
217 {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
218 {NULL, aarch64_none, NULL, 0, NULL}
219};
220
221/* Target specification.  These are populated as command-line arguments
222 are processed, or NULL if not specified. */
223static const struct processor *selected_arch;
224static const struct processor *selected_cpu;
225static const struct processor *selected_tune;
226
227#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
228
229/* An ISA extension in the co-processor and main instruction set space. */
230struct aarch64_option_extension
231{
232 const char *const name;
233 const unsigned long flags_on;
234 const unsigned long flags_off;
235};
236
237/* ISA extensions in AArch64. */
238static const struct aarch64_option_extension all_extensions[] =
239{
240#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
241 {NAME, FLAGS_ON, FLAGS_OFF},
242#include "aarch64-option-extensions.def"
243#undef AARCH64_OPT_EXTENSION
244 {NULL, 0, 0}
245};
246
247/* Used to track the size of an address when generating a pre/post
248 increment address. */
249static enum machine_mode aarch64_memory_reference_mode;
250
251/* Used to force GTY into this file. */
252static GTY(()) int gty_dummy;
253
254/* A table of valid AArch64 "bitmask immediate" values for
255 logical instructions. */
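/* A "bitmask immediate" is a single contiguous (possibly rotated) run of
   ones replicated across 2-, 4-, 8-, 16-, 32- or 64-bit elements; for
   example 0x00ff00ff00ff00ff is encodable, while 0 and most arbitrary
   values are not.  AARCH64_NUM_BITMASKS counts the distinct 64-bit values
   of this form.  */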
256
257#define AARCH64_NUM_BITMASKS 5334
258static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
259
260/* Did we set flag_omit_frame_pointer just so
261 aarch64_frame_pointer_required would be called? */
262static bool faked_omit_frame_pointer;
263
264typedef enum aarch64_cond_code
265{
266 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
267 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
268 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
269}
270aarch64_cc;
271
272#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
273
274/* The condition codes of the processor, and the inverse function. */
275static const char * const aarch64_condition_codes[] =
276{
277 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
278 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
279};
280
281/* Provide a mapping from gcc register numbers to dwarf register numbers. */
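/* With the AARCH64_DWARF_* values defined elsewhere in this port
   (aarch64.h), x0-x30 map to DWARF numbers 0-30, the stack pointer to 31
   and v0-v31 to 64-95.  */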
282unsigned
283aarch64_dbx_register_number (unsigned regno)
284{
285 if (GP_REGNUM_P (regno))
286 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
287 else if (regno == SP_REGNUM)
288 return AARCH64_DWARF_SP;
289 else if (FP_REGNUM_P (regno))
290 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
291
292 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
293 equivalent DWARF register. */
294 return DWARF_FRAME_REGISTERS;
295}
296
297/* Return TRUE if MODE is any of the large INT modes. */
298static bool
299aarch64_vect_struct_mode_p (enum machine_mode mode)
300{
301 return mode == OImode || mode == CImode || mode == XImode;
302}
303
304/* Return TRUE if MODE is any of the vector modes. */
305static bool
306aarch64_vector_mode_p (enum machine_mode mode)
307{
308 return aarch64_vector_mode_supported_p (mode)
309 || aarch64_vect_struct_mode_p (mode);
310}
311
312/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
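/* For example, an array of two to four elements whose element mode is a
   128-bit SIMD mode (e.g. V4SImode), as used by the LD2/LD3/LD4 structure
   load and store instructions.  */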
313static bool
314aarch64_array_mode_supported_p (enum machine_mode mode,
315 unsigned HOST_WIDE_INT nelems)
316{
317 if (TARGET_SIMD
318 && AARCH64_VALID_SIMD_QREG_MODE (mode)
319 && (nelems >= 2 && nelems <= 4))
320 return true;
321
322 return false;
323}
324
325/* Implement HARD_REGNO_NREGS. */
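/* For example, assuming UNITS_PER_WORD == 8 and UNITS_PER_VREG == 16 as
   defined for this port, a TImode value needs two general registers but
   only a single FP/SIMD register.  */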
326
327int
328aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
329{
330 switch (aarch64_regno_regclass (regno))
331 {
332 case FP_REGS:
333 case FP_LO_REGS:
334 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
335 default:
336 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
337 }
338 gcc_unreachable ();
339}
340
341/* Implement HARD_REGNO_MODE_OK. */
342
343int
344aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
345{
346 if (GET_MODE_CLASS (mode) == MODE_CC)
347 return regno == CC_REGNUM;
348
349 if (regno == SP_REGNUM || regno == FRAME_POINTER_REGNUM
350 || regno == ARG_POINTER_REGNUM)
351 return mode == Pmode;
352
353 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
354 return 1;
355
356 if (FP_REGNUM_P (regno))
357 {
358 if (aarch64_vect_struct_mode_p (mode))
359 return
360 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
361 else
362 return 1;
363 }
364
365 return 0;
366}
367
368/* Return true if calls to DECL should be treated as
 369   long-calls (i.e. called via a register).  */
370static bool
371aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
372{
373 return false;
374}
375
376/* Return true if calls to symbol-ref SYM should be treated as
 377   long-calls (i.e. called via a register).  */
378bool
379aarch64_is_long_call_p (rtx sym)
380{
381 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
382}
383
384/* Return true if the offsets to a zero/sign-extract operation
385 represent an expression that matches an extend operation. The
 386   operands represent the parameters from
387
388 (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
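/* For example (illustrative values only), in DImode a mult_imm of 4 with
   an extract_imm of 34 (i.e. 32 + 2) passes the checks below: it describes
   the low 32 bits of the register shifted left by 2, i.e. an
   extend-and-shift.  */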
389bool
390aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
391 rtx extract_imm)
392{
393 HOST_WIDE_INT mult_val, extract_val;
394
395 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
396 return false;
397
398 mult_val = INTVAL (mult_imm);
399 extract_val = INTVAL (extract_imm);
400
401 if (extract_val > 8
402 && extract_val < GET_MODE_BITSIZE (mode)
403 && exact_log2 (extract_val & ~7) > 0
404 && (extract_val & 7) <= 4
405 && mult_val == (1 << (extract_val & 7)))
406 return true;
407
408 return false;
409}
410
411/* Emit an insn that's a simple single-set. Both the operands must be
412 known to be valid. */
413inline static rtx
414emit_set_insn (rtx x, rtx y)
415{
416 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
417}
418
419/* X and Y are two things to compare using CODE. Emit the compare insn and
420 return the rtx for register 0 in the proper mode. */
421rtx
422aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
423{
424 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
425 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
426
427 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
428 return cc_reg;
429}
430
431/* Build the SYMBOL_REF for __tls_get_addr. */
432
433static GTY(()) rtx tls_get_addr_libfunc;
434
435rtx
436aarch64_tls_get_addr (void)
437{
438 if (!tls_get_addr_libfunc)
439 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
440 return tls_get_addr_libfunc;
441}
442
443/* Return the TLS model to use for ADDR. */
444
445static enum tls_model
446tls_symbolic_operand_type (rtx addr)
447{
448 enum tls_model tls_kind = TLS_MODEL_NONE;
449 rtx sym, addend;
450
451 if (GET_CODE (addr) == CONST)
452 {
453 split_const (addr, &sym, &addend);
454 if (GET_CODE (sym) == SYMBOL_REF)
455 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
456 }
457 else if (GET_CODE (addr) == SYMBOL_REF)
458 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
459
460 return tls_kind;
461}
462
463/* We'll allow lo_sums in our legitimate addresses
 464   so that combine can take care of combining addresses where
 465   necessary, but for generation purposes, we'll generate the address
 466   as:
467 RTL Absolute
468 tmp = hi (symbol_ref); adrp x1, foo
469 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
470 nop
471
472 PIC TLS
473 adrp x1, :got:foo adrp tmp, :tlsgd:foo
474 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
475 bl __tls_get_addr
476 nop
477
478 Load TLS symbol, depending on TLS mechanism and TLS access model.
479
480 Global Dynamic - Traditional TLS:
481 adrp tmp, :tlsgd:imm
482 add dest, tmp, #:tlsgd_lo12:imm
483 bl __tls_get_addr
484
485 Global Dynamic - TLS Descriptors:
486 adrp dest, :tlsdesc:imm
487 ldr tmp, [dest, #:tlsdesc_lo12:imm]
488 add dest, dest, #:tlsdesc_lo12:imm
489 blr tmp
490 mrs tp, tpidr_el0
491 add dest, dest, tp
492
493 Initial Exec:
494 mrs tp, tpidr_el0
495 adrp tmp, :gottprel:imm
496 ldr dest, [tmp, #:gottprel_lo12:imm]
497 add dest, dest, tp
498
499 Local Exec:
500 mrs tp, tpidr_el0
501 add t0, tp, #:tprel_hi12:imm
502 add t0, #:tprel_lo12_nc:imm
503*/
504
505static void
506aarch64_load_symref_appropriately (rtx dest, rtx imm,
507 enum aarch64_symbol_type type)
508{
509 switch (type)
510 {
511 case SYMBOL_SMALL_ABSOLUTE:
512 {
513 rtx tmp_reg = dest;
514 if (can_create_pseudo_p ())
515 {
516 tmp_reg = gen_reg_rtx (Pmode);
517 }
518
519 emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
520 emit_insn (gen_add_losym (dest, tmp_reg, imm));
521 return;
522 }
523
524 case SYMBOL_SMALL_GOT:
525 {
526 rtx tmp_reg = dest;
527 if (can_create_pseudo_p ())
528 {
529 tmp_reg = gen_reg_rtx (Pmode);
530 }
531 emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
532 emit_insn (gen_ldr_got_small (dest, tmp_reg, imm));
533 return;
534 }
535
536 case SYMBOL_SMALL_TLSGD:
537 {
538 rtx insns;
539 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
540
541 start_sequence ();
542 emit_call_insn (gen_tlsgd_small (result, imm));
543 insns = get_insns ();
544 end_sequence ();
545
546 RTL_CONST_CALL_P (insns) = 1;
547 emit_libcall_block (insns, dest, result, imm);
548 return;
549 }
550
551 case SYMBOL_SMALL_TLSDESC:
552 {
553 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
554 rtx tp;
555
556 emit_insn (gen_tlsdesc_small (imm));
557 tp = aarch64_load_tp (NULL);
558 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
559 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
560 return;
561 }
562
563 case SYMBOL_SMALL_GOTTPREL:
564 {
565 rtx tmp_reg = gen_reg_rtx (Pmode);
566 rtx tp = aarch64_load_tp (NULL);
567 emit_insn (gen_tlsie_small (tmp_reg, imm));
568 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
569 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
570 return;
571 }
572
573 case SYMBOL_SMALL_TPREL:
574 {
575 rtx tp = aarch64_load_tp (NULL);
576 emit_insn (gen_tlsle_small (dest, tp, imm));
577 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
578 return;
579 }
580
581 default:
582 gcc_unreachable ();
583 }
584}
585
586/* Emit a move from SRC to DEST. Assume that the move expanders can
587 handle all moves if !can_create_pseudo_p (). The distinction is
588 important because, unlike emit_move_insn, the move expanders know
589 how to force Pmode objects into the constant pool even when the
590 constant pool address is not itself legitimate. */
591static rtx
592aarch64_emit_move (rtx dest, rtx src)
593{
594 return (can_create_pseudo_p ()
595 ? emit_move_insn (dest, src)
596 : emit_move_insn_1 (dest, src));
597}
598
599void
600aarch64_split_128bit_move (rtx dst, rtx src)
601{
602 rtx low_dst;
603
604 gcc_assert (GET_MODE (dst) == TImode);
605
606 if (REG_P (dst) && REG_P (src))
607 {
608 int src_regno = REGNO (src);
609 int dst_regno = REGNO (dst);
610
611 gcc_assert (GET_MODE (src) == TImode);
612
613 /* Handle r -> w, w -> r. */
614 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
615 {
616 emit_insn (gen_aarch64_movtilow_di (dst,
617 gen_lowpart (word_mode, src)));
618 emit_insn (gen_aarch64_movtihigh_di (dst,
619 gen_highpart (word_mode, src)));
620 return;
621 }
622 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
623 {
624 emit_insn (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst),
625 src));
626 emit_insn (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst),
627 src));
628 return;
629 }
630 /* Fall through to r -> r cases. */
631 }
632
633 low_dst = gen_lowpart (word_mode, dst);
634 if (REG_P (low_dst)
635 && reg_overlap_mentioned_p (low_dst, src))
636 {
637 aarch64_emit_move (gen_highpart (word_mode, dst),
638 gen_highpart_mode (word_mode, TImode, src));
639 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
640 }
641 else
642 {
643 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
644 aarch64_emit_move (gen_highpart (word_mode, dst),
645 gen_highpart_mode (word_mode, TImode, src));
646 }
647}
648
649bool
650aarch64_split_128bit_move_p (rtx dst, rtx src)
651{
652 return (! REG_P (src)
653 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
654}
655
656static rtx
657aarch64_force_temporary (rtx x, rtx value)
658{
659 if (can_create_pseudo_p ())
660 return force_reg (Pmode, value);
661 else
662 {
663 x = aarch64_emit_move (x, value);
664 return x;
665 }
666}
667
668
669static rtx
670aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
671{
672 if (!aarch64_plus_immediate (GEN_INT (offset), DImode))
673 {
674 rtx high;
675 /* Load the full offset into a register. This
676 might be improvable in the future. */
677 high = GEN_INT (offset);
678 offset = 0;
679 high = aarch64_force_temporary (temp, high);
680 reg = aarch64_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
681 }
682 return plus_constant (mode, reg, offset);
683}
684
685void
686aarch64_expand_mov_immediate (rtx dest, rtx imm)
687{
688 enum machine_mode mode = GET_MODE (dest);
689 unsigned HOST_WIDE_INT mask;
690 int i;
691 bool first;
692 unsigned HOST_WIDE_INT val;
693 bool subtargets;
694 rtx subtarget;
695 int one_match, zero_match;
696
697 gcc_assert (mode == SImode || mode == DImode);
698
699 /* Check on what type of symbol it is. */
700 if (GET_CODE (imm) == SYMBOL_REF
701 || GET_CODE (imm) == LABEL_REF
702 || GET_CODE (imm) == CONST)
703 {
704 rtx mem, base, offset;
705 enum aarch64_symbol_type sty;
706
707 /* If we have (const (plus symbol offset)), separate out the offset
708 before we start classifying the symbol. */
709 split_const (imm, &base, &offset);
710
711 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
712 switch (sty)
713 {
714 case SYMBOL_FORCE_TO_MEM:
715 if (offset != const0_rtx
716 && targetm.cannot_force_const_mem (mode, imm))
717 {
718 gcc_assert(can_create_pseudo_p ());
719 base = aarch64_force_temporary (dest, base);
720 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
721 aarch64_emit_move (dest, base);
722 return;
723 }
724 mem = force_const_mem (mode, imm);
725 gcc_assert (mem);
726 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
727 return;
728
729 case SYMBOL_SMALL_TLSGD:
730 case SYMBOL_SMALL_TLSDESC:
731 case SYMBOL_SMALL_GOTTPREL:
732 case SYMBOL_SMALL_GOT:
733 if (offset != const0_rtx)
734 {
735 gcc_assert(can_create_pseudo_p ());
736 base = aarch64_force_temporary (dest, base);
737 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
738 aarch64_emit_move (dest, base);
739 return;
740 }
741 /* FALLTHRU */
742
743 case SYMBOL_SMALL_TPREL:
744 case SYMBOL_SMALL_ABSOLUTE:
745 aarch64_load_symref_appropriately (dest, imm, sty);
746 return;
747
748 default:
749 gcc_unreachable ();
750 }
751 }
752
753 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
754 {
755 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
756 return;
757 }
758
759 if (!CONST_INT_P (imm))
760 {
761 if (GET_CODE (imm) == HIGH)
762 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
763 else
764 {
765 rtx mem = force_const_mem (mode, imm);
766 gcc_assert (mem);
767 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
768 }
769
770 return;
771 }
772
773 if (mode == SImode)
774 {
775 /* We know we can't do this in 1 insn, and we must be able to do it
776 in two; so don't mess around looking for sequences that don't buy
777 us anything. */
778 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
779 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
780 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
781 return;
782 }
783
784 /* Remaining cases are all for DImode. */
785
786 val = INTVAL (imm);
787 subtargets = optimize && can_create_pseudo_p ();
788
789 one_match = 0;
790 zero_match = 0;
791 mask = 0xffff;
792
793 for (i = 0; i < 64; i += 16, mask <<= 16)
794 {
795 if ((val & mask) == 0)
796 zero_match++;
797 else if ((val & mask) == mask)
798 one_match++;
799 }
800
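  /* As an illustration, a value such as 0x1234ffffffff5678 has two 0xffff
     halfwords (one_match == 2); the code below then materializes
     0x1234ffffffffffff (a MOVN-class constant) and patches the low 16 bits
     with an insert, giving a two-instruction sequence.  */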
801 if (one_match == 2)
802 {
803 mask = 0xffff;
804 for (i = 0; i < 64; i += 16, mask <<= 16)
805 {
806 if ((val & mask) != mask)
807 {
808 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
809 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
810 GEN_INT ((val >> i) & 0xffff)));
811 return;
812 }
813 }
814 gcc_unreachable ();
815 }
816
817 if (zero_match == 2)
818 goto simple_sequence;
819
820 mask = 0x0ffff0000UL;
821 for (i = 16; i < 64; i += 16, mask <<= 16)
822 {
823 HOST_WIDE_INT comp = mask & ~(mask - 1);
824
825 if (aarch64_uimm12_shift (val - (val & mask)))
826 {
827 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
828
829 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
830 emit_insn (gen_adddi3 (dest, subtarget,
831 GEN_INT (val - (val & mask))));
832 return;
833 }
834 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
835 {
836 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
837
838 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
839 GEN_INT ((val + comp) & mask)));
840 emit_insn (gen_adddi3 (dest, subtarget,
841 GEN_INT (val - ((val + comp) & mask))));
842 return;
843 }
844 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
845 {
846 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
847
848 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
849 GEN_INT ((val - comp) | ~mask)));
850 emit_insn (gen_adddi3 (dest, subtarget,
851 GEN_INT (val - ((val - comp) | ~mask))));
852 return;
853 }
854 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
855 {
856 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
857
858 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
859 GEN_INT (val | ~mask)));
860 emit_insn (gen_adddi3 (dest, subtarget,
861 GEN_INT (val - (val | ~mask))));
862 return;
863 }
864 }
865
866 /* See if we can do it by arithmetically combining two
867 immediates. */
868 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
869 {
870 int j;
871 mask = 0xffff;
872
873 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
874 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
875 {
876 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
877 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
878 GEN_INT (aarch64_bitmasks[i])));
879 emit_insn (gen_adddi3 (dest, subtarget,
880 GEN_INT (val - aarch64_bitmasks[i])));
881 return;
882 }
883
884 for (j = 0; j < 64; j += 16, mask <<= 16)
885 {
886 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
887 {
888 emit_insn (gen_rtx_SET (VOIDmode, dest,
889 GEN_INT (aarch64_bitmasks[i])));
890 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
891 GEN_INT ((val >> j) & 0xffff)));
892 return;
893 }
894 }
895 }
896
897 /* See if we can do it by logically combining two immediates. */
898 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
899 {
900 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
901 {
902 int j;
903
904 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
905 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
906 {
907 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
908 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
909 GEN_INT (aarch64_bitmasks[i])));
910 emit_insn (gen_iordi3 (dest, subtarget,
911 GEN_INT (aarch64_bitmasks[j])));
912 return;
913 }
914 }
915 else if ((val & aarch64_bitmasks[i]) == val)
916 {
917 int j;
918
919 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
920 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
921 {
922
923 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
924 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
925 GEN_INT (aarch64_bitmasks[j])));
926 emit_insn (gen_anddi3 (dest, subtarget,
927 GEN_INT (aarch64_bitmasks[i])));
928 return;
929 }
930 }
931 }
932
933 simple_sequence:
934 first = true;
935 mask = 0xffff;
936 for (i = 0; i < 64; i += 16, mask <<= 16)
937 {
938 if ((val & mask) != 0)
939 {
940 if (first)
941 {
942 emit_insn (gen_rtx_SET (VOIDmode, dest,
943 GEN_INT (val & mask)));
944 first = false;
945 }
946 else
947 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
948 GEN_INT ((val >> i) & 0xffff)));
949 }
950 }
951}
952
953static bool
954aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
955{
956 /* Indirect calls are not currently supported. */
957 if (decl == NULL)
958 return false;
959
 960  /* Cannot tail-call to long-calls, since these are outside the
 961     range of a branch instruction (we could handle this if we added
 962     support for indirect tail-calls).  */
963 if (aarch64_decl_is_long_call_p (decl))
964 return false;
965
966 return true;
967}
968
969/* Implement TARGET_PASS_BY_REFERENCE. */
970
971static bool
972aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
973 enum machine_mode mode,
974 const_tree type,
975 bool named ATTRIBUTE_UNUSED)
976{
977 HOST_WIDE_INT size;
978 enum machine_mode dummymode;
979 int nregs;
980
981 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
982 size = (mode == BLKmode && type)
983 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
984
985 if (type)
986 {
 987      /* Arrays are always passed by reference.  */
988 if (TREE_CODE (type) == ARRAY_TYPE)
989 return true;
 990      /* For other aggregates, base the decision on their size.  */
991 if (AGGREGATE_TYPE_P (type))
992 size = int_size_in_bytes (type);
993 }
994
 995  /* Variable-sized arguments are always passed by reference.  */
996 if (size < 0)
997 return true;
998
999 /* Can this be a candidate to be passed in fp/simd register(s)? */
1000 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1001 &dummymode, &nregs,
1002 NULL))
1003 return false;
1004
 1005  /* Arguments which are variable sized or larger than 2 registers are
 1006     passed by reference unless they are a homogeneous floating-point
 1007     aggregate.  */
1008 return size > 2 * UNITS_PER_WORD;
1009}
1010
1011/* Return TRUE if VALTYPE is padded to its least significant bits. */
1012static bool
1013aarch64_return_in_msb (const_tree valtype)
1014{
1015 enum machine_mode dummy_mode;
1016 int dummy_int;
1017
1018 /* Never happens in little-endian mode. */
1019 if (!BYTES_BIG_ENDIAN)
1020 return false;
1021
1022 /* Only composite types smaller than or equal to 16 bytes can
1023 be potentially returned in registers. */
1024 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1025 || int_size_in_bytes (valtype) <= 0
1026 || int_size_in_bytes (valtype) > 16)
1027 return false;
1028
1029 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1030 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1031 is always passed/returned in the least significant bits of fp/simd
1032 register(s). */
1033 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1034 &dummy_mode, &dummy_int, NULL))
1035 return false;
1036
1037 return true;
1038}
1039
1040/* Implement TARGET_FUNCTION_VALUE.
1041 Define how to find the value returned by a function. */
1042
1043static rtx
1044aarch64_function_value (const_tree type, const_tree func,
1045 bool outgoing ATTRIBUTE_UNUSED)
1046{
1047 enum machine_mode mode;
1048 int unsignedp;
1049 int count;
1050 enum machine_mode ag_mode;
1051
1052 mode = TYPE_MODE (type);
1053 if (INTEGRAL_TYPE_P (type))
1054 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1055
1056 if (aarch64_return_in_msb (type))
1057 {
1058 HOST_WIDE_INT size = int_size_in_bytes (type);
1059
1060 if (size % UNITS_PER_WORD != 0)
1061 {
1062 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1063 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1064 }
1065 }
1066
1067 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1068 &ag_mode, &count, NULL))
1069 {
1070 if (!aarch64_composite_type_p (type, mode))
1071 {
1072 gcc_assert (count == 1 && mode == ag_mode);
1073 return gen_rtx_REG (mode, V0_REGNUM);
1074 }
1075 else
1076 {
1077 int i;
1078 rtx par;
1079
1080 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1081 for (i = 0; i < count; i++)
1082 {
1083 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1084 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1085 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1086 XVECEXP (par, 0, i) = tmp;
1087 }
1088 return par;
1089 }
1090 }
1091 else
1092 return gen_rtx_REG (mode, R0_REGNUM);
1093}
1094
1095/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1096 Return true if REGNO is the number of a hard register in which the values
1097 of called function may come back. */
1098
1099static bool
1100aarch64_function_value_regno_p (const unsigned int regno)
1101{
 1102  /* A maximum of 16 bytes can be returned in the general registers.  Examples
1103 of 16-byte return values are: 128-bit integers and 16-byte small
1104 structures (excluding homogeneous floating-point aggregates). */
1105 if (regno == R0_REGNUM || regno == R1_REGNUM)
1106 return true;
1107
1108 /* Up to four fp/simd registers can return a function value, e.g. a
1109 homogeneous floating-point aggregate having four members. */
1110 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1111 return !TARGET_GENERAL_REGS_ONLY;
1112
1113 return false;
1114}
1115
1116/* Implement TARGET_RETURN_IN_MEMORY.
1117
1118 If the type T of the result of a function is such that
1119 void func (T arg)
1120 would require that arg be passed as a value in a register (or set of
1121 registers) according to the parameter passing rules, then the result
1122 is returned in the same registers as would be used for such an
1123 argument. */
1124
1125static bool
1126aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1127{
1128 HOST_WIDE_INT size;
1129 enum machine_mode ag_mode;
1130 int count;
1131
1132 if (!AGGREGATE_TYPE_P (type)
1133 && TREE_CODE (type) != COMPLEX_TYPE
1134 && TREE_CODE (type) != VECTOR_TYPE)
1135 /* Simple scalar types always returned in registers. */
1136 return false;
1137
1138 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1139 type,
1140 &ag_mode,
1141 &count,
1142 NULL))
1143 return false;
1144
 1145  /* Types larger than 2 registers are returned in memory.  */
1146 size = int_size_in_bytes (type);
1147 return (size < 0 || size > 2 * UNITS_PER_WORD);
1148}
1149
1150static bool
1151aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1152 const_tree type, int *nregs)
1153{
1154 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1155 return aarch64_vfp_is_call_or_return_candidate (mode,
1156 type,
1157 &pcum->aapcs_vfp_rmode,
1158 nregs,
1159 NULL);
1160}
1161
1162/* Given MODE and TYPE of a function argument, return the alignment in
1163 bits. The idea is to suppress any stronger alignment requested by
1164 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1165 This is a helper function for local use only. */
1166
1167static unsigned int
1168aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1169{
1170 unsigned int alignment;
1171
1172 if (type)
1173 {
1174 if (!integer_zerop (TYPE_SIZE (type)))
1175 {
1176 if (TYPE_MODE (type) == mode)
1177 alignment = TYPE_ALIGN (type);
1178 else
1179 alignment = GET_MODE_ALIGNMENT (mode);
1180 }
1181 else
1182 alignment = 0;
1183 }
1184 else
1185 alignment = GET_MODE_ALIGNMENT (mode);
1186
1187 return alignment;
1188}
1189
1190/* Layout a function argument according to the AAPCS64 rules. The rule
1191 numbers refer to the rule numbers in the AAPCS64. */
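/* For example (AAPCS64 rule C.8, handled below), a 16-byte-aligned
   __int128 argument arriving when the next general register number is odd
   is aligned up to the next even register pair, e.g. passed in x2/x3
   rather than x1/x2.  */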
1192
1193static void
1194aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1195 const_tree type,
1196 bool named ATTRIBUTE_UNUSED)
1197{
1198 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1199 int ncrn, nvrn, nregs;
1200 bool allocate_ncrn, allocate_nvrn;
1201
1202 /* We need to do this once per argument. */
1203 if (pcum->aapcs_arg_processed)
1204 return;
1205
1206 pcum->aapcs_arg_processed = true;
1207
1208 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1209 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1210 mode,
1211 type,
1212 &nregs);
1213
 1214  /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1215 The following code thus handles passing by SIMD/FP registers first. */
1216
1217 nvrn = pcum->aapcs_nvrn;
1218
 1219  /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
 1220     and homogeneous short-vector aggregates (HVA).  */
1221 if (allocate_nvrn)
1222 {
1223 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1224 {
1225 pcum->aapcs_nextnvrn = nvrn + nregs;
1226 if (!aarch64_composite_type_p (type, mode))
1227 {
1228 gcc_assert (nregs == 1);
1229 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1230 }
1231 else
1232 {
1233 rtx par;
1234 int i;
1235 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1236 for (i = 0; i < nregs; i++)
1237 {
1238 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1239 V0_REGNUM + nvrn + i);
1240 tmp = gen_rtx_EXPR_LIST
1241 (VOIDmode, tmp,
1242 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1243 XVECEXP (par, 0, i) = tmp;
1244 }
1245 pcum->aapcs_reg = par;
1246 }
1247 return;
1248 }
1249 else
1250 {
1251 /* C.3 NSRN is set to 8. */
1252 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1253 goto on_stack;
1254 }
1255 }
1256
1257 ncrn = pcum->aapcs_ncrn;
1258 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1259 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1260
1261
 1262  /* C6 - C9, though the sign and zero extension semantics are
 1263     handled elsewhere.  This is the case where the argument fits
 1264     entirely in general registers.  */
1265 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1266 {
1267 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1268
1269 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1270
 1271      /* C.8: if the argument has an alignment of 16 bytes then the NGRN is
1272 rounded up to the next even number. */
1273 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1274 {
1275 ++ncrn;
1276 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1277 }
1278 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1279 A reg is still generated for it, but the caller should be smart
1280 enough not to use it. */
1281 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1282 {
1283 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1284 }
1285 else
1286 {
1287 rtx par;
1288 int i;
1289
1290 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1291 for (i = 0; i < nregs; i++)
1292 {
1293 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1294 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1295 GEN_INT (i * UNITS_PER_WORD));
1296 XVECEXP (par, 0, i) = tmp;
1297 }
1298 pcum->aapcs_reg = par;
1299 }
1300
1301 pcum->aapcs_nextncrn = ncrn + nregs;
1302 return;
1303 }
1304
1305 /* C.11 */
1306 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1307
 1308  /* The argument is passed on the stack; record the needed number of words for
1309 this argument (we can re-use NREGS) and align the total size if
1310 necessary. */
1311on_stack:
1312 pcum->aapcs_stack_words = nregs;
1313 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1314 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1315 16 / UNITS_PER_WORD) + 1;
1316 return;
1317}
1318
1319/* Implement TARGET_FUNCTION_ARG. */
1320
1321static rtx
1322aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1323 const_tree type, bool named)
1324{
1325 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1326 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1327
1328 if (mode == VOIDmode)
1329 return NULL_RTX;
1330
1331 aarch64_layout_arg (pcum_v, mode, type, named);
1332 return pcum->aapcs_reg;
1333}
1334
1335void
1336aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1337 const_tree fntype ATTRIBUTE_UNUSED,
1338 rtx libname ATTRIBUTE_UNUSED,
1339 const_tree fndecl ATTRIBUTE_UNUSED,
1340 unsigned n_named ATTRIBUTE_UNUSED)
1341{
1342 pcum->aapcs_ncrn = 0;
1343 pcum->aapcs_nvrn = 0;
1344 pcum->aapcs_nextncrn = 0;
1345 pcum->aapcs_nextnvrn = 0;
1346 pcum->pcs_variant = ARM_PCS_AAPCS64;
1347 pcum->aapcs_reg = NULL_RTX;
1348 pcum->aapcs_arg_processed = false;
1349 pcum->aapcs_stack_words = 0;
1350 pcum->aapcs_stack_size = 0;
1351
1352 return;
1353}
1354
1355static void
1356aarch64_function_arg_advance (cumulative_args_t pcum_v,
1357 enum machine_mode mode,
1358 const_tree type,
1359 bool named)
1360{
1361 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1362 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1363 {
1364 aarch64_layout_arg (pcum_v, mode, type, named);
1365 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1366 != (pcum->aapcs_stack_words != 0));
1367 pcum->aapcs_arg_processed = false;
1368 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1369 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1370 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1371 pcum->aapcs_stack_words = 0;
1372 pcum->aapcs_reg = NULL_RTX;
1373 }
1374}
1375
1376bool
1377aarch64_function_arg_regno_p (unsigned regno)
1378{
1379 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1380 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1381}
1382
1383/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1384 PARM_BOUNDARY bits of alignment, but will be given anything up
1385 to STACK_BOUNDARY bits if the type requires it. This makes sure
1386 that both before and after the layout of each argument, the Next
1387 Stacked Argument Address (NSAA) will have a minimum alignment of
1388 8 bytes. */
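/* For example, assuming PARM_BOUNDARY == 64 and STACK_BOUNDARY == 128 as
   defined for this port, a 4-byte int is given 64-bit alignment, while an
   over-aligned type is capped at 128 bits.  */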
1389
1390static unsigned int
1391aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1392{
1393 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1394
1395 if (alignment < PARM_BOUNDARY)
1396 alignment = PARM_BOUNDARY;
1397 if (alignment > STACK_BOUNDARY)
1398 alignment = STACK_BOUNDARY;
1399 return alignment;
1400}
1401
1402/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1403
1404 Return true if an argument passed on the stack should be padded upwards,
1405 i.e. if the least-significant byte of the stack slot has useful data.
1406
1407 Small aggregate types are placed in the lowest memory address.
1408
1409 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
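/* For example, on a big-endian target a 2-byte scalar occupies the two
   highest-addressed bytes of its 8-byte stack slot (padded downward),
   whereas a 2-byte structure occupies the two lowest-addressed bytes
   (padded upward).  */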
1410
1411bool
1412aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1413{
1414 /* On little-endian targets, the least significant byte of every stack
1415 argument is passed at the lowest byte address of the stack slot. */
1416 if (!BYTES_BIG_ENDIAN)
1417 return true;
1418
1419 /* Otherwise, integral types and floating point types are padded downward:
1420 the least significant byte of a stack argument is passed at the highest
1421 byte address of the stack slot. */
1422 if (type
1423 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type))
1424 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1425 return false;
1426
 1427  /* Everything else is padded upward, i.e. data in the first byte of the stack slot.  */
1428 return true;
1429}
1430
1431/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1432
 1433   It specifies padding for the last (and possibly only)
 1434   element of a block move between registers and memory.  Assuming
 1435   the block is in memory, padding upward means that the last
 1436   element is padded after its most significant byte, while with
 1437   downward padding the last element is padded at its least
 1438   significant byte side.
1439
1440 Small aggregates and small complex types are always padded
1441 upwards.
1442
1443 We don't need to worry about homogeneous floating-point or
1444 short-vector aggregates; their move is not affected by the
1445 padding direction determined here. Regardless of endianness,
1446 each element of such an aggregate is put in the least
1447 significant bits of a fp/simd register.
1448
1449 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1450 register has useful data, and return the opposite if the most
1451 significant byte does. */
1452
1453bool
1454aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1455 bool first ATTRIBUTE_UNUSED)
1456{
1457
1458 /* Small composite types are always padded upward. */
1459 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1460 {
1461 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1462 : GET_MODE_SIZE (mode));
1463 if (size < 2 * UNITS_PER_WORD)
1464 return true;
1465 }
1466
1467 /* Otherwise, use the default padding. */
1468 return !BYTES_BIG_ENDIAN;
1469}
1470
1471static enum machine_mode
1472aarch64_libgcc_cmp_return_mode (void)
1473{
1474 return SImode;
1475}
1476
1477static bool
1478aarch64_frame_pointer_required (void)
1479{
1480 /* If the function contains dynamic stack allocations, we need to
1481 use the frame pointer to access the static parts of the frame. */
1482 if (cfun->calls_alloca)
1483 return true;
1484
 1485  /* We may have turned flag_omit_frame_pointer on in order to have this
 1486     function called; if we did, we also set the 'faked_omit_frame_pointer' flag
 1487     and we check it here.
 1488     If flag_omit_frame_pointer was genuinely set by the user, then we return
 1489     false (no frame pointer required) in all cases.  */
1490
1491 if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1492 return false;
1493 else if (flag_omit_leaf_frame_pointer)
1494 return !crtl->is_leaf;
1495 return true;
1496}
1497
1498/* Mark the registers that need to be saved by the callee and calculate
1499 the size of the callee-saved registers area and frame record (both FP
1500 and LR may be omitted). */
1501static void
1502aarch64_layout_frame (void)
1503{
1504 HOST_WIDE_INT offset = 0;
1505 int regno;
1506
1507 if (reload_completed && cfun->machine->frame.laid_out)
1508 return;
1509
1510 cfun->machine->frame.fp_lr_offset = 0;
1511
1512 /* First mark all the registers that really need to be saved... */
1513 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1514 cfun->machine->frame.reg_offset[regno] = -1;
1515
1516 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1517 cfun->machine->frame.reg_offset[regno] = -1;
1518
1519 /* ... that includes the eh data registers (if needed)... */
1520 if (crtl->calls_eh_return)
1521 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1522 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1523
1524 /* ... and any callee saved register that dataflow says is live. */
1525 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1526 if (df_regs_ever_live_p (regno)
1527 && !call_used_regs[regno])
1528 cfun->machine->frame.reg_offset[regno] = 0;
1529
1530 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1531 if (df_regs_ever_live_p (regno)
1532 && !call_used_regs[regno])
1533 cfun->machine->frame.reg_offset[regno] = 0;
1534
1535 if (frame_pointer_needed)
1536 {
1537 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1538 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1539 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1540 }
1541
1542 /* Now assign stack slots for them. */
1543 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1544 if (cfun->machine->frame.reg_offset[regno] != -1)
1545 {
1546 cfun->machine->frame.reg_offset[regno] = offset;
1547 offset += UNITS_PER_WORD;
1548 }
1549
1550 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1551 if (cfun->machine->frame.reg_offset[regno] != -1)
1552 {
1553 cfun->machine->frame.reg_offset[regno] = offset;
1554 offset += UNITS_PER_WORD;
1555 }
1556
1557 if (frame_pointer_needed)
1558 {
1559 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1560 offset += UNITS_PER_WORD;
1561 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1562 }
1563
1564 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1565 {
1566 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1567 offset += UNITS_PER_WORD;
1568 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1569 }
1570
1571 cfun->machine->frame.padding0 =
1572 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1573 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1574
1575 cfun->machine->frame.saved_regs_size = offset;
1576 cfun->machine->frame.laid_out = true;
1577}
1578
1579/* Make the last instruction frame-related and note that it performs
1580 the operation described by FRAME_PATTERN. */
1581
1582static void
1583aarch64_set_frame_expr (rtx frame_pattern)
1584{
1585 rtx insn;
1586
1587 insn = get_last_insn ();
1588 RTX_FRAME_RELATED_P (insn) = 1;
1589 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1590 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1591 frame_pattern,
1592 REG_NOTES (insn));
1593}
1594
1595static bool
1596aarch64_register_saved_on_entry (int regno)
1597{
1598 return cfun->machine->frame.reg_offset[regno] != -1;
1599}
1600
1601
1602static void
1603aarch64_save_or_restore_fprs (int start_offset, int increment,
1604 bool restore, rtx base_rtx)
1605
1606{
1607 unsigned regno;
1608 unsigned regno2;
1609 rtx insn;
1610 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1611
1612
1613 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1614 {
1615 if (aarch64_register_saved_on_entry (regno))
1616 {
1617 rtx mem;
1618 mem = gen_mem_ref (DFmode,
1619 plus_constant (Pmode,
1620 base_rtx,
1621 start_offset));
1622
1623 for (regno2 = regno + 1;
1624 regno2 <= V31_REGNUM
1625 && !aarch64_register_saved_on_entry (regno2);
1626 regno2++)
1627 {
1628 /* Empty loop. */
1629 }
1630 if (regno2 <= V31_REGNUM &&
1631 aarch64_register_saved_on_entry (regno2))
1632 {
1633 rtx mem2;
1634 /* Next highest register to be saved. */
1635 mem2 = gen_mem_ref (DFmode,
1636 plus_constant
1637 (Pmode,
1638 base_rtx,
1639 start_offset + increment));
1640 if (restore == false)
1641 {
1642 insn = emit_insn
1643 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1644 mem2, gen_rtx_REG (DFmode, regno2)));
1645
1646 }
1647 else
1648 {
1649 insn = emit_insn
1650 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1651 gen_rtx_REG (DFmode, regno2), mem2));
1652
1653 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno));
1654 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno2));
1655 }
1656
1657 /* The first part of a frame-related parallel insn
1658 is always assumed to be relevant to the frame
 1659	         calculations; subsequent parts are only
1660 frame-related if explicitly marked. */
1661 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1662 1)) = 1;
1663 regno = regno2;
1664 start_offset += increment * 2;
1665 }
1666 else
1667 {
1668 if (restore == false)
1669 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1670 else
1671 {
1672 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1673 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1674 }
1675 start_offset += increment;
1676 }
1677 RTX_FRAME_RELATED_P (insn) = 1;
1678 }
1679 }
1680
1681}
1682
1683
1684/* Offset from the stack pointer at which the saves and
 1685   restores have to happen.  */
1686static void
1687aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1688 bool restore)
1689{
1690 rtx insn;
1691 rtx base_rtx = stack_pointer_rtx;
1692 HOST_WIDE_INT start_offset = offset;
1693 HOST_WIDE_INT increment = UNITS_PER_WORD;
1694 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1695 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1696 unsigned regno;
1697 unsigned regno2;
1698
1699 for (regno = R0_REGNUM; regno <= limit; regno++)
1700 {
1701 if (aarch64_register_saved_on_entry (regno))
1702 {
1703 rtx mem;
1704 mem = gen_mem_ref (Pmode,
1705 plus_constant (Pmode,
1706 base_rtx,
1707 start_offset));
1708
1709 for (regno2 = regno + 1;
1710 regno2 <= limit
1711 && !aarch64_register_saved_on_entry (regno2);
1712 regno2++)
1713 {
1714 /* Empty loop. */
1715 }
1716 if (regno2 <= limit &&
1717 aarch64_register_saved_on_entry (regno2))
1718 {
1719 rtx mem2;
1720 /* Next highest register to be saved. */
1721 mem2 = gen_mem_ref (Pmode,
1722 plus_constant
1723 (Pmode,
1724 base_rtx,
1725 start_offset + increment));
1726 if (restore == false)
1727 {
1728 insn = emit_insn
1729 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1730 mem2, gen_rtx_REG (DImode, regno2)));
1731
1732 }
1733 else
1734 {
1735 insn = emit_insn
1736 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1737 gen_rtx_REG (DImode, regno2), mem2));
1738
1739 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1740 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1741 }
1742
1743 /* The first part of a frame-related parallel insn
1744 is always assumed to be relevant to the frame
 1745	         calculations; subsequent parts are only
1746 frame-related if explicitly marked. */
1747 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1748 1)) = 1;
1749 regno = regno2;
1750 start_offset += increment * 2;
1751 }
1752 else
1753 {
1754 if (restore == false)
1755 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1756 else
1757 {
1758 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1759 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1760 }
1761 start_offset += increment;
1762 }
1763 RTX_FRAME_RELATED_P (insn) = 1;
1764 }
1765 }
1766
1767 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1768
1769}
1770
1771/* AArch64 stack frames generated by this compiler look like:
1772
1773 +-------------------------------+
1774 | |
1775 | incoming stack arguments |
1776 | |
1777 +-------------------------------+ <-- arg_pointer_rtx
1778 | |
1779 | callee-allocated save area |
1780 | for register varargs |
1781 | |
1782 +-------------------------------+
1783 | |
1784 | local variables |
1785 | |
1786 +-------------------------------+ <-- frame_pointer_rtx
1787 | |
1788 | callee-saved registers |
1789 | |
1790 +-------------------------------+
1791 | LR' |
1792 +-------------------------------+
1793 | FP' |
1794 P +-------------------------------+ <-- hard_frame_pointer_rtx
1795 | dynamic allocation |
1796 +-------------------------------+
1797 | |
1798 | outgoing stack arguments |
1799 | |
1800 +-------------------------------+ <-- stack_pointer_rtx
1801
1802 Dynamic stack allocations such as alloca insert data at point P.
1803 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
1804 hard_frame_pointer_rtx unchanged. */
1805
1806/* Generate the prologue instructions for entry into a function.
1807 Establish the stack frame by decreasing the stack pointer with a
1808 properly calculated size and, if necessary, create a frame record
 1809   filled with the values of LR and the previous frame pointer.  The
 1810   current FP is also set up if it is in use.  */
1811
1812void
1813aarch64_expand_prologue (void)
1814{
1815 /* sub sp, sp, #<frame_size>
1816 stp {fp, lr}, [sp, #<frame_size> - 16]
1817 add fp, sp, #<frame_size> - hardfp_offset
1818 stp {cs_reg}, [fp, #-16] etc.
1819
1820 sub sp, sp, <final_adjustment_if_any>
1821 */
1822 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
1823 HOST_WIDE_INT frame_size, offset;
1824 HOST_WIDE_INT fp_offset; /* FP offset from SP */
1825 rtx insn;
1826
1827 aarch64_layout_frame ();
1828 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
1829 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
1830 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
1831 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
1832 + crtl->outgoing_args_size);
1833 offset = frame_size = AARCH64_ROUND_UP (frame_size,
1834 STACK_BOUNDARY / BITS_PER_UNIT);
1835
1836 if (flag_stack_usage_info)
1837 current_function_static_stack_size = frame_size;
1838
1839 fp_offset = (offset
1840 - original_frame_size
1841 - cfun->machine->frame.saved_regs_size);
1842
 1843  /* Store pairs and load pairs have a range of only -512 to 504.  */
1844 if (offset >= 512)
1845 {
1846 /* When the frame has a large size, an initial decrease is done on
1847 the stack pointer to jump over the callee-allocated save area for
1848 register varargs, the local variable area and/or the callee-saved
1849 register area. This will allow the pre-index write-back
1850 store pair instructions to be used for setting up the stack frame
1851 efficiently. */
1852 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
1853 if (offset >= 512)
1854 offset = cfun->machine->frame.saved_regs_size;
1855
1856 frame_size -= (offset + crtl->outgoing_args_size);
1857 fp_offset = 0;
1858
1859 if (frame_size >= 0x1000000)
1860 {
1861 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
1862 emit_move_insn (op0, GEN_INT (-frame_size));
1863 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
1864 aarch64_set_frame_expr (gen_rtx_SET
1865 (Pmode, stack_pointer_rtx,
1866 gen_rtx_PLUS (Pmode,
1867 stack_pointer_rtx,
1868 GEN_INT (-frame_size))));
1869 }
1870 else if (frame_size > 0)
1871 {
1872 if ((frame_size & 0xfff) != frame_size)
1873 {
1874 insn = emit_insn (gen_add2_insn
1875 (stack_pointer_rtx,
1876 GEN_INT (-(frame_size
1877 & ~(HOST_WIDE_INT)0xfff))));
1878 RTX_FRAME_RELATED_P (insn) = 1;
1879 }
1880 if ((frame_size & 0xfff) != 0)
1881 {
1882 insn = emit_insn (gen_add2_insn
1883 (stack_pointer_rtx,
1884 GEN_INT (-(frame_size
1885 & (HOST_WIDE_INT)0xfff))));
1886 RTX_FRAME_RELATED_P (insn) = 1;
1887 }
1888 }
1889 }
1890 else
1891 frame_size = -1;
1892
1893 if (offset > 0)
1894 {
 1895      /* If the frame pointer is needed, save the frame pointer and LR
 1896	 first.  Make the frame pointer point to the location of the
 1897	 old frame pointer on the stack.  */
1898 if (frame_pointer_needed)
1899 {
1900 rtx mem_fp, mem_lr;
1901
1902 if (fp_offset)
1903 {
1904 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
1905 GEN_INT (-offset)));
1906 RTX_FRAME_RELATED_P (insn) = 1;
1907 aarch64_set_frame_expr (gen_rtx_SET
1908 (Pmode, stack_pointer_rtx,
1909 gen_rtx_MINUS (Pmode,
1910 stack_pointer_rtx,
1911 GEN_INT (offset))));
1912 mem_fp = gen_frame_mem (DImode,
1913 plus_constant (Pmode,
1914 stack_pointer_rtx,
1915 fp_offset));
1916 mem_lr = gen_frame_mem (DImode,
1917 plus_constant (Pmode,
1918 stack_pointer_rtx,
1919 fp_offset
1920 + UNITS_PER_WORD));
1921 insn = emit_insn (gen_store_pairdi (mem_fp,
1922 hard_frame_pointer_rtx,
1923 mem_lr,
1924 gen_rtx_REG (DImode,
1925 LR_REGNUM)));
1926 }
1927 else
1928 {
1929 insn = emit_insn (gen_storewb_pairdi_di
1930 (stack_pointer_rtx, stack_pointer_rtx,
1931 hard_frame_pointer_rtx,
1932 gen_rtx_REG (DImode, LR_REGNUM),
1933 GEN_INT (-offset),
1934 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
1935 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
1936 }
1937
1938 /* The first part of a frame-related parallel insn is always
1939 assumed to be relevant to the frame calculations;
 1940 subsequent parts are only frame-related if explicitly
1941 marked. */
1942 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1943 RTX_FRAME_RELATED_P (insn) = 1;
1944
1945 /* Set up frame pointer to point to the location of the
1946 previous frame pointer on the stack. */
1947 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
1948 stack_pointer_rtx,
1949 GEN_INT (fp_offset)));
1950 aarch64_set_frame_expr (gen_rtx_SET
1951 (Pmode, hard_frame_pointer_rtx,
1952 gen_rtx_PLUS (Pmode,
1953 stack_pointer_rtx,
1954 GEN_INT (fp_offset))));
1955 RTX_FRAME_RELATED_P (insn) = 1;
1956 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
1957 hard_frame_pointer_rtx));
1958 }
1959 else
1960 {
1961 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
1962 GEN_INT (-offset)));
1963 RTX_FRAME_RELATED_P (insn) = 1;
1964 }
1965
1966 aarch64_save_or_restore_callee_save_registers
1967 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
1968 }
1969
 1970 /* When offset >= 512,
1971 sub sp, sp, #<outgoing_args_size> */
1972 if (frame_size > -1)
1973 {
1974 if (crtl->outgoing_args_size > 0)
1975 {
1976 insn = emit_insn (gen_add2_insn
1977 (stack_pointer_rtx,
1978 GEN_INT (- crtl->outgoing_args_size)));
1979 RTX_FRAME_RELATED_P (insn) = 1;
1980 }
1981 }
1982}
1983
1984/* Generate the epilogue instructions for returning from a function. */
1985void
1986aarch64_expand_epilogue (bool for_sibcall)
1987{
1988 HOST_WIDE_INT original_frame_size, frame_size, offset;
1989 HOST_WIDE_INT fp_offset;
1990 rtx insn;
 1991 rtx cfa_reg;
1992
1993 aarch64_layout_frame ();
1994 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
1995 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
1996 + crtl->outgoing_args_size);
1997 offset = frame_size = AARCH64_ROUND_UP (frame_size,
1998 STACK_BOUNDARY / BITS_PER_UNIT);
1999
2000 fp_offset = (offset
2001 - original_frame_size
2002 - cfun->machine->frame.saved_regs_size);
2003
2004 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2005
 2006 /* Store pairs and load pairs have a range of only -512 to 504. */
2007 if (offset >= 512)
2008 {
2009 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2010 if (offset >= 512)
2011 offset = cfun->machine->frame.saved_regs_size;
2012
2013 frame_size -= (offset + crtl->outgoing_args_size);
2014 fp_offset = 0;
2015 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2016 {
2017 insn = emit_insn (gen_add2_insn
2018 (stack_pointer_rtx,
2019 GEN_INT (crtl->outgoing_args_size)));
2020 RTX_FRAME_RELATED_P (insn) = 1;
2021 }
2022 }
2023 else
2024 frame_size = -1;
2025
2026 /* If there were outgoing arguments or we've done dynamic stack
2027 allocation, then restore the stack pointer from the frame
2028 pointer. This is at most one insn and more efficient than using
2029 GCC's internal mechanism. */
2030 if (frame_pointer_needed
2031 && (crtl->outgoing_args_size || cfun->calls_alloca))
2032 {
2033 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2034 hard_frame_pointer_rtx,
2035 GEN_INT (- fp_offset)));
2036 RTX_FRAME_RELATED_P (insn) = 1;
2037 /* As SP is set to (FP - fp_offset), according to the rules in
2038 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2039 from the value of SP from now on. */
2040 cfa_reg = stack_pointer_rtx;
2041 }
2042
2043 aarch64_save_or_restore_callee_save_registers
2044 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2045
2046 /* Restore the frame pointer and lr if the frame pointer is needed. */
2047 if (offset > 0)
2048 {
2049 if (frame_pointer_needed)
2050 {
2051 rtx mem_fp, mem_lr;
2052
2053 if (fp_offset)
2054 {
2055 mem_fp = gen_frame_mem (DImode,
2056 plus_constant (Pmode,
2057 stack_pointer_rtx,
2058 fp_offset));
2059 mem_lr = gen_frame_mem (DImode,
2060 plus_constant (Pmode,
2061 stack_pointer_rtx,
2062 fp_offset
2063 + UNITS_PER_WORD));
2064 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2065 mem_fp,
2066 gen_rtx_REG (DImode,
2067 LR_REGNUM),
2068 mem_lr));
2069 }
2070 else
2071 {
2072 insn = emit_insn (gen_loadwb_pairdi_di
2073 (stack_pointer_rtx,
2074 stack_pointer_rtx,
2075 hard_frame_pointer_rtx,
2076 gen_rtx_REG (DImode, LR_REGNUM),
2077 GEN_INT (offset),
2078 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2079 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2080 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2081 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2082 plus_constant (Pmode, cfa_reg,
2083 offset))));
2084 }
2085
2086 /* The first part of a frame-related parallel insn
2087 is always assumed to be relevant to the frame
 2088 calculations; subsequent parts are only
2089 frame-related if explicitly marked. */
2090 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2091 RTX_FRAME_RELATED_P (insn) = 1;
2092 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2093 add_reg_note (insn, REG_CFA_RESTORE,
2094 gen_rtx_REG (DImode, LR_REGNUM));
2095
2096 if (fp_offset)
2097 {
2098 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2099 GEN_INT (offset)));
2100 RTX_FRAME_RELATED_P (insn) = 1;
2101 }
2102 }
2103 else
2104 {
2105 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2106 GEN_INT (offset)));
2107 RTX_FRAME_RELATED_P (insn) = 1;
2108 }
2109 }
2110
2111 /* Stack adjustment for exception handler. */
2112 if (crtl->calls_eh_return)
2113 {
2114 /* We need to unwind the stack by the offset computed by
2115 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2116 based on SP. Ideally we would update the SP and define the
2117 CFA along the lines of:
2118
2119 SP = SP + EH_RETURN_STACKADJ_RTX
2120 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2121
 2122 However, the DWARF emitter only understands a constant
2123 register offset.
2124
 2125 The solution chosen here is to use the otherwise unused IP0
2126 as a temporary register to hold the current SP value. The
2127 CFA is described using IP0 then SP is modified. */
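 /* In other words, the sequence emitted below is approximately

	    mov	ip0, sp		// CFA is now described in terms of IP0
	    add	sp, sp, <EH_RETURN_STACKADJ_RTX>

	 followed by a USE of IP0 so that the copy is not deleted.  */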
2128
2129 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2130
2131 insn = emit_move_insn (ip0, stack_pointer_rtx);
2132 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2133 RTX_FRAME_RELATED_P (insn) = 1;
2134
2135 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2136
2137 /* Ensure the assignment to IP0 does not get optimized away. */
2138 emit_use (ip0);
2139 }
2140
2141 if (frame_size > -1)
2142 {
2143 if (frame_size >= 0x1000000)
2144 {
2145 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2146 emit_move_insn (op0, GEN_INT (frame_size));
2147 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2148 aarch64_set_frame_expr (gen_rtx_SET
2149 (Pmode, stack_pointer_rtx,
2150 gen_rtx_PLUS (Pmode,
2151 stack_pointer_rtx,
2152 GEN_INT (frame_size))));
2153 }
2154 else if (frame_size > 0)
2155 {
2156 if ((frame_size & 0xfff) != 0)
2157 {
2158 insn = emit_insn (gen_add2_insn
2159 (stack_pointer_rtx,
2160 GEN_INT ((frame_size
2161 & (HOST_WIDE_INT) 0xfff))));
2162 RTX_FRAME_RELATED_P (insn) = 1;
2163 }
2164 if ((frame_size & 0xfff) != frame_size)
2165 {
2166 insn = emit_insn (gen_add2_insn
2167 (stack_pointer_rtx,
2168 GEN_INT ((frame_size
2169 & ~ (HOST_WIDE_INT) 0xfff))));
2170 RTX_FRAME_RELATED_P (insn) = 1;
2171 }
2172 }
2173
2174 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2175 gen_rtx_PLUS (Pmode,
2176 stack_pointer_rtx,
2177 GEN_INT (offset))));
2178 }
2179
2180 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2181 if (!for_sibcall)
2182 emit_jump_insn (ret_rtx);
2183}
2184
2185/* Return the place to copy the exception unwinding return address to.
 2186 This will probably be a stack slot, but could (in theory) be the
 2187 return register. */
2188rtx
2189aarch64_final_eh_return_addr (void)
2190{
2191 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2192 aarch64_layout_frame ();
2193 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2194 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2195 + crtl->outgoing_args_size);
2196 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2197 STACK_BOUNDARY / BITS_PER_UNIT);
2198 fp_offset = offset
2199 - original_frame_size
2200 - cfun->machine->frame.saved_regs_size;
2201
2202 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2203 return gen_rtx_REG (DImode, LR_REGNUM);
2204
2205 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2206 result in a store to save LR introduced by builtin_eh_return () being
2207 incorrectly deleted because the alias is not detected.
2208 So in the calculation of the address to copy the exception unwinding
 2209 return address to, we distinguish two cases.
2210 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2211 we return a SP-relative location since all the addresses are SP-relative
2212 in this case. This prevents the store from being optimized away.
2213 If the fp_offset is not 0, then the addresses will be FP-relative and
2214 therefore we return a FP-relative location. */
2215
2216 if (frame_pointer_needed)
2217 {
2218 if (fp_offset)
2219 return gen_frame_mem (DImode,
2220 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2221 else
2222 return gen_frame_mem (DImode,
2223 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2224 }
2225
2226 /* If FP is not needed, we calculate the location of LR, which would be
2227 at the top of the saved registers block. */
2228
2229 return gen_frame_mem (DImode,
2230 plus_constant (Pmode,
2231 stack_pointer_rtx,
2232 fp_offset
2233 + cfun->machine->frame.saved_regs_size
2234 - 2 * UNITS_PER_WORD));
2235}
2236
2237/* Output code to build up a constant in a register. */
2238static void
2239aarch64_build_constant (FILE *file,
2240 int regnum,
2241 HOST_WIDE_INT val)
2242{
2243 if (aarch64_bitmask_imm (val, DImode))
 2244 asm_fprintf (file, "\tmov\t%r, %wd\n", regnum, val);
2245 else
2246 {
2247 int i;
2248 int ncount = 0;
2249 int zcount = 0;
2250 HOST_WIDE_INT valp = val >> 16;
2251 HOST_WIDE_INT valm;
2252 HOST_WIDE_INT tval;
2253
2254 for (i = 16; i < 64; i += 16)
2255 {
2256 valm = (valp & 0xffff);
2257
2258 if (valm != 0)
2259 ++ zcount;
2260
2261 if (valm != 0xffff)
2262 ++ ncount;
2263
2264 valp >>= 16;
2265 }
2266
2267 /* zcount contains the number of additional MOVK instructions
2268 required if the constant is built up with an initial MOVZ instruction,
2269 while ncount is the number of MOVK instructions required if starting
 2270 with a MOVN instruction. Choose the sequence that yields the
 2271 fewer instructions, preferring the MOVZ-based sequence when both
 2272 counts are equal. */
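 /* As a purely illustrative example, VAL == 0x0000ffff00001234 gives
	 zcount == 1 and ncount == 2, so the MOVZ-based sequence is used:
	 a MOVZ of 0x1234 followed by one MOVK of 0xffff into bits 32..47.  */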
2273 if (ncount < zcount)
2274 {
2275 asm_fprintf (file, "\tmovn\t%r, %wd\n", regnum, (~val) & 0xffff);
2276 tval = 0xffff;
2277 }
2278 else
2279 {
2280 asm_fprintf (file, "\tmovz\t%r, %wd\n", regnum, val & 0xffff);
2281 tval = 0;
2282 }
2283
2284 val >>= 16;
2285
2286 for (i = 16; i < 64; i += 16)
2287 {
2288 if ((val & 0xffff) != tval)
2289 asm_fprintf (file, "\tmovk\t%r, %wd, lsl %d\n",
2290 regnum, val & 0xffff, i);
2291 val >>= 16;
2292 }
2293 }
2294}
2295
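/* Output code to add DELTA to register REGNUM, using SCRATCHREG as a
   temporary if DELTA is too large for the add/sub immediate forms.
   For example, DELTA == 0x12345 is emitted as an add of 18 shifted
   left by 12 bits followed by an add of 837 (18 * 4096 + 837 == 0x12345).  */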
2296static void
2297aarch64_add_constant (FILE *file, int regnum, int scratchreg,
2298 HOST_WIDE_INT delta)
2299{
2300 HOST_WIDE_INT mdelta = delta;
2301
2302 if (mdelta < 0)
2303 mdelta = -mdelta;
2304
2305 if (mdelta >= 4096 * 4096)
2306 {
2307 aarch64_build_constant (file, scratchreg, delta);
2308 asm_fprintf (file, "\tadd\t%r, %r, %r\n", regnum, regnum,
2309 scratchreg);
2310 }
2311 else if (mdelta > 0)
2312 {
2313 const char *const mi_op = delta < 0 ? "sub" : "add";
2314
2315 if (mdelta >= 4096)
2316 asm_fprintf (file, "\t%s\t%r, %r, %wd, lsl 12\n", mi_op, regnum, regnum,
2317 mdelta / 4096);
2318
2319 if (mdelta % 4096 != 0)
2320 asm_fprintf (file, "\t%s\t%r, %r, %wd\n", mi_op, regnum, regnum,
2321 mdelta % 4096);
2322 }
2323}
2324
2325/* Output code to add DELTA to the first argument, and then jump
2326 to FUNCTION. Used for C++ multiple inheritance. */
2327static void
2328aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2329 HOST_WIDE_INT delta,
2330 HOST_WIDE_INT vcall_offset,
2331 tree function)
2332{
2333 /* The this pointer is always in x0. Note that this differs from
 2334 Arm where the this pointer may be bumped to r1 if r0 is required
2335 to return a pointer to an aggregate. On AArch64 a result value
2336 pointer will be in x8. */
2337 int this_regno = R0_REGNUM;
 2338 rtx this_rtx, temp0, temp1, addr, insn, funexp;
 2339
2340 reload_completed = 1;
2341 emit_note (NOTE_INSN_PROLOGUE_END);
2342
2343 if (vcall_offset == 0)
2344 aarch64_add_constant (file, this_regno, IP1_REGNUM, delta);
2345 else
2346 {
2347 gcc_assert ((vcall_offset & 0x7) == 0);
2348
2349 this_rtx = gen_rtx_REG (Pmode, this_regno);
2350 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2351 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
 2352
2353 addr = this_rtx;
2354 if (delta != 0)
2355 {
2356 if (delta >= -256 && delta < 256)
2357 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2358 plus_constant (Pmode, this_rtx, delta));
2359 else
2360 aarch64_add_constant (file, this_regno, IP1_REGNUM, delta);
2361 }
2362
2363 aarch64_emit_move (temp0, gen_rtx_MEM (Pmode, addr));
2364
 2365 if (vcall_offset >= -256 && vcall_offset < 32768)
 2366 addr = plus_constant (Pmode, temp0, vcall_offset);
2367 else
2368 {
2369 aarch64_build_constant (file, IP1_REGNUM, vcall_offset);
 2370 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2371 }
2372
 2373 aarch64_emit_move (temp1, gen_rtx_MEM (Pmode, addr));
2374 emit_insn (gen_add2_insn (this_rtx, temp1));
2375 }
2376
2377 /* Generate a tail call to the target function. */
2378 if (!TREE_USED (function))
2379 {
2380 assemble_external (function);
2381 TREE_USED (function) = 1;
2382 }
2383 funexp = XEXP (DECL_RTL (function), 0);
2384 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2385 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2386 SIBLING_CALL_P (insn) = 1;
2387
2388 insn = get_insns ();
2389 shorten_branches (insn);
2390 final_start_function (insn, file, 1);
2391 final (insn, file, 1);
 2392 final_end_function ();
2393
2394 /* Stop pretending to be a post-reload pass. */
2395 reload_completed = 0;
2396}
2397
2398static int
2399aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2400{
2401 if (GET_CODE (*x) == SYMBOL_REF)
2402 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2403
2404 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2405 TLS offsets, not real symbol references. */
2406 if (GET_CODE (*x) == UNSPEC
2407 && XINT (*x, 1) == UNSPEC_TLS)
2408 return -1;
2409
2410 return 0;
2411}
2412
2413static bool
2414aarch64_tls_referenced_p (rtx x)
2415{
2416 if (!TARGET_HAVE_TLS)
2417 return false;
2418
2419 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2420}
2421
2422
2423static int
2424aarch64_bitmasks_cmp (const void *i1, const void *i2)
2425{
2426 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2427 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2428
2429 if (*imm1 < *imm2)
2430 return -1;
2431 if (*imm1 > *imm2)
2432 return +1;
2433 return 0;
2434}
2435
2436
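/* Build a sorted table of the immediates that can be expressed as an
   AArch64 bitmask: S consecutive set bits (1 <= S < E), rotated right
   by R and then replicated up to 64 bits, for each element size E in
   2, 4, 8, 16, 32 and 64.  For example, E == 8, S == 3, R == 1 gives
   the byte 0x83 replicated to 0x8383838383838383.  */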
2437static void
2438aarch64_build_bitmask_table (void)
2439{
2440 unsigned HOST_WIDE_INT mask, imm;
2441 unsigned int log_e, e, s, r;
2442 unsigned int nimms = 0;
2443
2444 for (log_e = 1; log_e <= 6; log_e++)
2445 {
2446 e = 1 << log_e;
2447 if (e == 64)
2448 mask = ~(HOST_WIDE_INT) 0;
2449 else
2450 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2451 for (s = 1; s < e; s++)
2452 {
2453 for (r = 0; r < e; r++)
2454 {
2455 /* set s consecutive bits to 1 (s < 64) */
2456 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2457 /* rotate right by r */
2458 if (r != 0)
2459 imm = ((imm >> r) | (imm << (e - r))) & mask;
2460 /* replicate the constant depending on SIMD size */
2461 switch (log_e) {
2462 case 1: imm |= (imm << 2);
2463 case 2: imm |= (imm << 4);
2464 case 3: imm |= (imm << 8);
2465 case 4: imm |= (imm << 16);
2466 case 5: imm |= (imm << 32);
2467 case 6:
2468 break;
2469 default:
2470 gcc_unreachable ();
2471 }
2472 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2473 aarch64_bitmasks[nimms++] = imm;
2474 }
2475 }
2476 }
2477
2478 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2479 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2480 aarch64_bitmasks_cmp);
2481}
2482
2483
2484/* Return true if val can be encoded as a 12-bit unsigned immediate with
2485 a left shift of 0 or 12 bits. */
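/* For example, 0xabc and 0xabc000 can be encoded this way; 0xabc00 cannot. */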
2486bool
2487aarch64_uimm12_shift (HOST_WIDE_INT val)
2488{
2489 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2490 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2491 );
2492}
2493
2494
2495/* Return true if val is an immediate that can be loaded into a
2496 register by a MOVZ instruction. */
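/* For example, 0x12340000 and 0xffff000000000000 qualify; 0x12345 does not. */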
2497static bool
2498aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2499{
2500 if (GET_MODE_SIZE (mode) > 4)
2501 {
2502 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2503 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2504 return 1;
2505 }
2506 else
2507 {
2508 /* Ignore sign extension. */
2509 val &= (HOST_WIDE_INT) 0xffffffff;
2510 }
2511 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2512 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2513}
2514
2515
2516/* Return true if val is a valid bitmask immediate. */
2517bool
2518aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2519{
2520 if (GET_MODE_SIZE (mode) < 8)
2521 {
2522 /* Replicate bit pattern. */
2523 val &= (HOST_WIDE_INT) 0xffffffff;
2524 val |= val << 32;
2525 }
2526 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2527 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2528}
2529
2530
2531/* Return true if val is an immediate that can be loaded into a
2532 register in a single instruction. */
2533bool
2534aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2535{
2536 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2537 return 1;
2538 return aarch64_bitmask_imm (val, mode);
2539}
2540
2541static bool
2542aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2543{
2544 rtx base, offset;
2545 if (GET_CODE (x) == HIGH)
2546 return true;
2547
2548 split_const (x, &base, &offset);
2549 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2550 return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR) != SYMBOL_FORCE_TO_MEM);
2551
2552 return aarch64_tls_referenced_p (x);
2553}
2554
2555/* Return true if register REGNO is a valid index register.
2556 STRICT_P is true if REG_OK_STRICT is in effect. */
2557
2558bool
2559aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2560{
2561 if (!HARD_REGISTER_NUM_P (regno))
2562 {
2563 if (!strict_p)
2564 return true;
2565
2566 if (!reg_renumber)
2567 return false;
2568
2569 regno = reg_renumber[regno];
2570 }
2571 return GP_REGNUM_P (regno);
2572}
2573
2574/* Return true if register REGNO is a valid base register.
2575 STRICT_P is true if REG_OK_STRICT is in effect. */
2576
2577bool
2578aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2579{
2580 if (!HARD_REGISTER_NUM_P (regno))
2581 {
2582 if (!strict_p)
2583 return true;
2584
2585 if (!reg_renumber)
2586 return false;
2587
2588 regno = reg_renumber[regno];
2589 }
2590
2591 /* The fake registers will be eliminated to either the stack or
2592 hard frame pointer, both of which are usually valid base registers.
2593 Reload deals with the cases where the eliminated form isn't valid. */
2594 return (GP_REGNUM_P (regno)
2595 || regno == SP_REGNUM
2596 || regno == FRAME_POINTER_REGNUM
2597 || regno == ARG_POINTER_REGNUM);
2598}
2599
2600/* Return true if X is a valid base register.
2601 STRICT_P is true if REG_OK_STRICT is in effect. */
2602
2603static bool
2604aarch64_base_register_rtx_p (rtx x, bool strict_p)
2605{
2606 if (!strict_p && GET_CODE (x) == SUBREG)
2607 x = SUBREG_REG (x);
2608
2609 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2610}
2611
2612/* Return true if address offset is a valid index. If it is, fill in INFO
2613 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2614
2615static bool
2616aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2617 enum machine_mode mode, bool strict_p)
2618{
2619 enum aarch64_address_type type;
2620 rtx index;
2621 int shift;
2622
2623 /* (reg:P) */
2624 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2625 && GET_MODE (x) == Pmode)
2626 {
2627 type = ADDRESS_REG_REG;
2628 index = x;
2629 shift = 0;
2630 }
2631 /* (sign_extend:DI (reg:SI)) */
2632 else if ((GET_CODE (x) == SIGN_EXTEND
2633 || GET_CODE (x) == ZERO_EXTEND)
2634 && GET_MODE (x) == DImode
2635 && GET_MODE (XEXP (x, 0)) == SImode)
2636 {
2637 type = (GET_CODE (x) == SIGN_EXTEND)
2638 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2639 index = XEXP (x, 0);
2640 shift = 0;
2641 }
2642 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2643 else if (GET_CODE (x) == MULT
2644 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2645 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2646 && GET_MODE (XEXP (x, 0)) == DImode
2647 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2648 && CONST_INT_P (XEXP (x, 1)))
2649 {
2650 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2651 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2652 index = XEXP (XEXP (x, 0), 0);
2653 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2654 }
2655 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2656 else if (GET_CODE (x) == ASHIFT
2657 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2658 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2659 && GET_MODE (XEXP (x, 0)) == DImode
2660 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2661 && CONST_INT_P (XEXP (x, 1)))
2662 {
2663 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2664 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2665 index = XEXP (XEXP (x, 0), 0);
2666 shift = INTVAL (XEXP (x, 1));
2667 }
2668 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2669 else if ((GET_CODE (x) == SIGN_EXTRACT
2670 || GET_CODE (x) == ZERO_EXTRACT)
2671 && GET_MODE (x) == DImode
2672 && GET_CODE (XEXP (x, 0)) == MULT
2673 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2674 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2675 {
2676 type = (GET_CODE (x) == SIGN_EXTRACT)
2677 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2678 index = XEXP (XEXP (x, 0), 0);
2679 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2680 if (INTVAL (XEXP (x, 1)) != 32 + shift
2681 || INTVAL (XEXP (x, 2)) != 0)
2682 shift = -1;
2683 }
2684 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2685 (const_int 0xffffffff<<shift)) */
2686 else if (GET_CODE (x) == AND
2687 && GET_MODE (x) == DImode
2688 && GET_CODE (XEXP (x, 0)) == MULT
2689 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2690 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2691 && CONST_INT_P (XEXP (x, 1)))
2692 {
2693 type = ADDRESS_REG_UXTW;
2694 index = XEXP (XEXP (x, 0), 0);
2695 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2696 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2697 shift = -1;
2698 }
2699 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2700 else if ((GET_CODE (x) == SIGN_EXTRACT
2701 || GET_CODE (x) == ZERO_EXTRACT)
2702 && GET_MODE (x) == DImode
2703 && GET_CODE (XEXP (x, 0)) == ASHIFT
2704 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2705 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2706 {
2707 type = (GET_CODE (x) == SIGN_EXTRACT)
2708 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2709 index = XEXP (XEXP (x, 0), 0);
2710 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2711 if (INTVAL (XEXP (x, 1)) != 32 + shift
2712 || INTVAL (XEXP (x, 2)) != 0)
2713 shift = -1;
2714 }
2715 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2716 (const_int 0xffffffff<<shift)) */
2717 else if (GET_CODE (x) == AND
2718 && GET_MODE (x) == DImode
2719 && GET_CODE (XEXP (x, 0)) == ASHIFT
2720 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2721 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2722 && CONST_INT_P (XEXP (x, 1)))
2723 {
2724 type = ADDRESS_REG_UXTW;
2725 index = XEXP (XEXP (x, 0), 0);
2726 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2727 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2728 shift = -1;
2729 }
2730 /* (mult:P (reg:P) (const_int scale)) */
2731 else if (GET_CODE (x) == MULT
2732 && GET_MODE (x) == Pmode
2733 && GET_MODE (XEXP (x, 0)) == Pmode
2734 && CONST_INT_P (XEXP (x, 1)))
2735 {
2736 type = ADDRESS_REG_REG;
2737 index = XEXP (x, 0);
2738 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2739 }
2740 /* (ashift:P (reg:P) (const_int shift)) */
2741 else if (GET_CODE (x) == ASHIFT
2742 && GET_MODE (x) == Pmode
2743 && GET_MODE (XEXP (x, 0)) == Pmode
2744 && CONST_INT_P (XEXP (x, 1)))
2745 {
2746 type = ADDRESS_REG_REG;
2747 index = XEXP (x, 0);
2748 shift = INTVAL (XEXP (x, 1));
2749 }
2750 else
2751 return false;
2752
2753 if (GET_CODE (index) == SUBREG)
2754 index = SUBREG_REG (index);
2755
2756 if ((shift == 0 ||
2757 (shift > 0 && shift <= 3
2758 && (1 << shift) == GET_MODE_SIZE (mode)))
2759 && REG_P (index)
2760 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2761 {
2762 info->type = type;
2763 info->offset = index;
2764 info->shift = shift;
2765 return true;
2766 }
2767
2768 return false;
2769}
2770
2771static inline bool
2772offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2773{
2774 return (offset >= -64 * GET_MODE_SIZE (mode)
2775 && offset < 64 * GET_MODE_SIZE (mode)
2776 && offset % GET_MODE_SIZE (mode) == 0);
2777}
2778
2779static inline bool
2780offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
2781 HOST_WIDE_INT offset)
2782{
2783 return offset >= -256 && offset < 256;
2784}
2785
2786static inline bool
2787offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2788{
2789 return (offset >= 0
2790 && offset < 4096 * GET_MODE_SIZE (mode)
2791 && offset % GET_MODE_SIZE (mode) == 0);
2792}
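/* For example, for DImode the three predicates above accept, respectively,
   multiples of 8 in [-512, 504], any offset in [-256, 255], and multiples
   of 8 in [0, 32760]. */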
2793
2794/* Return true if X is a valid address for machine mode MODE. If it is,
2795 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
2796 effect. OUTER_CODE is PARALLEL for a load/store pair. */
2797
2798static bool
2799aarch64_classify_address (struct aarch64_address_info *info,
2800 rtx x, enum machine_mode mode,
2801 RTX_CODE outer_code, bool strict_p)
2802{
2803 enum rtx_code code = GET_CODE (x);
2804 rtx op0, op1;
2805 bool allow_reg_index_p =
2806 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
2807
2808 /* Don't support anything other than POST_INC or REG addressing for
2809 AdvSIMD. */
2810 if (aarch64_vector_mode_p (mode)
2811 && (code != POST_INC && code != REG))
2812 return false;
2813
2814 switch (code)
2815 {
2816 case REG:
2817 case SUBREG:
2818 info->type = ADDRESS_REG_IMM;
2819 info->base = x;
2820 info->offset = const0_rtx;
2821 return aarch64_base_register_rtx_p (x, strict_p);
2822
2823 case PLUS:
2824 op0 = XEXP (x, 0);
2825 op1 = XEXP (x, 1);
2826 if (GET_MODE_SIZE (mode) != 0
2827 && CONST_INT_P (op1)
2828 && aarch64_base_register_rtx_p (op0, strict_p))
2829 {
2830 HOST_WIDE_INT offset = INTVAL (op1);
2831
2832 info->type = ADDRESS_REG_IMM;
2833 info->base = op0;
2834 info->offset = op1;
2835
2836 /* TImode and TFmode values are allowed in both pairs of X
2837 registers and individual Q registers. The available
2838 address modes are:
2839 X,X: 7-bit signed scaled offset
2840 Q: 9-bit signed offset
2841 We conservatively require an offset representable in either mode.
2842 */
2843 if (mode == TImode || mode == TFmode)
2844 return (offset_7bit_signed_scaled_p (mode, offset)
2845 && offset_9bit_signed_unscaled_p (mode, offset));
2846
2847 if (outer_code == PARALLEL)
2848 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2849 && offset_7bit_signed_scaled_p (mode, offset));
2850 else
2851 return (offset_9bit_signed_unscaled_p (mode, offset)
2852 || offset_12bit_unsigned_scaled_p (mode, offset));
2853 }
2854
2855 if (allow_reg_index_p)
2856 {
2857 /* Look for base + (scaled/extended) index register. */
2858 if (aarch64_base_register_rtx_p (op0, strict_p)
2859 && aarch64_classify_index (info, op1, mode, strict_p))
2860 {
2861 info->base = op0;
2862 return true;
2863 }
2864 if (aarch64_base_register_rtx_p (op1, strict_p)
2865 && aarch64_classify_index (info, op0, mode, strict_p))
2866 {
2867 info->base = op1;
2868 return true;
2869 }
2870 }
2871
2872 return false;
2873
2874 case POST_INC:
2875 case POST_DEC:
2876 case PRE_INC:
2877 case PRE_DEC:
2878 info->type = ADDRESS_REG_WB;
2879 info->base = XEXP (x, 0);
2880 info->offset = NULL_RTX;
2881 return aarch64_base_register_rtx_p (info->base, strict_p);
2882
2883 case POST_MODIFY:
2884 case PRE_MODIFY:
2885 info->type = ADDRESS_REG_WB;
2886 info->base = XEXP (x, 0);
2887 if (GET_CODE (XEXP (x, 1)) == PLUS
2888 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
2889 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
2890 && aarch64_base_register_rtx_p (info->base, strict_p))
2891 {
2892 HOST_WIDE_INT offset;
2893 info->offset = XEXP (XEXP (x, 1), 1);
2894 offset = INTVAL (info->offset);
2895
2896 /* TImode and TFmode values are allowed in both pairs of X
2897 registers and individual Q registers. The available
2898 address modes are:
2899 X,X: 7-bit signed scaled offset
2900 Q: 9-bit signed offset
2901 We conservatively require an offset representable in either mode.
2902 */
2903 if (mode == TImode || mode == TFmode)
2904 return (offset_7bit_signed_scaled_p (mode, offset)
2905 && offset_9bit_signed_unscaled_p (mode, offset));
2906
2907 if (outer_code == PARALLEL)
2908 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2909 && offset_7bit_signed_scaled_p (mode, offset));
2910 else
2911 return offset_9bit_signed_unscaled_p (mode, offset);
2912 }
2913 return false;
2914
2915 case CONST:
2916 case SYMBOL_REF:
2917 case LABEL_REF:
2918 /* load literal: pc-relative constant pool entry. */
2919 info->type = ADDRESS_SYMBOLIC;
2920 if (outer_code != PARALLEL)
2921 {
2922 rtx sym, addend;
2923
2924 split_const (x, &sym, &addend);
2925 return (GET_CODE (sym) == LABEL_REF
2926 || (GET_CODE (sym) == SYMBOL_REF
2927 && CONSTANT_POOL_ADDRESS_P (sym)));
2928 }
2929 return false;
2930
2931 case LO_SUM:
2932 info->type = ADDRESS_LO_SUM;
2933 info->base = XEXP (x, 0);
2934 info->offset = XEXP (x, 1);
2935 if (allow_reg_index_p
2936 && aarch64_base_register_rtx_p (info->base, strict_p))
2937 {
2938 rtx sym, offs;
2939 split_const (info->offset, &sym, &offs);
2940 if (GET_CODE (sym) == SYMBOL_REF
2941 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
2942 == SYMBOL_SMALL_ABSOLUTE))
2943 {
2944 /* The symbol and offset must be aligned to the access size. */
2945 unsigned int align;
2946 unsigned int ref_size;
2947
2948 if (CONSTANT_POOL_ADDRESS_P (sym))
2949 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
2950 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
2951 {
2952 tree exp = SYMBOL_REF_DECL (sym);
2953 align = TYPE_ALIGN (TREE_TYPE (exp));
2954 align = CONSTANT_ALIGNMENT (exp, align);
2955 }
2956 else if (SYMBOL_REF_DECL (sym))
2957 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
2958 else
2959 align = BITS_PER_UNIT;
2960
2961 ref_size = GET_MODE_SIZE (mode);
2962 if (ref_size == 0)
2963 ref_size = GET_MODE_SIZE (DImode);
2964
2965 return ((INTVAL (offs) & (ref_size - 1)) == 0
2966 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
2967 }
2968 }
2969 return false;
2970
2971 default:
2972 return false;
2973 }
2974}
2975
2976bool
2977aarch64_symbolic_address_p (rtx x)
2978{
2979 rtx offset;
2980
2981 split_const (x, &x, &offset);
2982 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
2983}
2984
2985/* Classify the base of symbolic expression X, given that X appears in
2986 context CONTEXT. */
2987static enum aarch64_symbol_type
2988aarch64_classify_symbolic_expression (rtx x, enum aarch64_symbol_context context)
2989{
2990 rtx offset;
2991 split_const (x, &x, &offset);
2992 return aarch64_classify_symbol (x, context);
2993}
2994
2995
2996/* Return TRUE if X is a legitimate address for accessing memory in
2997 mode MODE. */
2998static bool
2999aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3000{
3001 struct aarch64_address_info addr;
3002
3003 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3004}
3005
3006/* Return TRUE if X is a legitimate address for accessing memory in
3007 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3008 pair operation. */
3009bool
3010aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3011 RTX_CODE outer_code, bool strict_p)
3012{
3013 struct aarch64_address_info addr;
3014
3015 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3016}
3017
3018/* Return TRUE if rtx X is the immediate constant 0.0. */
3019bool
3020aarch64_const_double_zero_rtx_p (rtx x)
3021{
3022 REAL_VALUE_TYPE r;
3023
3024 if (GET_MODE (x) == VOIDmode)
3025 return false;
3026
3027 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3028 if (REAL_VALUE_MINUS_ZERO (r))
3029 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3030 return REAL_VALUES_EQUAL (r, dconst0);
3031}
3032
3033enum machine_mode
3034aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3035{
 3036 /* All floating point compares return CCFPE if it is an ordered
 3037 relational comparison (LT, LE, GT or GE), and CCFP otherwise. */
3038 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3039 {
3040 switch (code)
3041 {
3042 case EQ:
3043 case NE:
3044 case UNORDERED:
3045 case ORDERED:
3046 case UNLT:
3047 case UNLE:
3048 case UNGT:
3049 case UNGE:
3050 case UNEQ:
3051 case LTGT:
3052 return CCFPmode;
3053
3054 case LT:
3055 case LE:
3056 case GT:
3057 case GE:
3058 return CCFPEmode;
3059
3060 default:
3061 gcc_unreachable ();
3062 }
3063 }
3064
3065 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3066 && y == const0_rtx
3067 && (code == EQ || code == NE || code == LT || code == GE)
3068 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS))
3069 return CC_NZmode;
3070
3071 /* A compare with a shifted operand. Because of canonicalization,
3072 the comparison will have to be swapped when we emit the assembly
3073 code. */
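 /* For example, (compare (ashift x 2) y) has to be output with Y as the
 first operand, e.g. "cmp y, x, lsl 2", so the condition must be tested
 in the swapped sense; see the CC_SWPmode cases in
 aarch64_get_condition_code. */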
3074 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3075 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3076 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3077 || GET_CODE (x) == LSHIFTRT
3078 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3079 return CC_SWPmode;
3080
3081 /* A compare of a mode narrower than SI mode against zero can be done
3082 by extending the value in the comparison. */
3083 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3084 && y == const0_rtx)
3085 /* Only use sign-extension if we really need it. */
3086 return ((code == GT || code == GE || code == LE || code == LT)
3087 ? CC_SESWPmode : CC_ZESWPmode);
3088
3089 /* For everything else, return CCmode. */
3090 return CCmode;
3091}
3092
3093static unsigned
3094aarch64_get_condition_code (rtx x)
3095{
3096 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3097 enum rtx_code comp_code = GET_CODE (x);
3098
3099 if (GET_MODE_CLASS (mode) != MODE_CC)
3100 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3101
3102 switch (mode)
3103 {
3104 case CCFPmode:
3105 case CCFPEmode:
3106 switch (comp_code)
3107 {
3108 case GE: return AARCH64_GE;
3109 case GT: return AARCH64_GT;
3110 case LE: return AARCH64_LS;
3111 case LT: return AARCH64_MI;
3112 case NE: return AARCH64_NE;
3113 case EQ: return AARCH64_EQ;
3114 case ORDERED: return AARCH64_VC;
3115 case UNORDERED: return AARCH64_VS;
3116 case UNLT: return AARCH64_LT;
3117 case UNLE: return AARCH64_LE;
3118 case UNGT: return AARCH64_HI;
3119 case UNGE: return AARCH64_PL;
3120 default: gcc_unreachable ();
3121 }
3122 break;
3123
3124 case CCmode:
3125 switch (comp_code)
3126 {
3127 case NE: return AARCH64_NE;
3128 case EQ: return AARCH64_EQ;
3129 case GE: return AARCH64_GE;
3130 case GT: return AARCH64_GT;
3131 case LE: return AARCH64_LE;
3132 case LT: return AARCH64_LT;
3133 case GEU: return AARCH64_CS;
3134 case GTU: return AARCH64_HI;
3135 case LEU: return AARCH64_LS;
3136 case LTU: return AARCH64_CC;
3137 default: gcc_unreachable ();
3138 }
3139 break;
3140
3141 case CC_SWPmode:
3142 case CC_ZESWPmode:
3143 case CC_SESWPmode:
3144 switch (comp_code)
3145 {
3146 case NE: return AARCH64_NE;
3147 case EQ: return AARCH64_EQ;
3148 case GE: return AARCH64_LE;
3149 case GT: return AARCH64_LT;
3150 case LE: return AARCH64_GE;
3151 case LT: return AARCH64_GT;
3152 case GEU: return AARCH64_LS;
3153 case GTU: return AARCH64_CC;
3154 case LEU: return AARCH64_CS;
3155 case LTU: return AARCH64_HI;
3156 default: gcc_unreachable ();
3157 }
3158 break;
3159
3160 case CC_NZmode:
3161 switch (comp_code)
3162 {
3163 case NE: return AARCH64_NE;
3164 case EQ: return AARCH64_EQ;
3165 case GE: return AARCH64_PL;
3166 case LT: return AARCH64_MI;
3167 default: gcc_unreachable ();
3168 }
3169 break;
3170
3171 default:
3172 gcc_unreachable ();
3173 break;
3174 }
3175}
3176
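/* Return the number of set bits in VALUE, clearing the least significant
   set bit on each iteration. */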
3177static unsigned
3178bit_count (unsigned HOST_WIDE_INT value)
3179{
3180 unsigned count = 0;
3181
3182 while (value)
3183 {
3184 count++;
3185 value &= value - 1;
3186 }
3187
3188 return count;
3189}
3190
3191void
3192aarch64_print_operand (FILE *f, rtx x, char code)
3193{
3194 switch (code)
3195 {
3196 case 'e':
3197 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3198 {
3199 int n;
3200
3201 if (GET_CODE (x) != CONST_INT
3202 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3203 {
3204 output_operand_lossage ("invalid operand for '%%%c'", code);
3205 return;
3206 }
3207
3208 switch (n)
3209 {
3210 case 3:
3211 fputc ('b', f);
3212 break;
3213 case 4:
3214 fputc ('h', f);
3215 break;
3216 case 5:
3217 fputc ('w', f);
3218 break;
3219 default:
3220 output_operand_lossage ("invalid operand for '%%%c'", code);
3221 return;
3222 }
3223 }
3224 break;
3225
3226 case 'p':
3227 {
3228 int n;
3229
3230 /* Print N such that 2^N == X. */
3231 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3232 {
3233 output_operand_lossage ("invalid operand for '%%%c'", code);
3234 return;
3235 }
3236
3237 asm_fprintf (f, "%d", n);
3238 }
3239 break;
3240
3241 case 'P':
3242 /* Print the number of non-zero bits in X (a const_int). */
3243 if (GET_CODE (x) != CONST_INT)
3244 {
3245 output_operand_lossage ("invalid operand for '%%%c'", code);
3246 return;
3247 }
3248
3249 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3250 break;
3251
3252 case 'H':
3253 /* Print the higher numbered register of a pair (TImode) of regs. */
3254 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3255 {
3256 output_operand_lossage ("invalid operand for '%%%c'", code);
3257 return;
3258 }
3259
3260 asm_fprintf (f, "%r", REGNO (x) + 1);
3261 break;
3262
3263 case 'Q':
3264 /* Print the least significant register of a pair (TImode) of regs. */
3265 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3266 {
3267 output_operand_lossage ("invalid operand for '%%%c'", code);
3268 return;
3269 }
3270 asm_fprintf (f, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
3271 break;
3272
3273 case 'R':
3274 /* Print the most significant register of a pair (TImode) of regs. */
3275 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3276 {
3277 output_operand_lossage ("invalid operand for '%%%c'", code);
3278 return;
3279 }
3280 asm_fprintf (f, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
3281 break;
3282
3283 case 'm':
3284 /* Print a condition (eq, ne, etc). */
3285
3286 /* CONST_TRUE_RTX means always -- that's the default. */
3287 if (x == const_true_rtx)
3288 return;
3289
3290 if (!COMPARISON_P (x))
3291 {
3292 output_operand_lossage ("invalid operand for '%%%c'", code);
3293 return;
3294 }
3295
3296 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3297 break;
3298
3299 case 'M':
3300 /* Print the inverse of a condition (eq <-> ne, etc). */
3301
3302 /* CONST_TRUE_RTX means never -- that's the default. */
3303 if (x == const_true_rtx)
3304 {
3305 fputs ("nv", f);
3306 return;
3307 }
3308
3309 if (!COMPARISON_P (x))
3310 {
3311 output_operand_lossage ("invalid operand for '%%%c'", code);
3312 return;
3313 }
3314
3315 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3316 (aarch64_get_condition_code (x))], f);
3317 break;
3318
3319 case 'b':
3320 case 'h':
3321 case 's':
3322 case 'd':
3323 case 'q':
3324 /* Print a scalar FP/SIMD register name. */
3325 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3326 {
3327 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3328 return;
3329 }
3330 asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code, REGNO (x) - V0_REGNUM);
3331 break;
3332
3333 case 'S':
3334 case 'T':
3335 case 'U':
3336 case 'V':
3337 /* Print the first FP/SIMD register name in a list. */
3338 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3339 {
3340 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3341 return;
3342 }
3343 asm_fprintf (f, "%sv%d", REGISTER_PREFIX,
3344 REGNO (x) - V0_REGNUM + (code - 'S'));
3345 break;
3346
3347 case 'w':
3348 case 'x':
3349 /* Print a general register name or the zero register (32-bit or
3350 64-bit). */
3351 if (x == const0_rtx)
3352 {
3353 asm_fprintf (f, "%s%czr", REGISTER_PREFIX, code);
3354 break;
3355 }
3356
3357 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3358 {
3359 asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code,
3360 REGNO (x) - R0_REGNUM);
3361 break;
3362 }
3363
3364 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3365 {
3366 asm_fprintf (f, "%s%ssp", REGISTER_PREFIX, code == 'w' ? "w" : "");
3367 break;
3368 }
3369
3370 /* Fall through */
3371
3372 case 0:
 3373 /* Print a normal operand. If it's a general register, then we
3374 assume DImode. */
3375 if (x == NULL)
3376 {
3377 output_operand_lossage ("missing operand");
3378 return;
3379 }
3380
3381 switch (GET_CODE (x))
3382 {
3383 case REG:
3384 asm_fprintf (f, "%r", REGNO (x));
3385 break;
3386
3387 case MEM:
3388 aarch64_memory_reference_mode = GET_MODE (x);
3389 output_address (XEXP (x, 0));
3390 break;
3391
3392 case LABEL_REF:
3393 case SYMBOL_REF:
3394 output_addr_const (asm_out_file, x);
3395 break;
3396
3397 case CONST_INT:
3398 asm_fprintf (f, "%wd", INTVAL (x));
3399 break;
3400
3401 case CONST_VECTOR:
3402 gcc_assert (aarch64_const_vec_all_same_int_p (x, HOST_WIDE_INT_MIN,
3403 HOST_WIDE_INT_MAX));
3404 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3405 break;
3406
3407 default:
3408 output_operand_lossage ("invalid operand");
3409 return;
3410 }
3411 break;
3412
3413 case 'A':
3414 if (GET_CODE (x) == HIGH)
3415 x = XEXP (x, 0);
3416
3417 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3418 {
3419 case SYMBOL_SMALL_GOT:
3420 asm_fprintf (asm_out_file, ":got:");
3421 break;
3422
3423 case SYMBOL_SMALL_TLSGD:
3424 asm_fprintf (asm_out_file, ":tlsgd:");
3425 break;
3426
3427 case SYMBOL_SMALL_TLSDESC:
3428 asm_fprintf (asm_out_file, ":tlsdesc:");
3429 break;
3430
3431 case SYMBOL_SMALL_GOTTPREL:
3432 asm_fprintf (asm_out_file, ":gottprel:");
3433 break;
3434
3435 case SYMBOL_SMALL_TPREL:
3436 asm_fprintf (asm_out_file, ":tprel:");
3437 break;
3438
3439 default:
3440 break;
3441 }
3442 output_addr_const (asm_out_file, x);
3443 break;
3444
3445 case 'L':
3446 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3447 {
3448 case SYMBOL_SMALL_GOT:
3449 asm_fprintf (asm_out_file, ":lo12:");
3450 break;
3451
3452 case SYMBOL_SMALL_TLSGD:
3453 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3454 break;
3455
3456 case SYMBOL_SMALL_TLSDESC:
3457 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3458 break;
3459
3460 case SYMBOL_SMALL_GOTTPREL:
3461 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3462 break;
3463
3464 case SYMBOL_SMALL_TPREL:
3465 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3466 break;
3467
3468 default:
3469 break;
3470 }
3471 output_addr_const (asm_out_file, x);
3472 break;
3473
3474 case 'G':
3475
3476 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3477 {
3478 case SYMBOL_SMALL_TPREL:
3479 asm_fprintf (asm_out_file, ":tprel_hi12:");
3480 break;
3481 default:
3482 break;
3483 }
3484 output_addr_const (asm_out_file, x);
3485 break;
3486
3487 default:
3488 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3489 return;
3490 }
3491}
3492
3493void
3494aarch64_print_operand_address (FILE *f, rtx x)
3495{
3496 struct aarch64_address_info addr;
3497
3498 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3499 MEM, true))
3500 switch (addr.type)
3501 {
3502 case ADDRESS_REG_IMM:
3503 if (addr.offset == const0_rtx)
3504 asm_fprintf (f, "[%r]", REGNO (addr.base));
3505 else
3506 asm_fprintf (f, "[%r,%wd]", REGNO (addr.base),
3507 INTVAL (addr.offset));
3508 return;
3509
3510 case ADDRESS_REG_REG:
3511 if (addr.shift == 0)
3512 asm_fprintf (f, "[%r,%r]", REGNO (addr.base),
3513 REGNO (addr.offset));
3514 else
3515 asm_fprintf (f, "[%r,%r,lsl %u]", REGNO (addr.base),
3516 REGNO (addr.offset), addr.shift);
3517 return;
3518
3519 case ADDRESS_REG_UXTW:
3520 if (addr.shift == 0)
3521 asm_fprintf (f, "[%r,w%d,uxtw]", REGNO (addr.base),
3522 REGNO (addr.offset) - R0_REGNUM);
3523 else
3524 asm_fprintf (f, "[%r,w%d,uxtw %u]", REGNO (addr.base),
3525 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3526 return;
3527
3528 case ADDRESS_REG_SXTW:
3529 if (addr.shift == 0)
3530 asm_fprintf (f, "[%r,w%d,sxtw]", REGNO (addr.base),
3531 REGNO (addr.offset) - R0_REGNUM);
3532 else
3533 asm_fprintf (f, "[%r,w%d,sxtw %u]", REGNO (addr.base),
3534 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3535 return;
3536
3537 case ADDRESS_REG_WB:
3538 switch (GET_CODE (x))
3539 {
3540 case PRE_INC:
3541 asm_fprintf (f, "[%r,%d]!", REGNO (addr.base),
3542 GET_MODE_SIZE (aarch64_memory_reference_mode));
3543 return;
3544 case POST_INC:
3545 asm_fprintf (f, "[%r],%d", REGNO (addr.base),
3546 GET_MODE_SIZE (aarch64_memory_reference_mode));
3547 return;
3548 case PRE_DEC:
3549 asm_fprintf (f, "[%r,-%d]!", REGNO (addr.base),
3550 GET_MODE_SIZE (aarch64_memory_reference_mode));
3551 return;
3552 case POST_DEC:
3553 asm_fprintf (f, "[%r],-%d", REGNO (addr.base),
3554 GET_MODE_SIZE (aarch64_memory_reference_mode));
3555 return;
3556 case PRE_MODIFY:
3557 asm_fprintf (f, "[%r,%wd]!", REGNO (addr.base),
3558 INTVAL (addr.offset));
3559 return;
3560 case POST_MODIFY:
3561 asm_fprintf (f, "[%r],%wd", REGNO (addr.base),
3562 INTVAL (addr.offset));
3563 return;
3564 default:
3565 break;
3566 }
3567 break;
3568
3569 case ADDRESS_LO_SUM:
3570 asm_fprintf (f, "[%r,#:lo12:", REGNO (addr.base));
3571 output_addr_const (f, addr.offset);
3572 asm_fprintf (f, "]");
3573 return;
3574
3575 case ADDRESS_SYMBOLIC:
3576 break;
3577 }
3578
3579 output_addr_const (f, x);
3580}
3581
3582void
3583aarch64_function_profiler (FILE *f ATTRIBUTE_UNUSED,
3584 int labelno ATTRIBUTE_UNUSED)
3585{
3586 sorry ("function profiling");
3587}
3588
3589bool
3590aarch64_label_mentioned_p (rtx x)
3591{
3592 const char *fmt;
3593 int i;
3594
3595 if (GET_CODE (x) == LABEL_REF)
3596 return true;
3597
3598 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3599 referencing instruction, but they are constant offsets, not
3600 symbols. */
3601 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3602 return false;
3603
3604 fmt = GET_RTX_FORMAT (GET_CODE (x));
3605 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3606 {
3607 if (fmt[i] == 'E')
3608 {
3609 int j;
3610
3611 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3612 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3613 return 1;
3614 }
3615 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3616 return 1;
3617 }
3618
3619 return 0;
3620}
3621
3622/* Implement REGNO_REG_CLASS. */
3623
3624enum reg_class
3625aarch64_regno_regclass (unsigned regno)
3626{
3627 if (GP_REGNUM_P (regno))
3628 return CORE_REGS;
3629
3630 if (regno == SP_REGNUM)
3631 return STACK_REG;
3632
3633 if (regno == FRAME_POINTER_REGNUM
3634 || regno == ARG_POINTER_REGNUM)
3635 return CORE_REGS;
3636
3637 if (FP_REGNUM_P (regno))
3638 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3639
3640 return NO_REGS;
3641}
3642
3643/* Try a machine-dependent way of reloading an illegitimate address
3644 operand. If we find one, push the reload and return the new rtx. */
3645
3646rtx
3647aarch64_legitimize_reload_address (rtx *x_p,
3648 enum machine_mode mode,
3649 int opnum, int type,
3650 int ind_levels ATTRIBUTE_UNUSED)
3651{
3652 rtx x = *x_p;
3653
3654 /* Do not allow mem (plus (reg, const)) if vector mode. */
3655 if (aarch64_vector_mode_p (mode)
3656 && GET_CODE (x) == PLUS
3657 && REG_P (XEXP (x, 0))
3658 && CONST_INT_P (XEXP (x, 1)))
3659 {
3660 rtx orig_rtx = x;
3661 x = copy_rtx (x);
3662 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3663 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3664 opnum, (enum reload_type) type);
3665 return x;
3666 }
3667
3668 /* We must recognize output that we have already generated ourselves. */
3669 if (GET_CODE (x) == PLUS
3670 && GET_CODE (XEXP (x, 0)) == PLUS
3671 && REG_P (XEXP (XEXP (x, 0), 0))
3672 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3673 && CONST_INT_P (XEXP (x, 1)))
3674 {
3675 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3676 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3677 opnum, (enum reload_type) type);
3678 return x;
3679 }
3680
3681 /* We wish to handle large displacements off a base register by splitting
3682 the addend across an add and the mem insn. This can cut the number of
3683 extra insns needed from 3 to 1. It is only useful for load/store of a
 3684 single register with a 12-bit offset field. */
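 /* For example, a DImode access at base + 0x3458 is rewritten so that
 0x3000 is added into the reloaded base register and the remaining
 offset 0x458 stays in the memory reference. */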
3685 if (GET_CODE (x) == PLUS
3686 && REG_P (XEXP (x, 0))
3687 && CONST_INT_P (XEXP (x, 1))
3688 && HARD_REGISTER_P (XEXP (x, 0))
3689 && mode != TImode
3690 && mode != TFmode
3691 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
3692 {
3693 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3694 HOST_WIDE_INT low = val & 0xfff;
3695 HOST_WIDE_INT high = val - low;
3696 HOST_WIDE_INT offs;
3697 rtx cst;
3698
3699 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
3700 BLKmode alignment. */
3701 if (GET_MODE_SIZE (mode) == 0)
3702 return NULL_RTX;
3703
3704 offs = low % GET_MODE_SIZE (mode);
3705
3706 /* Align misaligned offset by adjusting high part to compensate. */
3707 if (offs != 0)
3708 {
3709 if (aarch64_uimm12_shift (high + offs))
3710 {
3711 /* Align down. */
3712 low = low - offs;
3713 high = high + offs;
3714 }
3715 else
3716 {
3717 /* Align up. */
3718 offs = GET_MODE_SIZE (mode) - offs;
3719 low = low + offs;
3720 high = high + (low & 0x1000) - offs;
3721 low &= 0xfff;
3722 }
3723 }
3724
3725 /* Check for overflow. */
3726 if (high + low != val)
3727 return NULL_RTX;
3728
3729 cst = GEN_INT (high);
3730 if (!aarch64_uimm12_shift (high))
3731 cst = force_const_mem (Pmode, cst);
3732
3733 /* Reload high part into base reg, leaving the low part
3734 in the mem instruction. */
3735 x = gen_rtx_PLUS (Pmode,
3736 gen_rtx_PLUS (Pmode, XEXP (x, 0), cst),
3737 GEN_INT (low));
3738
3739 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3740 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
3741 opnum, (enum reload_type) type);
3742 return x;
3743 }
3744
3745 return NULL_RTX;
3746}
3747
3748
3749static reg_class_t
3750aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
3751 reg_class_t rclass,
3752 enum machine_mode mode,
3753 secondary_reload_info *sri)
3754{
3755 /* Address expressions of the form PLUS (SP, large_offset) need two
3756 scratch registers, one for the constant, and one for holding a
3757 copy of SP, since SP cannot be used on the RHS of an add-reg
3758 instruction. */
3759 if (mode == DImode
3760 && GET_CODE (x) == PLUS
3761 && XEXP (x, 0) == stack_pointer_rtx
3762 && CONST_INT_P (XEXP (x, 1))
3763 && !aarch64_uimm12_shift (INTVAL (XEXP (x, 1))))
3764 {
3765 sri->icode = CODE_FOR_reload_sp_immediate;
3766 return NO_REGS;
3767 }
3768
3769 /* Without the TARGET_SIMD instructions we cannot move a Q register
3770 to a Q register directly. We need a scratch. */
3771 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
3772 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
3773 && reg_class_subset_p (rclass, FP_REGS))
3774 {
3775 if (mode == TFmode)
3776 sri->icode = CODE_FOR_aarch64_reload_movtf;
3777 else if (mode == TImode)
3778 sri->icode = CODE_FOR_aarch64_reload_movti;
3779 return NO_REGS;
3780 }
3781
 3782 /* A TFmode or TImode memory access should be handled via FP_REGS
3783 because AArch64 has richer addressing modes for LDR/STR instructions
3784 than LDP/STP instructions. */
3785 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
3786 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
3787 return FP_REGS;
3788
3789 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
3790 return CORE_REGS;
3791
3792 return NO_REGS;
3793}
3794
3795static bool
3796aarch64_can_eliminate (const int from, const int to)
3797{
3798 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
3799 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
3800
3801 if (frame_pointer_needed)
3802 {
3803 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3804 return true;
3805 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3806 return false;
3807 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3808 && !cfun->calls_alloca)
3809 return true;
3810 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3811 return true;
3812 return false;
3813 }
3814 else
3815 {
3816 /* If we decided that we didn't need a frame pointer but then used
3817 LR in the function, then we do need a frame pointer after all, so
3818 prevent this elimination to ensure a frame pointer is used. */
3819
3820 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3821 && df_regs_ever_live_p (LR_REGNUM))
3822 return false;
3823 }
3824 return true;
3825}
3826
3827HOST_WIDE_INT
3828aarch64_initial_elimination_offset (unsigned from, unsigned to)
3829{
3830 HOST_WIDE_INT frame_size;
3831 HOST_WIDE_INT offset;
3832
3833 aarch64_layout_frame ();
3834 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
3835 + crtl->outgoing_args_size
3836 + cfun->machine->saved_varargs_size);
3837
3838 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
3839 offset = frame_size;
3840
3841 if (to == HARD_FRAME_POINTER_REGNUM)
3842 {
3843 if (from == ARG_POINTER_REGNUM)
3844 return offset - crtl->outgoing_args_size;
3845
3846 if (from == FRAME_POINTER_REGNUM)
3847 return cfun->machine->frame.saved_regs_size;
3848 }
3849
3850 if (to == STACK_POINTER_REGNUM)
3851 {
3852 if (from == FRAME_POINTER_REGNUM)
3853 {
3854 HOST_WIDE_INT elim = crtl->outgoing_args_size
3855 + cfun->machine->frame.saved_regs_size
3856 - cfun->machine->frame.fp_lr_offset;
3857 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
3858 return elim;
3859 }
3860 }
3861
3862 return offset;
3863}
3864
3865
3866/* Implement RETURN_ADDR_RTX. We do not support moving back to a
3867 previous frame. */
3868
3869rtx
3870aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
3871{
3872 if (count != 0)
3873 return const0_rtx;
3874 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
3875}
3876
3877
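/* The trampoline is laid out as three instructions (a literal load of the
   target address into IP1, a literal load into the static chain register
   and a branch through IP1), 4 bytes of padding, and then two 64-bit data
   words holding the target address and the static chain value, which
   aarch64_trampoline_init fills in at offsets 16 and 24. */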
3878static void
3879aarch64_asm_trampoline_template (FILE *f)
3880{
3881 asm_fprintf (f, "\tldr\t%r, .+16\n", IP1_REGNUM);
3882 asm_fprintf (f, "\tldr\t%r, .+20\n", STATIC_CHAIN_REGNUM);
3883 asm_fprintf (f, "\tbr\t%r\n", IP1_REGNUM);
3884 assemble_aligned_integer (4, const0_rtx);
3885 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3886 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3887}
3888
3889unsigned
3890aarch64_trampoline_size (void)
3891{
3892 return 32; /* 3 insns + padding + 2 dwords. */
3893}
3894
3895static void
3896aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3897{
3898 rtx fnaddr, mem, a_tramp;
3899
 3900 /* No need to copy the trailing D-words; we fill those in below. */
3901 emit_block_move (m_tramp, assemble_trampoline_template (),
3902 GEN_INT (TRAMPOLINE_SIZE - 16), BLOCK_OP_NORMAL);
3903 mem = adjust_address (m_tramp, DImode, 16);
3904 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3905 emit_move_insn (mem, fnaddr);
3906
3907 mem = adjust_address (m_tramp, DImode, 24);
3908 emit_move_insn (mem, chain_value);
3909
3910 /* XXX We should really define a "clear_cache" pattern and use
3911 gen_clear_cache(). */
3912 a_tramp = XEXP (m_tramp, 0);
3913 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3914 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3915 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3916}
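
/* An illustrative sketch (hypothetical type, not used by the compiler):
   the 32-byte trampoline built above can be pictured as the layout
   below.  The template's three instructions load the target address and
   static chain from the two trailing double-words and branch via IP1;
   aarch64_trampoline_init fills in the double-words at offsets 16 and 24.  */
struct aarch64_example_trampoline
{
  unsigned int insns[3];	/* ldr, ldr, br -- from the template.  */
  unsigned int pad;		/* Keeps the data double-words aligned.  */
  unsigned long long fnaddr;	/* Written at offset 16.  */
  unsigned long long chain;	/* Written at offset 24.  */
};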
3917
3918static unsigned char
3919aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
3920{
3921 switch (regclass)
3922 {
3923 case CORE_REGS:
3924 case POINTER_REGS:
3925 case GENERAL_REGS:
3926 case ALL_REGS:
3927 case FP_REGS:
3928 case FP_LO_REGS:
3929 return
3930 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
3931 (GET_MODE_SIZE (mode) + 7) / 8;
3932 case STACK_REG:
3933 return 1;
3934
3935 case NO_REGS:
3936 return 0;
3937
3938 default:
3939 break;
3940 }
3941 gcc_unreachable ();
3942}
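
/* An illustrative sketch (hypothetical helper, never called): concrete
   mode/class pairs for the register-count calculation above.  Scalar
   modes are counted in 8-byte X registers; vector modes, when SIMD is
   available, in 16-byte Q registers.  */
static void ATTRIBUTE_UNUSED
aarch64_example_class_max_nregs (void)
{
  gcc_assert (aarch64_class_max_nregs (GENERAL_REGS, DImode) == 1);
  gcc_assert (aarch64_class_max_nregs (GENERAL_REGS, TImode) == 2);
  /* With SIMD a 128-bit vector fits in a single Q register.  */
  gcc_assert (!TARGET_SIMD
	      || aarch64_class_max_nregs (FP_REGS, V4SImode) == 1);
}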
3943
3944static reg_class_t
3945aarch64_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t regclass)
3946{
3947 return ((regclass == POINTER_REGS || regclass == STACK_REG)
3948 ? GENERAL_REGS : regclass);
3949}
3950
3951void
3952aarch64_asm_output_labelref (FILE* f, const char *name)
3953{
3954 asm_fprintf (f, "%U%s", name);
3955}
3956
3957static void
3958aarch64_elf_asm_constructor (rtx symbol, int priority)
3959{
3960 if (priority == DEFAULT_INIT_PRIORITY)
3961 default_ctor_section_asm_out_constructor (symbol, priority);
3962 else
3963 {
3964 section *s;
3965 char buf[18];
3966 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
3967 s = get_section (buf, SECTION_WRITE, NULL);
3968 switch_to_section (s);
3969 assemble_align (POINTER_SIZE);
3970 fputs ("\t.dword\t", asm_out_file);
3971 output_addr_const (asm_out_file, symbol);
3972 fputc ('\n', asm_out_file);
3973 }
3974}
3975
3976static void
3977aarch64_elf_asm_destructor (rtx symbol, int priority)
3978{
3979 if (priority == DEFAULT_INIT_PRIORITY)
3980 default_dtor_section_asm_out_destructor (symbol, priority);
3981 else
3982 {
3983 section *s;
3984 char buf[18];
3985 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
3986 s = get_section (buf, SECTION_WRITE, NULL);
3987 switch_to_section (s);
3988 assemble_align (POINTER_SIZE);
3989 fputs ("\t.dword\t", asm_out_file);
3990 output_addr_const (asm_out_file, symbol);
3991 fputc ('\n', asm_out_file);
3992 }
3993}
3994
3995const char*
3996aarch64_output_casesi (rtx *operands)
3997{
3998 char buf[100];
3999 char label[100];
4000 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
4001 int index;
4002 static const char *const patterns[4][2] =
4003 {
4004 {
4005 "ldrb\t%w3, [%0,%w1,uxtw]",
4006 "add\t%3, %4, %w3, sxtb #2"
4007 },
4008 {
4009 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4010 "add\t%3, %4, %w3, sxth #2"
4011 },
4012 {
4013 "ldr\t%w3, [%0,%w1,uxtw #2]",
4014 "add\t%3, %4, %w3, sxtw #2"
4015 },
4016 /* We assume that DImode is only generated when not optimizing and
4017 that we don't really need 64-bit address offsets. That would
4018 imply an object file with 8GB of code in a single function! */
4019 {
4020 "ldr\t%w3, [%0,%w1,uxtw #2]",
4021 "add\t%3, %4, %w3, sxtw #2"
4022 }
4023 };
4024
4025 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4026
4027 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4028
4029 gcc_assert (index >= 0 && index <= 3);
4030
 4031 /* Need to implement table size reduction, by changing the code below. */
4032 output_asm_insn (patterns[index][0], operands);
4033 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4034 snprintf (buf, sizeof (buf),
4035 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4036 output_asm_insn (buf, operands);
4037 output_asm_insn (patterns[index][1], operands);
4038 output_asm_insn ("br\t%3", operands);
4039 assemble_label (asm_out_file, label);
4040 return "";
4041}
4042
4043
4044/* Return size in bits of an arithmetic operand which is shifted/scaled and
4045 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4046 operator. */
4047
4048int
4049aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4050{
4051 if (shift >= 0 && shift <= 3)
4052 {
4053 int size;
4054 for (size = 8; size <= 32; size *= 2)
4055 {
4056 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4057 if (mask == bits << shift)
4058 return size;
4059 }
4060 }
4061 return 0;
4062}
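
/* An illustrative sketch (hypothetical helper, never called): concrete
   shift/mask pairs for the check above.  An operand such as
   (and (ashift (reg) (const_int 1)) (const_int 0x1fe)) matches a UXTB
   extend with a left shift of one.  */
static void ATTRIBUTE_UNUSED
aarch64_example_uxt_size (void)
{
  gcc_assert (aarch64_uxt_size (1, 0x1fe) == 8);	/* UXTB, shift #1.  */
  gcc_assert (aarch64_uxt_size (2, 0x3fffc) == 16);	/* UXTH, shift #2.  */
  gcc_assert (aarch64_uxt_size (0, 0x1fe) == 0);	/* Not a valid mask.  */
}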
4063
4064static bool
4065aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4066 const_rtx x ATTRIBUTE_UNUSED)
4067{
4068 /* We can't use blocks for constants when we're using a per-function
4069 constant pool. */
4070 return false;
4071}
4072
4073static section *
4074aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4075 rtx x ATTRIBUTE_UNUSED,
4076 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4077{
4078 /* Force all constant pool entries into the current function section. */
4079 return function_section (current_function_decl);
4080}
4081
4082
4083/* Costs. */
4084
4085/* Helper function for rtx cost calculation. Strip a shift expression
4086 from X. Returns the inner operand if successful, or the original
4087 expression on failure. */
4088static rtx
4089aarch64_strip_shift (rtx x)
4090{
4091 rtx op = x;
4092
4093 if ((GET_CODE (op) == ASHIFT
4094 || GET_CODE (op) == ASHIFTRT
4095 || GET_CODE (op) == LSHIFTRT)
4096 && CONST_INT_P (XEXP (op, 1)))
4097 return XEXP (op, 0);
4098
4099 if (GET_CODE (op) == MULT
4100 && CONST_INT_P (XEXP (op, 1))
4101 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4102 return XEXP (op, 0);
4103
4104 return x;
4105}
4106
4107/* Helper function for rtx cost calculation. Strip a shift or extend
4108 expression from X. Returns the inner operand if successful, or the
4109 original expression on failure. We deal with a number of possible
4110 canonicalization variations here. */
4111static rtx
4112aarch64_strip_shift_or_extend (rtx x)
4113{
4114 rtx op = x;
4115
4116 /* Zero and sign extraction of a widened value. */
4117 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4118 && XEXP (op, 2) == const0_rtx
4119 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4120 XEXP (op, 1)))
4121 return XEXP (XEXP (op, 0), 0);
4122
4123 /* It can also be represented (for zero-extend) as an AND with an
4124 immediate. */
4125 if (GET_CODE (op) == AND
4126 && GET_CODE (XEXP (op, 0)) == MULT
4127 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4128 && CONST_INT_P (XEXP (op, 1))
4129 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4130 INTVAL (XEXP (op, 1))) != 0)
4131 return XEXP (XEXP (op, 0), 0);
4132
4133 /* Now handle extended register, as this may also have an optional
4134 left shift by 1..4. */
4135 if (GET_CODE (op) == ASHIFT
4136 && CONST_INT_P (XEXP (op, 1))
4137 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4138 op = XEXP (op, 0);
4139
4140 if (GET_CODE (op) == ZERO_EXTEND
4141 || GET_CODE (op) == SIGN_EXTEND)
4142 op = XEXP (op, 0);
4143
4144 if (op != x)
4145 return op;
4146
4147 return aarch64_strip_shift (x);
4148}
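
/* An illustrative sketch (hypothetical helper, never called): the kind of
   operand the two helpers above simplify.  A register scaled by a power
   of two is written in RTL as a MULT by a constant, and the stripped
   result is just the inner register.  */
static rtx ATTRIBUTE_UNUSED
aarch64_example_strip_scaled_operand (rtx reg)
{
  rtx scaled = gen_rtx_MULT (DImode, reg, GEN_INT (4));
  return aarch64_strip_shift (scaled);
}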
4149
4150/* Calculate the cost of calculating X, storing it in *COST. Result
4151 is true if the total cost of the operation has now been calculated. */
4152static bool
4153aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4154 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4155{
4156 rtx op0, op1;
4157 const struct cpu_rtx_cost_table *extra_cost
4158 = aarch64_tune_params->insn_extra_cost;
4159
4160 switch (code)
4161 {
4162 case SET:
4163 op0 = SET_DEST (x);
4164 op1 = SET_SRC (x);
4165
4166 switch (GET_CODE (op0))
4167 {
4168 case MEM:
4169 if (speed)
4170 *cost += extra_cost->memory_store;
4171
4172 if (op1 != const0_rtx)
4173 *cost += rtx_cost (op1, SET, 1, speed);
4174 return true;
4175
4176 case SUBREG:
4177 if (! REG_P (SUBREG_REG (op0)))
4178 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4179 /* Fall through. */
4180 case REG:
4181 /* Cost is just the cost of the RHS of the set. */
 4182 *cost += rtx_cost (op1, SET, 1, speed);
4183 return true;
4184
4185 case ZERO_EXTRACT: /* Bit-field insertion. */
4186 case SIGN_EXTRACT:
4187 /* Strip any redundant widening of the RHS to meet the width of
4188 the target. */
4189 if (GET_CODE (op1) == SUBREG)
4190 op1 = SUBREG_REG (op1);
4191 if ((GET_CODE (op1) == ZERO_EXTEND
4192 || GET_CODE (op1) == SIGN_EXTEND)
4193 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4194 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4195 >= INTVAL (XEXP (op0, 1))))
4196 op1 = XEXP (op1, 0);
4197 *cost += rtx_cost (op1, SET, 1, speed);
4198 return true;
4199
4200 default:
4201 break;
4202 }
4203 return false;
4204
4205 case MEM:
4206 if (speed)
4207 *cost += extra_cost->memory_load;
4208
4209 return true;
4210
4211 case NEG:
4212 op0 = CONST0_RTX (GET_MODE (x));
4213 op1 = XEXP (x, 0);
4214 goto cost_minus;
4215
4216 case COMPARE:
4217 op0 = XEXP (x, 0);
4218 op1 = XEXP (x, 1);
4219
4220 if (op1 == const0_rtx
4221 && GET_CODE (op0) == AND)
4222 {
4223 x = op0;
4224 goto cost_logic;
4225 }
4226
4227 /* Comparisons can work if the order is swapped.
4228 Canonicalization puts the more complex operation first, but
4229 we want it in op1. */
4230 if (! (REG_P (op0)
4231 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4232 {
4233 op0 = XEXP (x, 1);
4234 op1 = XEXP (x, 0);
4235 }
4236 goto cost_minus;
4237
4238 case MINUS:
4239 op0 = XEXP (x, 0);
4240 op1 = XEXP (x, 1);
4241
4242 cost_minus:
4243 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4244 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4245 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4246 {
4247 if (op0 != const0_rtx)
4248 *cost += rtx_cost (op0, MINUS, 0, speed);
4249
4250 if (CONST_INT_P (op1))
4251 {
4252 if (!aarch64_uimm12_shift (INTVAL (op1)))
4253 *cost += rtx_cost (op1, MINUS, 1, speed);
4254 }
4255 else
4256 {
4257 op1 = aarch64_strip_shift_or_extend (op1);
4258 *cost += rtx_cost (op1, MINUS, 1, speed);
4259 }
4260 return true;
4261 }
4262
4263 return false;
4264
4265 case PLUS:
4266 op0 = XEXP (x, 0);
4267 op1 = XEXP (x, 1);
4268
4269 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4270 {
4271 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4272 {
4273 *cost += rtx_cost (op0, PLUS, 0, speed);
4274 }
4275 else
4276 {
4277 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4278
4279 if (new_op0 == op0
4280 && GET_CODE (op0) == MULT)
4281 {
4282 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4283 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4284 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4285 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4286 {
4287 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4288 speed)
4289 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4290 speed)
4291 + rtx_cost (op1, PLUS, 1, speed));
4292 if (speed)
4293 *cost += extra_cost->int_multiply_extend_add;
4294 return true;
4295 }
4296 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4297 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4298 + rtx_cost (op1, PLUS, 1, speed));
4299
4300 if (speed)
 4301 *cost += extra_cost->int_multiply_add;
 /* The multiply-accumulate has been costed in full; don't fall
 through and cost the multiply operand a second time. */
 return true;
 4302 }
4303
4304 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4305 + rtx_cost (op1, PLUS, 1, speed));
4306 }
4307 return true;
4308 }
4309
4310 return false;
4311
4312 case IOR:
4313 case XOR:
4314 case AND:
4315 cost_logic:
4316 op0 = XEXP (x, 0);
4317 op1 = XEXP (x, 1);
4318
4319 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4320 {
4321 if (CONST_INT_P (op1)
4322 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4323 {
4324 *cost += rtx_cost (op0, AND, 0, speed);
4325 }
4326 else
4327 {
4328 if (GET_CODE (op0) == NOT)
4329 op0 = XEXP (op0, 0);
4330 op0 = aarch64_strip_shift (op0);
4331 *cost += (rtx_cost (op0, AND, 0, speed)
4332 + rtx_cost (op1, AND, 1, speed));
4333 }
4334 return true;
4335 }
4336 return false;
4337
4338 case ZERO_EXTEND:
4339 if ((GET_MODE (x) == DImode
4340 && GET_MODE (XEXP (x, 0)) == SImode)
4341 || GET_CODE (XEXP (x, 0)) == MEM)
4342 {
4343 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4344 return true;
4345 }
4346 return false;
4347
4348 case SIGN_EXTEND:
4349 if (GET_CODE (XEXP (x, 0)) == MEM)
4350 {
4351 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4352 return true;
4353 }
4354 return false;
4355
4356 case ROTATE:
4357 if (!CONST_INT_P (XEXP (x, 1)))
4358 *cost += COSTS_N_INSNS (2);
4359 /* Fall through. */
4360 case ROTATERT:
4361 case LSHIFTRT:
4362 case ASHIFT:
4363 case ASHIFTRT:
4364
4365 /* Shifting by a register often takes an extra cycle. */
4366 if (speed && !CONST_INT_P (XEXP (x, 1)))
4367 *cost += extra_cost->register_shift;
4368
4369 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4370 return true;
4371
4372 case HIGH:
4373 if (!CONSTANT_P (XEXP (x, 0)))
4374 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4375 return true;
4376
4377 case LO_SUM:
4378 if (!CONSTANT_P (XEXP (x, 1)))
4379 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4380 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4381 return true;
4382
4383 case ZERO_EXTRACT:
4384 case SIGN_EXTRACT:
4385 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4386 return true;
4387
4388 case MULT:
4389 op0 = XEXP (x, 0);
4390 op1 = XEXP (x, 1);
4391
4392 *cost = COSTS_N_INSNS (1);
4393 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4394 {
4395 if (CONST_INT_P (op1)
4396 && exact_log2 (INTVAL (op1)) > 0)
4397 {
4398 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4399 return true;
4400 }
4401
4402 if ((GET_CODE (op0) == ZERO_EXTEND
4403 && GET_CODE (op1) == ZERO_EXTEND)
4404 || (GET_CODE (op0) == SIGN_EXTEND
4405 && GET_CODE (op1) == SIGN_EXTEND))
4406 {
4407 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4408 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4409 if (speed)
4410 *cost += extra_cost->int_multiply_extend;
4411 return true;
4412 }
4413
4414 if (speed)
4415 *cost += extra_cost->int_multiply;
4416 }
4417 else if (speed)
4418 {
4419 if (GET_MODE (x) == DFmode)
4420 *cost += extra_cost->double_multiply;
4421 else if (GET_MODE (x) == SFmode)
4422 *cost += extra_cost->float_multiply;
4423 }
4424
4425 return false; /* All arguments need to be in registers. */
4426
4427 case MOD:
4428 case UMOD:
4429 *cost = COSTS_N_INSNS (2);
4430 if (speed)
4431 {
4432 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4433 *cost += (extra_cost->int_multiply_add
4434 + extra_cost->int_divide);
4435 else if (GET_MODE (x) == DFmode)
4436 *cost += (extra_cost->double_multiply
4437 + extra_cost->double_divide);
4438 else if (GET_MODE (x) == SFmode)
4439 *cost += (extra_cost->float_multiply
4440 + extra_cost->float_divide);
4441 }
4442 return false; /* All arguments need to be in registers. */
4443
4444 case DIV:
4445 case UDIV:
4446 *cost = COSTS_N_INSNS (1);
4447 if (speed)
4448 {
4449 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4450 *cost += extra_cost->int_divide;
4451 else if (GET_MODE (x) == DFmode)
4452 *cost += extra_cost->double_divide;
4453 else if (GET_MODE (x) == SFmode)
4454 *cost += extra_cost->float_divide;
4455 }
4456 return false; /* All arguments need to be in registers. */
4457
4458 default:
4459 break;
4460 }
4461 return false;
4462}
4463
4464static int
4465aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4466 enum machine_mode mode ATTRIBUTE_UNUSED,
4467 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4468{
4469 enum rtx_code c = GET_CODE (x);
4470 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4471
4472 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4473 return addr_cost->pre_modify;
4474
4475 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4476 return addr_cost->post_modify;
4477
4478 if (c == PLUS)
4479 {
4480 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4481 return addr_cost->imm_offset;
4482 else if (GET_CODE (XEXP (x, 0)) == MULT
4483 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4484 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4485 return addr_cost->register_extend;
4486
4487 return addr_cost->register_offset;
4488 }
4489 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4490 return addr_cost->imm_offset;
4491
4492 return 0;
4493}
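
/* An illustrative sketch (hypothetical helper, never called): build a
   base-plus-immediate address and ask for its cost; such an address
   falls into the imm_offset bucket of the tuning table used above.  */
static int ATTRIBUTE_UNUSED
aarch64_example_address_cost (rtx base)
{
  rtx addr = gen_rtx_PLUS (DImode, base, GEN_INT (16));
  return aarch64_address_cost (addr, DImode, ADDR_SPACE_GENERIC, true);
}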
4494
4495static int
 4496aarch64_register_move_cost (enum machine_mode mode,
4497 reg_class_t from, reg_class_t to)
4498{
4499 const struct cpu_regmove_cost *regmove_cost
4500 = aarch64_tune_params->regmove_cost;
4501
4502 if (from == GENERAL_REGS && to == GENERAL_REGS)
4503 return regmove_cost->GP2GP;
4504 else if (from == GENERAL_REGS)
4505 return regmove_cost->GP2FP;
4506 else if (to == GENERAL_REGS)
4507 return regmove_cost->FP2GP;
4508
4509 /* When AdvSIMD instructions are disabled it is not possible to move
4510 a 128-bit value directly between Q registers. This is handled in
4511 secondary reload. A general register is used as a scratch to move
4512 the upper DI value and the lower DI value is moved directly,
4513 hence the cost is the sum of three moves. */
4514
 4515 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
4516 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4517
4518 return regmove_cost->FP2FP;
4519}
4520
4521static int
4522aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4523 reg_class_t rclass ATTRIBUTE_UNUSED,
4524 bool in ATTRIBUTE_UNUSED)
4525{
4526 return aarch64_tune_params->memmov_cost;
4527}
4528
4529static void initialize_aarch64_code_model (void);
4530
4531/* Parse the architecture extension string. */
4532
4533static void
4534aarch64_parse_extension (char *str)
4535{
4536 /* The extension string is parsed left to right. */
4537 const struct aarch64_option_extension *opt = NULL;
4538
4539 /* Flag to say whether we are adding or removing an extension. */
4540 int adding_ext = -1;
4541
4542 while (str != NULL && *str != 0)
4543 {
4544 char *ext;
4545 size_t len;
4546
4547 str++;
4548 ext = strchr (str, '+');
4549
4550 if (ext != NULL)
4551 len = ext - str;
4552 else
4553 len = strlen (str);
4554
4555 if (len >= 2 && strncmp (str, "no", 2) == 0)
4556 {
4557 adding_ext = 0;
4558 len -= 2;
4559 str += 2;
4560 }
4561 else if (len > 0)
4562 adding_ext = 1;
4563
4564 if (len == 0)
4565 {
4566 error ("missing feature modifier after %qs", "+no");
4567 return;
4568 }
4569
4570 /* Scan over the extensions table trying to find an exact match. */
4571 for (opt = all_extensions; opt->name != NULL; opt++)
4572 {
4573 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
4574 {
4575 /* Add or remove the extension. */
4576 if (adding_ext)
4577 aarch64_isa_flags |= opt->flags_on;
4578 else
4579 aarch64_isa_flags &= ~(opt->flags_off);
4580 break;
4581 }
4582 }
4583
4584 if (opt->name == NULL)
4585 {
4586 /* Extension not found in list. */
4587 error ("unknown feature modifier %qs", str);
4588 return;
4589 }
4590
4591 str = ext;
4592 };
4593
4594 return;
4595}
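
/* An illustrative sketch (hypothetical helper, never called): how a
   feature string such as "+simd+nocrypto" is tokenised by the loop
   above.  Each token starts at a '+'; a leading "no" turns the modifier
   into a removal.  This helper only counts the modifiers and leaves
   aarch64_isa_flags alone.  */
static int ATTRIBUTE_UNUSED
aarch64_example_count_modifiers (const char *str)
{
  int count = 0;

  while (str != NULL && *str == '+')
    {
      count++;
      str = strchr (str + 1, '+');
    }
  return count;
}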
4596
4597/* Parse the ARCH string. */
4598
4599static void
4600aarch64_parse_arch (void)
4601{
4602 char *ext;
4603 const struct processor *arch;
4604 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
4605 size_t len;
4606
4607 strcpy (str, aarch64_arch_string);
4608
4609 ext = strchr (str, '+');
4610
4611 if (ext != NULL)
4612 len = ext - str;
4613 else
4614 len = strlen (str);
4615
4616 if (len == 0)
4617 {
4618 error ("missing arch name in -march=%qs", str);
4619 return;
4620 }
4621
4622 /* Loop through the list of supported ARCHs to find a match. */
4623 for (arch = all_architectures; arch->name != NULL; arch++)
4624 {
4625 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
4626 {
4627 selected_arch = arch;
4628 aarch64_isa_flags = selected_arch->flags;
4629 selected_cpu = &all_cores[selected_arch->core];
4630
4631 if (ext != NULL)
4632 {
4633 /* ARCH string contains at least one extension. */
4634 aarch64_parse_extension (ext);
4635 }
4636
4637 return;
4638 }
4639 }
4640
4641 /* ARCH name not found in list. */
4642 error ("unknown value %qs for -march", str);
4643 return;
4644}
4645
4646/* Parse the CPU string. */
4647
4648static void
4649aarch64_parse_cpu (void)
4650{
4651 char *ext;
4652 const struct processor *cpu;
4653 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
4654 size_t len;
4655
4656 strcpy (str, aarch64_cpu_string);
4657
4658 ext = strchr (str, '+');
4659
4660 if (ext != NULL)
4661 len = ext - str;
4662 else
4663 len = strlen (str);
4664
4665 if (len == 0)
4666 {
4667 error ("missing cpu name in -mcpu=%qs", str);
4668 return;
4669 }
4670
4671 /* Loop through the list of supported CPUs to find a match. */
4672 for (cpu = all_cores; cpu->name != NULL; cpu++)
4673 {
4674 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
4675 {
4676 selected_cpu = cpu;
4677 aarch64_isa_flags = selected_cpu->flags;
4678
4679 if (ext != NULL)
4680 {
4681 /* CPU string contains at least one extension. */
4682 aarch64_parse_extension (ext);
4683 }
4684
4685 return;
4686 }
4687 }
4688
4689 /* CPU name not found in list. */
4690 error ("unknown value %qs for -mcpu", str);
4691 return;
4692}
4693
4694/* Parse the TUNE string. */
4695
4696static void
4697aarch64_parse_tune (void)
4698{
4699 const struct processor *cpu;
4700 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
4701 strcpy (str, aarch64_tune_string);
4702
4703 /* Loop through the list of supported CPUs to find a match. */
4704 for (cpu = all_cores; cpu->name != NULL; cpu++)
4705 {
4706 if (strcmp (cpu->name, str) == 0)
4707 {
4708 selected_tune = cpu;
4709 return;
4710 }
4711 }
4712
4713 /* CPU name not found in list. */
4714 error ("unknown value %qs for -mtune", str);
4715 return;
4716}
4717
4718
4719/* Implement TARGET_OPTION_OVERRIDE. */
4720
4721static void
4722aarch64_override_options (void)
4723{
 4724 /* -march wins over -mcpu, so when -march is defined, -mcpu takes the same
 4725 value; otherwise -march remains undefined. -mtune can be used with either
 4726 -march or -mcpu. */
4727
4728 if (aarch64_arch_string)
4729 {
4730 aarch64_parse_arch ();
4731 aarch64_cpu_string = NULL;
4732 }
4733
4734 if (aarch64_cpu_string)
4735 {
4736 aarch64_parse_cpu ();
4737 selected_arch = NULL;
4738 }
4739
4740 if (aarch64_tune_string)
4741 {
4742 aarch64_parse_tune ();
4743 }
4744
4745 initialize_aarch64_code_model ();
4746
4747 aarch64_build_bitmask_table ();
4748
4749 /* This target defaults to strict volatile bitfields. */
4750 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
4751 flag_strict_volatile_bitfields = 1;
4752
4753 /* If the user did not specify a processor, choose the default
4754 one for them. This will be the CPU set during configuration using
4755 --with-cpu, otherwise it is "generic". */
4756 if (!selected_cpu)
4757 {
4758 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
4759 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
4760 }
4761
4762 gcc_assert (selected_cpu);
4763
4764 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
4765 if (!selected_tune)
4766 selected_tune = &all_cores[selected_cpu->core];
4767
4768 aarch64_tune_flags = selected_tune->flags;
4769 aarch64_tune = selected_tune->core;
4770 aarch64_tune_params = selected_tune->tune;
4771
4772 aarch64_override_options_after_change ();
4773}
4774
4775/* Implement targetm.override_options_after_change. */
4776
4777static void
4778aarch64_override_options_after_change (void)
4779{
4780 faked_omit_frame_pointer = false;
4781
4782 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
4783 that aarch64_frame_pointer_required will be called. We need to remember
4784 whether flag_omit_frame_pointer was turned on normally or just faked. */
4785
4786 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
4787 {
4788 flag_omit_frame_pointer = true;
4789 faked_omit_frame_pointer = true;
4790 }
4791}
4792
4793static struct machine_function *
4794aarch64_init_machine_status (void)
4795{
4796 struct machine_function *machine;
4797 machine = ggc_alloc_cleared_machine_function ();
4798 return machine;
4799}
4800
4801void
4802aarch64_init_expanders (void)
4803{
4804 init_machine_status = aarch64_init_machine_status;
4805}
4806
4807/* Check the selected code model, switching to its PIC variant when -fpic/-fPIC is in effect. */
4808static void
4809initialize_aarch64_code_model (void)
4810{
4811 if (flag_pic)
4812 {
4813 switch (aarch64_cmodel_var)
4814 {
4815 case AARCH64_CMODEL_TINY:
4816 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
4817 break;
4818 case AARCH64_CMODEL_SMALL:
4819 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
4820 break;
4821 case AARCH64_CMODEL_LARGE:
4822 sorry ("code model %qs with -f%s", "large",
4823 flag_pic > 1 ? "PIC" : "pic");
4824 default:
4825 gcc_unreachable ();
4826 }
4827 }
4828 else
4829 aarch64_cmodel = aarch64_cmodel_var;
4830}
4831
4832/* Return true if SYMBOL_REF X binds locally. */
4833
4834static bool
4835aarch64_symbol_binds_local_p (const_rtx x)
4836{
4837 return (SYMBOL_REF_DECL (x)
4838 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
4839 : SYMBOL_REF_LOCAL_P (x));
4840}
4841
4842/* Return true if SYMBOL_REF X is thread local.  */
4843static bool
4844aarch64_tls_symbol_p (rtx x)
4845{
4846 if (! TARGET_HAVE_TLS)
4847 return false;
4848
4849 if (GET_CODE (x) != SYMBOL_REF)
4850 return false;
4851
4852 return SYMBOL_REF_TLS_MODEL (x) != 0;
4853}
4854
4855/* Classify a TLS symbol into one of the TLS kinds. */
4856enum aarch64_symbol_type
4857aarch64_classify_tls_symbol (rtx x)
4858{
4859 enum tls_model tls_kind = tls_symbolic_operand_type (x);
4860
4861 switch (tls_kind)
4862 {
4863 case TLS_MODEL_GLOBAL_DYNAMIC:
4864 case TLS_MODEL_LOCAL_DYNAMIC:
4865 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
4866
4867 case TLS_MODEL_INITIAL_EXEC:
4868 return SYMBOL_SMALL_GOTTPREL;
4869
4870 case TLS_MODEL_LOCAL_EXEC:
4871 return SYMBOL_SMALL_TPREL;
4872
4873 case TLS_MODEL_EMULATED:
4874 case TLS_MODEL_NONE:
4875 return SYMBOL_FORCE_TO_MEM;
4876
4877 default:
4878 gcc_unreachable ();
4879 }
4880}
4881
4882/* Return the method that should be used to access SYMBOL_REF or
4883 LABEL_REF X in context CONTEXT. */
4884enum aarch64_symbol_type
4885aarch64_classify_symbol (rtx x,
4886 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
4887{
4888 if (GET_CODE (x) == LABEL_REF)
4889 {
4890 switch (aarch64_cmodel)
4891 {
4892 case AARCH64_CMODEL_LARGE:
4893 return SYMBOL_FORCE_TO_MEM;
4894
4895 case AARCH64_CMODEL_TINY_PIC:
4896 case AARCH64_CMODEL_TINY:
4897 case AARCH64_CMODEL_SMALL_PIC:
4898 case AARCH64_CMODEL_SMALL:
4899 return SYMBOL_SMALL_ABSOLUTE;
4900
4901 default:
4902 gcc_unreachable ();
4903 }
4904 }
4905
4906 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4907
4908 switch (aarch64_cmodel)
4909 {
4910 case AARCH64_CMODEL_LARGE:
4911 return SYMBOL_FORCE_TO_MEM;
4912
4913 case AARCH64_CMODEL_TINY:
4914 case AARCH64_CMODEL_SMALL:
4915
 4916 /* This is needed to get DFmode and TImode constants loaded from
 4917 the constant pool. It is necessary to dump TImode values into
 4918 the constant pool because we don't handle TImode constant loads
 4919 properly yet, and hence need to use the constant pool. */
4920 if (CONSTANT_POOL_ADDRESS_P (x))
4921 return SYMBOL_FORCE_TO_MEM;
4922
4923 if (aarch64_tls_symbol_p (x))
4924 return aarch64_classify_tls_symbol (x);
4925
4926 if (SYMBOL_REF_WEAK (x))
4927 return SYMBOL_FORCE_TO_MEM;
4928
4929 return SYMBOL_SMALL_ABSOLUTE;
4930
4931 case AARCH64_CMODEL_TINY_PIC:
4932 case AARCH64_CMODEL_SMALL_PIC:
4933
4934 if (CONSTANT_POOL_ADDRESS_P (x))
4935 return SYMBOL_FORCE_TO_MEM;
4936
4937 if (aarch64_tls_symbol_p (x))
4938 return aarch64_classify_tls_symbol (x);
4939
4940 if (!aarch64_symbol_binds_local_p (x))
4941 return SYMBOL_SMALL_GOT;
4942
4943 return SYMBOL_SMALL_ABSOLUTE;
4944
4945 default:
4946 gcc_unreachable ();
4947 }
4948 /* By default push everything into the constant pool. */
4949 return SYMBOL_FORCE_TO_MEM;
4950}
4951
4952/* Return true if X is a symbolic constant that can be used in context
4953 CONTEXT. If it is, store the type of the symbol in *SYMBOL_TYPE. */
4954
4955bool
4956aarch64_symbolic_constant_p (rtx x, enum aarch64_symbol_context context,
4957 enum aarch64_symbol_type *symbol_type)
4958{
4959 rtx offset;
4960 split_const (x, &x, &offset);
4961 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
4962 *symbol_type = aarch64_classify_symbol (x, context);
4963 else
4964 return false;
4965
4966 /* No checking of offset at this point. */
4967 return true;
4968}
4969
4970bool
4971aarch64_constant_address_p (rtx x)
4972{
4973 return (CONSTANT_P (x) && memory_address_p (DImode, x));
4974}
4975
4976bool
4977aarch64_legitimate_pic_operand_p (rtx x)
4978{
4979 if (GET_CODE (x) == SYMBOL_REF
4980 || (GET_CODE (x) == CONST
4981 && GET_CODE (XEXP (x, 0)) == PLUS
4982 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
4983 return false;
4984
4985 return true;
4986}
4987
4988static bool
4989aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
4990{
4991 /* Do not allow vector struct mode constants. We could support
4992 0 and -1 easily, but they need support in aarch64-simd.md. */
4993 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
4994 return false;
4995
4996 /* This could probably go away because
4997 we now decompose CONST_INTs according to expand_mov_immediate. */
4998 if ((GET_CODE (x) == CONST_VECTOR
4999 && aarch64_simd_valid_immediate (x, mode, false,
5000 NULL, NULL, NULL, NULL, NULL) != -1)
5001 || CONST_INT_P (x))
5002 return !targetm.cannot_force_const_mem (mode, x);
5003
5004 if (GET_CODE (x) == HIGH
5005 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5006 return true;
5007
5008 return aarch64_constant_address_p (x);
5009}
5010
5011rtx
5012aarch64_load_tp (rtx target)
5013{
5014 if (!target
5015 || GET_MODE (target) != Pmode
5016 || !register_operand (target, Pmode))
5017 target = gen_reg_rtx (Pmode);
5018
5019 /* Can return in any reg. */
5020 emit_insn (gen_aarch64_load_tp_hard (target));
5021 return target;
5022}
5023
5024/* On AAPCS systems, this is the "struct __va_list". */
5025static GTY(()) tree va_list_type;
5026
5027/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5028 Return the type to use as __builtin_va_list.
5029
5030 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5031
5032 struct __va_list
5033 {
5034 void *__stack;
5035 void *__gr_top;
5036 void *__vr_top;
5037 int __gr_offs;
5038 int __vr_offs;
5039 }; */
5040
5041static tree
5042aarch64_build_builtin_va_list (void)
5043{
5044 tree va_list_name;
5045 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5046
5047 /* Create the type. */
5048 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5049 /* Give it the required name. */
5050 va_list_name = build_decl (BUILTINS_LOCATION,
5051 TYPE_DECL,
5052 get_identifier ("__va_list"),
5053 va_list_type);
5054 DECL_ARTIFICIAL (va_list_name) = 1;
5055 TYPE_NAME (va_list_type) = va_list_name;
5056
5057 /* Create the fields. */
5058 f_stack = build_decl (BUILTINS_LOCATION,
5059 FIELD_DECL, get_identifier ("__stack"),
5060 ptr_type_node);
5061 f_grtop = build_decl (BUILTINS_LOCATION,
5062 FIELD_DECL, get_identifier ("__gr_top"),
5063 ptr_type_node);
5064 f_vrtop = build_decl (BUILTINS_LOCATION,
5065 FIELD_DECL, get_identifier ("__vr_top"),
5066 ptr_type_node);
5067 f_groff = build_decl (BUILTINS_LOCATION,
5068 FIELD_DECL, get_identifier ("__gr_offs"),
5069 integer_type_node);
5070 f_vroff = build_decl (BUILTINS_LOCATION,
5071 FIELD_DECL, get_identifier ("__vr_offs"),
5072 integer_type_node);
5073
5074 DECL_ARTIFICIAL (f_stack) = 1;
5075 DECL_ARTIFICIAL (f_grtop) = 1;
5076 DECL_ARTIFICIAL (f_vrtop) = 1;
5077 DECL_ARTIFICIAL (f_groff) = 1;
5078 DECL_ARTIFICIAL (f_vroff) = 1;
5079
5080 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5081 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5082 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5083 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5084 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5085
5086 TYPE_FIELDS (va_list_type) = f_stack;
5087 DECL_CHAIN (f_stack) = f_grtop;
5088 DECL_CHAIN (f_grtop) = f_vrtop;
5089 DECL_CHAIN (f_vrtop) = f_groff;
5090 DECL_CHAIN (f_groff) = f_vroff;
5091
5092 /* Compute its layout. */
5093 layout_type (va_list_type);
5094
5095 return va_list_type;
5096}
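
/* An illustrative sketch (hypothetical user-level code, guarded out):
   the record built above is what __builtin_va_list expands to on
   AArch64, so an ordinary variadic function like the one below
   exercises the va_start/va_arg expansion implemented by the next two
   functions.  */
#if 0
#include <stdarg.h>

static int
example_sum_ints (int count, ...)
{
  va_list ap;		/* A struct __va_list under the hood.  */
  int i, total = 0;

  va_start (ap, count);
  for (i = 0; i < count; i++)
    total += va_arg (ap, int);
  va_end (ap);
  return total;
}
#endif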
5097
5098/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5099static void
5100aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5101{
5102 const CUMULATIVE_ARGS *cum;
5103 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5104 tree stack, grtop, vrtop, groff, vroff;
5105 tree t;
5106 int gr_save_area_size;
5107 int vr_save_area_size;
5108 int vr_offset;
5109
5110 cum = &crtl->args.info;
5111 gr_save_area_size
5112 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5113 vr_save_area_size
5114 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5115
5116 if (TARGET_GENERAL_REGS_ONLY)
5117 {
5118 if (cum->aapcs_nvrn > 0)
5119 sorry ("%qs and floating point or vector arguments",
5120 "-mgeneral-regs-only");
5121 vr_save_area_size = 0;
5122 }
5123
5124 f_stack = TYPE_FIELDS (va_list_type_node);
5125 f_grtop = DECL_CHAIN (f_stack);
5126 f_vrtop = DECL_CHAIN (f_grtop);
5127 f_groff = DECL_CHAIN (f_vrtop);
5128 f_vroff = DECL_CHAIN (f_groff);
5129
5130 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5131 NULL_TREE);
5132 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5133 NULL_TREE);
5134 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5135 NULL_TREE);
5136 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5137 NULL_TREE);
5138 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5139 NULL_TREE);
5140
5141 /* Emit code to initialize STACK, which points to the next varargs stack
5142 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5143 by named arguments. STACK is 8-byte aligned. */
5144 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5145 if (cum->aapcs_stack_size > 0)
5146 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5147 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5148 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5149
5150 /* Emit code to initialize GRTOP, the top of the GR save area.
5151 virtual_incoming_args_rtx should have been 16 byte aligned. */
5152 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5153 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5154 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5155
5156 /* Emit code to initialize VRTOP, the top of the VR save area.
5157 This address is gr_save_area_bytes below GRTOP, rounded
5158 down to the next 16-byte boundary. */
5159 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5160 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5161 STACK_BOUNDARY / BITS_PER_UNIT);
5162
5163 if (vr_offset)
5164 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5165 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5166 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5167
5168 /* Emit code to initialize GROFF, the offset from GRTOP of the
5169 next GPR argument. */
5170 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5171 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5172 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5173
5174 /* Likewise emit code to initialize VROFF, the offset from FTOP
5175 of the next VR argument. */
5176 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5177 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5178 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5179}
5180
5181/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5182
5183static tree
5184aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5185 gimple_seq *post_p ATTRIBUTE_UNUSED)
5186{
5187 tree addr;
5188 bool indirect_p;
5189 bool is_ha; /* is HFA or HVA. */
5190 bool dw_align; /* double-word align. */
5191 enum machine_mode ag_mode = VOIDmode;
5192 int nregs;
5193 enum machine_mode mode;
5194
5195 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5196 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5197 HOST_WIDE_INT size, rsize, adjust, align;
5198 tree t, u, cond1, cond2;
5199
5200 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5201 if (indirect_p)
5202 type = build_pointer_type (type);
5203
5204 mode = TYPE_MODE (type);
5205
5206 f_stack = TYPE_FIELDS (va_list_type_node);
5207 f_grtop = DECL_CHAIN (f_stack);
5208 f_vrtop = DECL_CHAIN (f_grtop);
5209 f_groff = DECL_CHAIN (f_vrtop);
5210 f_vroff = DECL_CHAIN (f_groff);
5211
5212 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5213 f_stack, NULL_TREE);
5214 size = int_size_in_bytes (type);
5215 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5216
5217 dw_align = false;
5218 adjust = 0;
5219 if (aarch64_vfp_is_call_or_return_candidate (mode,
5220 type,
5221 &ag_mode,
5222 &nregs,
5223 &is_ha))
5224 {
5225 /* TYPE passed in fp/simd registers. */
5226 if (TARGET_GENERAL_REGS_ONLY)
5227 sorry ("%qs and floating point or vector arguments",
5228 "-mgeneral-regs-only");
5229
5230 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5231 unshare_expr (valist), f_vrtop, NULL_TREE);
5232 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5233 unshare_expr (valist), f_vroff, NULL_TREE);
5234
5235 rsize = nregs * UNITS_PER_VREG;
5236
5237 if (is_ha)
5238 {
5239 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5240 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5241 }
5242 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5243 && size < UNITS_PER_VREG)
5244 {
5245 adjust = UNITS_PER_VREG - size;
5246 }
5247 }
5248 else
5249 {
5250 /* TYPE passed in general registers. */
5251 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5252 unshare_expr (valist), f_grtop, NULL_TREE);
5253 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5254 unshare_expr (valist), f_groff, NULL_TREE);
5255 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5256 nregs = rsize / UNITS_PER_WORD;
5257
5258 if (align > 8)
5259 dw_align = true;
5260
5261 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5262 && size < UNITS_PER_WORD)
5263 {
5264 adjust = UNITS_PER_WORD - size;
5265 }
5266 }
5267
5268 /* Get a local temporary for the field value. */
5269 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5270
5271 /* Emit code to branch if off >= 0. */
5272 t = build2 (GE_EXPR, boolean_type_node, off,
5273 build_int_cst (TREE_TYPE (off), 0));
5274 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5275
5276 if (dw_align)
5277 {
5278 /* Emit: offs = (offs + 15) & -16. */
5279 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5280 build_int_cst (TREE_TYPE (off), 15));
5281 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5282 build_int_cst (TREE_TYPE (off), -16));
5283 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5284 }
5285 else
5286 roundup = NULL;
5287
5288 /* Update ap.__[g|v]r_offs */
5289 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5290 build_int_cst (TREE_TYPE (off), rsize));
5291 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5292
5293 /* String up. */
5294 if (roundup)
5295 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5296
5297 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5298 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5299 build_int_cst (TREE_TYPE (f_off), 0));
5300 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5301
5302 /* String up: make sure the assignment happens before the use. */
5303 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5304 COND_EXPR_ELSE (cond1) = t;
5305
 5306 /* Prepare the trees handling the argument that is passed on the stack;
 5307 the top-level node will be stored in ON_STACK. */
5308 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5309 if (align > 8)
5310 {
5311 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5312 t = fold_convert (intDI_type_node, arg);
5313 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5314 build_int_cst (TREE_TYPE (t), 15));
5315 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5316 build_int_cst (TREE_TYPE (t), -16));
5317 t = fold_convert (TREE_TYPE (arg), t);
5318 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5319 }
5320 else
5321 roundup = NULL;
5322 /* Advance ap.__stack */
5323 t = fold_convert (intDI_type_node, arg);
5324 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5325 build_int_cst (TREE_TYPE (t), size + 7));
5326 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5327 build_int_cst (TREE_TYPE (t), -8));
5328 t = fold_convert (TREE_TYPE (arg), t);
5329 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5330 /* String up roundup and advance. */
5331 if (roundup)
5332 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5333 /* String up with arg */
5334 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5335 /* Big-endianness related address adjustment. */
5336 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5337 && size < UNITS_PER_WORD)
5338 {
5339 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5340 size_int (UNITS_PER_WORD - size));
5341 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5342 }
5343
5344 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5345 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5346
5347 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5348 t = off;
5349 if (adjust)
5350 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5351 build_int_cst (TREE_TYPE (off), adjust));
5352
5353 t = fold_convert (sizetype, t);
5354 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5355
5356 if (is_ha)
5357 {
5358 /* type ha; // treat as "struct {ftype field[n];}"
5359 ... [computing offs]
 5360 for (i = 0; i < nregs; ++i, offs += 16)
5361 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5362 return ha; */
5363 int i;
5364 tree tmp_ha, field_t, field_ptr_t;
5365
5366 /* Declare a local variable. */
5367 tmp_ha = create_tmp_var_raw (type, "ha");
5368 gimple_add_tmp_var (tmp_ha);
5369
5370 /* Establish the base type. */
5371 switch (ag_mode)
5372 {
5373 case SFmode:
5374 field_t = float_type_node;
5375 field_ptr_t = float_ptr_type_node;
5376 break;
5377 case DFmode:
5378 field_t = double_type_node;
5379 field_ptr_t = double_ptr_type_node;
5380 break;
5381 case TFmode:
5382 field_t = long_double_type_node;
5383 field_ptr_t = long_double_ptr_type_node;
5384 break;
5385/* Half-precision and quad-precision types are not fully supported yet.
 5386 Enable the following code once that support is complete; the correct
 5387 type node for __fp16 * still needs to be found. */
5388#if 0
5389 case HFmode:
5390 field_t = float_type_node;
5391 field_ptr_t = float_ptr_type_node;
5392 break;
5393#endif
5394 case V2SImode:
5395 case V4SImode:
5396 {
5397 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5398 field_t = build_vector_type_for_mode (innertype, ag_mode);
5399 field_ptr_t = build_pointer_type (field_t);
5400 }
5401 break;
5402 default:
5403 gcc_assert (0);
5404 }
5405
 5406 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area). */
5407 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5408 addr = t;
5409 t = fold_convert (field_ptr_t, addr);
5410 t = build2 (MODIFY_EXPR, field_t,
5411 build1 (INDIRECT_REF, field_t, tmp_ha),
5412 build1 (INDIRECT_REF, field_t, t));
5413
5414 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5415 for (i = 1; i < nregs; ++i)
5416 {
5417 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5418 u = fold_convert (field_ptr_t, addr);
5419 u = build2 (MODIFY_EXPR, field_t,
5420 build2 (MEM_REF, field_t, tmp_ha,
5421 build_int_cst (field_ptr_t,
5422 (i *
5423 int_size_in_bytes (field_t)))),
5424 build1 (INDIRECT_REF, field_t, u));
5425 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5426 }
5427
5428 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5429 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5430 }
5431
5432 COND_EXPR_ELSE (cond2) = t;
5433 addr = fold_convert (build_pointer_type (type), cond1);
5434 addr = build_va_arg_indirect_ref (addr);
5435
5436 if (indirect_p)
5437 addr = build_va_arg_indirect_ref (addr);
5438
5439 return addr;
5440}
5441
5442/* Implement TARGET_SETUP_INCOMING_VARARGS. */
5443
5444static void
5445aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5446 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5447 int no_rtl)
5448{
5449 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5450 CUMULATIVE_ARGS local_cum;
5451 int gr_saved, vr_saved;
5452
5453 /* The caller has advanced CUM up to, but not beyond, the last named
5454 argument. Advance a local copy of CUM past the last "real" named
5455 argument, to find out how many registers are left over. */
5456 local_cum = *cum;
 5457 aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
5458
 5459 /* Find out how many registers we need to save. */
5460 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5461 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5462
5463 if (TARGET_GENERAL_REGS_ONLY)
5464 {
5465 if (local_cum.aapcs_nvrn > 0)
5466 sorry ("%qs and floating point or vector arguments",
5467 "-mgeneral-regs-only");
5468 vr_saved = 0;
5469 }
5470
5471 if (!no_rtl)
5472 {
5473 if (gr_saved > 0)
5474 {
5475 rtx ptr, mem;
5476
5477 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5478 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5479 - gr_saved * UNITS_PER_WORD);
5480 mem = gen_frame_mem (BLKmode, ptr);
5481 set_mem_alias_set (mem, get_varargs_alias_set ());
5482
5483 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5484 mem, gr_saved);
5485 }
5486 if (vr_saved > 0)
5487 {
5488 /* We can't use move_block_from_reg, because it will use
5489 the wrong mode, storing D regs only. */
5490 enum machine_mode mode = TImode;
5491 int off, i;
5492
5493 /* Set OFF to the offset from virtual_incoming_args_rtx of
5494 the first vector register. The VR save area lies below
5495 the GR one, and is aligned to 16 bytes. */
5496 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5497 STACK_BOUNDARY / BITS_PER_UNIT);
5498 off -= vr_saved * UNITS_PER_VREG;
5499
5500 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5501 {
5502 rtx ptr, mem;
5503
5504 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5505 mem = gen_frame_mem (mode, ptr);
5506 set_mem_alias_set (mem, get_varargs_alias_set ());
5507 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5508 off += UNITS_PER_VREG;
5509 }
5510 }
5511 }
5512
5513 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5514 any complication of having crtl->args.pretend_args_size changed. */
5515 cfun->machine->saved_varargs_size
5516 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5517 STACK_BOUNDARY / BITS_PER_UNIT)
5518 + vr_saved * UNITS_PER_VREG);
5519}
5520
5521static void
5522aarch64_conditional_register_usage (void)
5523{
5524 int i;
5525 if (!TARGET_FLOAT)
5526 {
5527 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5528 {
5529 fixed_regs[i] = 1;
5530 call_used_regs[i] = 1;
5531 }
5532 }
5533}
5534
5535/* Walk down the type tree of TYPE counting consecutive base elements.
5536 If *MODEP is VOIDmode, then set it to the first valid floating point
5537 type. If a non-floating point type is found, or if a floating point
5538 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5539 otherwise return the count in the sub-tree. */
5540static int
5541aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5542{
5543 enum machine_mode mode;
5544 HOST_WIDE_INT size;
5545
5546 switch (TREE_CODE (type))
5547 {
5548 case REAL_TYPE:
5549 mode = TYPE_MODE (type);
5550 if (mode != DFmode && mode != SFmode && mode != TFmode)
5551 return -1;
5552
5553 if (*modep == VOIDmode)
5554 *modep = mode;
5555
5556 if (*modep == mode)
5557 return 1;
5558
5559 break;
5560
5561 case COMPLEX_TYPE:
5562 mode = TYPE_MODE (TREE_TYPE (type));
5563 if (mode != DFmode && mode != SFmode && mode != TFmode)
5564 return -1;
5565
5566 if (*modep == VOIDmode)
5567 *modep = mode;
5568
5569 if (*modep == mode)
5570 return 2;
5571
5572 break;
5573
5574 case VECTOR_TYPE:
5575 /* Use V2SImode and V4SImode as representatives of all 64-bit
5576 and 128-bit vector types. */
5577 size = int_size_in_bytes (type);
5578 switch (size)
5579 {
5580 case 8:
5581 mode = V2SImode;
5582 break;
5583 case 16:
5584 mode = V4SImode;
5585 break;
5586 default:
5587 return -1;
5588 }
5589
5590 if (*modep == VOIDmode)
5591 *modep = mode;
5592
5593 /* Vector modes are considered to be opaque: two vectors are
5594 equivalent for the purposes of being homogeneous aggregates
5595 if they are the same size. */
5596 if (*modep == mode)
5597 return 1;
5598
5599 break;
5600
5601 case ARRAY_TYPE:
5602 {
5603 int count;
5604 tree index = TYPE_DOMAIN (type);
5605
5606 /* Can't handle incomplete types. */
5607 if (!COMPLETE_TYPE_P (type))
5608 return -1;
5609
5610 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5611 if (count == -1
5612 || !index
5613 || !TYPE_MAX_VALUE (index)
5614 || !host_integerp (TYPE_MAX_VALUE (index), 1)
5615 || !TYPE_MIN_VALUE (index)
5616 || !host_integerp (TYPE_MIN_VALUE (index), 1)
5617 || count < 0)
5618 return -1;
5619
5620 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
5621 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
5622
5623 /* There must be no padding. */
5624 if (!host_integerp (TYPE_SIZE (type), 1)
5625 || (tree_low_cst (TYPE_SIZE (type), 1)
5626 != count * GET_MODE_BITSIZE (*modep)))
5627 return -1;
5628
5629 return count;
5630 }
5631
5632 case RECORD_TYPE:
5633 {
5634 int count = 0;
5635 int sub_count;
5636 tree field;
5637
5638 /* Can't handle incomplete types. */
5639 if (!COMPLETE_TYPE_P (type))
5640 return -1;
5641
5642 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5643 {
5644 if (TREE_CODE (field) != FIELD_DECL)
5645 continue;
5646
5647 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5648 if (sub_count < 0)
5649 return -1;
5650 count += sub_count;
5651 }
5652
5653 /* There must be no padding. */
5654 if (!host_integerp (TYPE_SIZE (type), 1)
5655 || (tree_low_cst (TYPE_SIZE (type), 1)
5656 != count * GET_MODE_BITSIZE (*modep)))
5657 return -1;
5658
5659 return count;
5660 }
5661
5662 case UNION_TYPE:
5663 case QUAL_UNION_TYPE:
5664 {
5665 /* These aren't very interesting except in a degenerate case. */
5666 int count = 0;
5667 int sub_count;
5668 tree field;
5669
5670 /* Can't handle incomplete types. */
5671 if (!COMPLETE_TYPE_P (type))
5672 return -1;
5673
5674 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5675 {
5676 if (TREE_CODE (field) != FIELD_DECL)
5677 continue;
5678
5679 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5680 if (sub_count < 0)
5681 return -1;
5682 count = count > sub_count ? count : sub_count;
5683 }
5684
5685 /* There must be no padding. */
5686 if (!host_integerp (TYPE_SIZE (type), 1)
5687 || (tree_low_cst (TYPE_SIZE (type), 1)
5688 != count * GET_MODE_BITSIZE (*modep)))
5689 return -1;
5690
5691 return count;
5692 }
5693
5694 default:
5695 break;
5696 }
5697
5698 return -1;
5699}
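
/* An illustrative sketch (hypothetical source-level types, unused): the
   element counts the recursive walk above produces.  All fields must
   share one base mode and the type must contain no padding.  */
struct aarch64_example_hfa2 { double a, b; };	      /* 2 elements, DFmode.  */
struct aarch64_example_hfa4 { float a, b, c, d; };    /* 4 elements, SFmode.  */
struct aarch64_example_mixed { float a; double b; };  /* Mixed base modes: -1.  */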
5700
5701/* Return TRUE if the type, as described by TYPE and MODE, is a composite
5702 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
5703 array types. The C99 floating-point complex types are also considered
5704 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
5705 types, which are GCC extensions and out of the scope of AAPCS64, are
5706 treated as composite types here as well.
5707
5708 Note that MODE itself is not sufficient in determining whether a type
5709 is such a composite type or not. This is because
5710 stor-layout.c:compute_record_mode may have already changed the MODE
5711 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
5712 structure with only one field may have its MODE set to the mode of the
5713 field. Also an integer mode whose size matches the size of the
5714 RECORD_TYPE type may be used to substitute the original mode
5715 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
5716 solely relied on. */
5717
5718static bool
5719aarch64_composite_type_p (const_tree type,
5720 enum machine_mode mode)
5721{
5722 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
5723 return true;
5724
5725 if (mode == BLKmode
5726 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
5727 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
5728 return true;
5729
5730 return false;
5731}
5732
5733/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
5734 type as described in AAPCS64 \S 4.1.2.
5735
5736 See the comment above aarch64_composite_type_p for the notes on MODE. */
5737
5738static bool
5739aarch64_short_vector_p (const_tree type,
5740 enum machine_mode mode)
5741{
5742 HOST_WIDE_INT size = -1;
5743
5744 if (type && TREE_CODE (type) == VECTOR_TYPE)
5745 size = int_size_in_bytes (type);
5746 else if (!aarch64_composite_type_p (type, mode)
5747 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5748 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
5749 size = GET_MODE_SIZE (mode);
5750
 5751 return size == 8 || size == 16;
5752}
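
/* An illustrative sketch (hypothetical source-level types, unused): GNU
   vector types of 8 or 16 bytes are the "short vectors" accepted above;
   anything else, such as a 32-byte vector, is not.  */
typedef int aarch64_example_v2si __attribute__ ((vector_size (8)));
typedef float aarch64_example_v4sf __attribute__ ((vector_size (16)));
typedef double aarch64_example_v4df __attribute__ ((vector_size (32)));  /* Not a short vector.  */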
5753
5754/* Return TRUE if an argument, whose type is described by TYPE and MODE,
5755 shall be passed or returned in simd/fp register(s) (providing these
5756 parameter passing registers are available).
5757
5758 Upon successful return, *COUNT returns the number of needed registers,
 5759 *BASE_MODE returns the mode of the individual register and when IS_HA
 5760 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
5761 floating-point aggregate or a homogeneous short-vector aggregate. */
5762
5763static bool
5764aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
5765 const_tree type,
5766 enum machine_mode *base_mode,
5767 int *count,
5768 bool *is_ha)
5769{
5770 enum machine_mode new_mode = VOIDmode;
5771 bool composite_p = aarch64_composite_type_p (type, mode);
5772
5773 if (is_ha != NULL) *is_ha = false;
5774
5775 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
5776 || aarch64_short_vector_p (type, mode))
5777 {
5778 *count = 1;
5779 new_mode = mode;
5780 }
5781 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5782 {
5783 if (is_ha != NULL) *is_ha = true;
5784 *count = 2;
5785 new_mode = GET_MODE_INNER (mode);
5786 }
5787 else if (type && composite_p)
5788 {
5789 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5790
5791 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
5792 {
5793 if (is_ha != NULL) *is_ha = true;
5794 *count = ag_count;
5795 }
5796 else
5797 return false;
5798 }
5799 else
5800 return false;
5801
5802 *base_mode = new_mode;
5803 return true;
5804}
5805
5806/* Implement TARGET_STRUCT_VALUE_RTX. */
5807
5808static rtx
5809aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
5810 int incoming ATTRIBUTE_UNUSED)
5811{
5812 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
5813}
5814
5815/* Implements target hook vector_mode_supported_p. */
5816static bool
5817aarch64_vector_mode_supported_p (enum machine_mode mode)
5818{
5819 if (TARGET_SIMD
5820 && (mode == V4SImode || mode == V8HImode
5821 || mode == V16QImode || mode == V2DImode
5822 || mode == V2SImode || mode == V4HImode
5823 || mode == V8QImode || mode == V2SFmode
5824 || mode == V4SFmode || mode == V2DFmode))
5825 return true;
5826
5827 return false;
5828}
5829
5830/* Return quad mode as the preferred SIMD mode. */
5831static enum machine_mode
5832aarch64_preferred_simd_mode (enum machine_mode mode)
5833{
5834 if (TARGET_SIMD)
5835 switch (mode)
5836 {
5837 case DFmode:
5838 return V2DFmode;
5839 case SFmode:
5840 return V4SFmode;
5841 case SImode:
5842 return V4SImode;
5843 case HImode:
5844 return V8HImode;
5845 case QImode:
5846 return V16QImode;
5847 case DImode:
5848 return V2DImode;
5849 break;
5850
5851 default:;
5852 }
5853 return word_mode;
5854}
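
/* An illustrative sketch (hypothetical user-level code, guarded out):
   with the quad-word preference above, a simple float loop like this is
   a candidate for V4SFmode, i.e. four lanes per vector, when the
   auto-vectorizer runs.  */
#if 0
static void
example_saxpy (float *y, const float *x, float a, int n)
{
  int i;

  for (i = 0; i < n; i++)
    y[i] = a * x[i] + y[i];
}
#endif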
5855
5856/* Return the equivalent letter for size. */
5857static unsigned char
5858sizetochar (int size)
5859{
5860 switch (size)
5861 {
5862 case 64: return 'd';
5863 case 32: return 's';
5864 case 16: return 'h';
5865 case 8 : return 'b';
5866 default: gcc_unreachable ();
5867 }
5868}
5869
5870static int
5871aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
5872 rtx *modconst, int *elementwidth,
5873 unsigned char *elementchar,
5874 int *mvn, int *shift)
5875{
5876#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
5877 matches = 1; \
5878 for (i = 0; i < idx; i += (STRIDE)) \
5879 if (!(TEST)) \
5880 matches = 0; \
5881 if (matches) \
5882 { \
5883 immtype = (CLASS); \
5884 elsize = (ELSIZE); \
5885 elchar = sizetochar (elsize); \
5886 eshift = (SHIFT); \
5887 emvn = (NEG); \
5888 break; \
5889 }
5890
5891 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
5892 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
5893 unsigned char bytes[16];
5894 unsigned char elchar = 0;
5895 int immtype = -1, matches;
5896 unsigned int invmask = inverse ? 0xff : 0;
5897 int eshift, emvn;
5898
5899 /* TODO: Vectors of float constants. */
5900 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5901 return -1;
5902
5903 /* Splat vector constant out into a byte vector. */
5904 for (i = 0; i < n_elts; i++)
5905 {
5906 rtx el = CONST_VECTOR_ELT (op, i);
5907 unsigned HOST_WIDE_INT elpart;
5908 unsigned int part, parts;
5909
5910 if (GET_CODE (el) == CONST_INT)
5911 {
5912 elpart = INTVAL (el);
5913 parts = 1;
5914 }
5915 else if (GET_CODE (el) == CONST_DOUBLE)
5916 {
5917 elpart = CONST_DOUBLE_LOW (el);
5918 parts = 2;
5919 }
5920 else
5921 gcc_unreachable ();
5922
5923 for (part = 0; part < parts; part++)
5924 {
5925 unsigned int byte;
5926 for (byte = 0; byte < innersize; byte++)
5927 {
5928 bytes[idx++] = (elpart & 0xff) ^ invmask;
5929 elpart >>= BITS_PER_UNIT;
5930 }
5931 if (GET_CODE (el) == CONST_DOUBLE)
5932 elpart = CONST_DOUBLE_HIGH (el);
5933 }
5934 }
5935
5936 /* Sanity check. */
5937 gcc_assert (idx == GET_MODE_SIZE (mode));
5938
5939 do
5940 {
5941 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
5942 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
5943
5944 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
5945 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
5946
5947 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
5948 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
5949
5950 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
5951 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
5952
5953 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
5954
5955 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
5956
5957 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
5958 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
5959
5960 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
5961 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
5962
5963 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
5964 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
5965
5966 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
5967 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
5968
5969 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
5970
5971 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
5972
5973 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
5974 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
5975
5976 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
5977 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
5978
5979 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
5980 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 0, 0);
5981
5982 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
5983 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 0, 1);
5984
5985 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
5986
5987 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
5988 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
5989 }
5990 while (0);
5991
5992 /* TODO: Currently the assembler cannot handle types 12 to 15,
5993 and there is no way to specify the cmode through the compiler.
5994 Disable them until the assembler gains support. */
5995 if (immtype == -1
5996 || (immtype >= 12 && immtype <= 15)
5997 || immtype == 18)
5998 return -1;
5999
6000
6001 if (elementwidth)
6002 *elementwidth = elsize;
6003
6004 if (elementchar)
6005 *elementchar = elchar;
6006
6007 if (mvn)
6008 *mvn = emvn;
6009
6010 if (shift)
6011 *shift = eshift;
6012
6013 if (modconst)
6014 {
6015 unsigned HOST_WIDE_INT imm = 0;
6016
6017 /* Un-invert bytes of recognized vector, if necessary. */
6018 if (invmask != 0)
6019 for (i = 0; i < idx; i++)
6020 bytes[i] ^= invmask;
6021
6022 if (immtype == 17)
6023 {
6024 /* FIXME: Broken on 32-bit H_W_I hosts. */
6025 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6026
6027 for (i = 0; i < 8; i++)
6028 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6029 << (i * BITS_PER_UNIT);
6030
6031 *modconst = GEN_INT (imm);
6032 }
6033 else
6034 {
6036
6037 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6038 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6039
6040 /* Construct the 8-bit 'abcdefgh' immediate field, since the assembler
6041 cannot handle generic constants. */
6042 gcc_assert (shift != NULL && mvn != NULL);
6043 if (*mvn)
6044 imm = ~imm;
6045 imm = (imm >> *shift) & 0xff;
6046 *modconst = GEN_INT (imm);
6047 }
6048 }
6049
6050 return immtype;
6051#undef CHECK
6052}
6053
6054/* Return TRUE if rtx OP is legal for use as an AdvSIMD MOVI (or, implicitly,
6055 MVNI) immediate.  Write back the width per element to *ELEMENTWIDTH (or
6056 zero for float elements), and a modified constant (whatever should be
6057 output for a MOVI instruction) in *MODCONST. */
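/* A worked example (a sketch, not an exhaustive description of the
   encodings): for the V4SImode constant { 0xAB0000, 0xAB0000, 0xAB0000,
   0xAB0000 } the byte-pattern checks above match with element width 32,
   shift 16 and no inversion, so *MODCONST becomes 0xab and the output
   pattern can print something like "movi vN.4s, #0xab, lsl #16" (register
   chosen arbitrarily).  */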
6058int
6059aarch64_simd_immediate_valid_for_move (rtx op, enum machine_mode mode,
6060 rtx *modconst, int *elementwidth,
6061 unsigned char *elementchar,
6062 int *mvn, int *shift)
6063{
6064 rtx tmpconst;
6065 int tmpwidth;
6066 unsigned char tmpwidthc;
6067 int tmpmvn = 0, tmpshift = 0;
6068 int retval = aarch64_simd_valid_immediate (op, mode, 0, &tmpconst,
6069 &tmpwidth, &tmpwidthc,
6070 &tmpmvn, &tmpshift);
6071
6072 if (retval == -1)
6073 return 0;
6074
6075 if (modconst)
6076 *modconst = tmpconst;
6077
6078 if (elementwidth)
6079 *elementwidth = tmpwidth;
6080
6081 if (elementchar)
6082 *elementchar = tmpwidthc;
6083
6084 if (mvn)
6085 *mvn = tmpmvn;
6086
6087 if (shift)
6088 *shift = tmpshift;
6089
6090 return 1;
6091}
6092
6093static bool
6094aarch64_const_vec_all_same_int_p (rtx x,
6095 HOST_WIDE_INT minval,
6096 HOST_WIDE_INT maxval)
6097{
6098 HOST_WIDE_INT firstval;
6099 int count, i;
6100
6101 if (GET_CODE (x) != CONST_VECTOR
6102 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6103 return false;
6104
6105 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6106 if (firstval < minval || firstval > maxval)
6107 return false;
6108
6109 count = CONST_VECTOR_NUNITS (x);
6110 for (i = 1; i < count; i++)
6111 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6112 return false;
6113
6114 return true;
6115}
6116
6117/* Check whether immediate shift constants are within range. */
6118bool
6119aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6120{
6121 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6122 if (left)
6123 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6124 else
6125 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6126}
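/* For instance, for a V4SImode shift the element width is 32 bits, so a
   vector of identical left-shift counts is accepted for counts 0..31,
   while right-shift counts must lie in 1..32, mirroring the immediate
   ranges of the SHL and SSHR/USHR instructions.  */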
6127
6128bool
6129aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6130{
6131 int nunits;
6132 int i;
6133
6134 if (GET_CODE (x) != CONST_VECTOR)
6135 return false;
6136
6137 nunits = GET_MODE_NUNITS (mode);
6138
6139 for (i = 0; i < nunits; i++)
6140 if (INTVAL (CONST_VECTOR_ELT (x, i)) != 0)
6141 return false;
6142
6143 return true;
6144}
6145
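/* Return TRUE if every byte of the 64-bit immediate X is either 0x00 or
   0xff, i.e. X can be materialised by the 64-bit byte-mask form of the
   MOVI instruction.  */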
6146bool
6147aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6148{
6149 HOST_WIDE_INT imm = INTVAL (x);
6150 int i;
6151
6152 for (i = 0; i < 8; i++)
6153 {
6154 unsigned int byte = imm & 0xff;
6155 if (byte != 0xff && byte != 0)
6156 return false;
6157 imm >>= 8;
6158 }
6159
6160 return true;
6161}
6162
6163/* Return a CONST_VECTOR in MODE with each element set to VAL. */
6164rtx
6165aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6166{
6167 int nunits = GET_MODE_NUNITS (mode);
6168 rtvec v = rtvec_alloc (nunits);
6169 int i;
6170
6171 for (i = 0; i < nunits; i++)
6172 RTVEC_ELT (v, i) = GEN_INT (val);
6173
6174 return gen_rtx_CONST_VECTOR (mode, v);
6175}
6176
6177/* Construct and return a PARALLEL rtx selecting the low (or, if HIGH, the high) half of the lanes of MODE. */
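/* For example (a sketch): for V8HImode with HIGH set this returns
   (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)]),
   i.e. a selector for the upper four lanes.  */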
6178rtx
6179aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6180{
6181 int nunits = GET_MODE_NUNITS (mode);
6182 rtvec v = rtvec_alloc (nunits / 2);
6183 int base = high ? nunits / 2 : 0;
6184 rtx t1;
6185 int i;
6186
6187 for (i = 0; i < nunits / 2; i++)
6188 RTVEC_ELT (v, i) = GEN_INT (base + i);
6189
6190 t1 = gen_rtx_PARALLEL (mode, v);
6191 return t1;
6192}
6193
6194/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6195 HIGH (exclusive). */
6196void
6197aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6198{
6199 HOST_WIDE_INT lane;
6200 gcc_assert (GET_CODE (operand) == CONST_INT);
6201 lane = INTVAL (operand);
6202
6203 if (lane < low || lane >= high)
6204 error ("lane out of range");
6205}
6206
6207void
6208aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6209{
6210 gcc_assert (GET_CODE (operand) == CONST_INT);
6211 HOST_WIDE_INT lane = INTVAL (operand);
6212
6213 if (lane < low || lane >= high)
6214 error ("constant out of range");
6215}
6216
6217/* Emit code to reinterpret one AdvSIMD type as another,
6218 without altering bits. */
6219void
6220aarch64_simd_reinterpret (rtx dest, rtx src)
6221{
6222 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6223}
6224
6225/* Emit code to place an AdvSIMD pair result in memory locations (with equal
6226 registers). */
6227void
6228aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6229 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6230 rtx op1)
6231{
6232 rtx mem = gen_rtx_MEM (mode, destaddr);
6233 rtx tmp1 = gen_reg_rtx (mode);
6234 rtx tmp2 = gen_reg_rtx (mode);
6235
6236 emit_insn (intfn (tmp1, op1, tmp2));
6237
6238 emit_move_insn (mem, tmp1);
6239 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6240 emit_move_insn (mem, tmp2);
6241}
6242
6243/* Return TRUE if OP is a valid vector addressing mode. */
6244bool
6245aarch64_simd_mem_operand_p (rtx op)
6246{
6247 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6248 || GET_CODE (XEXP (op, 0)) == REG);
6249}
6250
6251/* Set up OPERANDS for a register copy from SRC to DEST, taking care
6252 not to early-clobber SRC registers in the process.
6253
6254 We assume that the operands described by SRC and DEST represent a
6255 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6256 number of components into which the copy has been decomposed. */
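/* A small example of why the ordering matters (a sketch with arbitrary
   register numbers): copying an OImode value held in {V0, V1} into
   {V1, V2} must emit the V1->V2 move before the V0->V1 move, otherwise
   the first move would clobber V1 while it is still needed as a
   source.  */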
6257void
6258aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6259 rtx *src, unsigned int count)
6260{
6261 unsigned int i;
6262
6263 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6264 || REGNO (operands[0]) < REGNO (operands[1]))
6265 {
6266 for (i = 0; i < count; i++)
6267 {
6268 operands[2 * i] = dest[i];
6269 operands[2 * i + 1] = src[i];
6270 }
6271 }
6272 else
6273 {
6274 for (i = 0; i < count; i++)
6275 {
6276 operands[2 * i] = dest[count - i - 1];
6277 operands[2 * i + 1] = src[count - i - 1];
6278 }
6279 }
6280}
6281
6282/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6283 one of the VSTRUCT modes: OI, CI or XI. */
6284int
6285aarch64_simd_attr_length_move (rtx insn)
6286{
6289 enum machine_mode mode;
6290
6291 extract_insn_cached (insn);
6292
6293 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6294 {
6295 mode = GET_MODE (recog_data.operand[0]);
6296 switch (mode)
6297 {
6298 case OImode:
6299 return 8;
6300 case CImode:
6301 return 12;
6302 case XImode:
6303 return 16;
6304 default:
6305 gcc_unreachable ();
6306 }
6307 }
6308 return 4;
6309}
6310
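/* Implement TARGET_SHIFT_TRUNCATION_MASK.  Scalar shifts only use the low
   bits of the shift amount, so return the bit-size mask for scalar modes;
   vector shifts are not truncated, so return 0 for vector and
   vector-structure modes.  */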
6311static unsigned HOST_WIDE_INT
6312aarch64_shift_truncation_mask (enum machine_mode mode)
6313{
6314 return
6315 (aarch64_vector_mode_supported_p (mode)
6316 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
6317}
6318
6319#ifndef TLS_SECTION_ASM_FLAG
6320#define TLS_SECTION_ASM_FLAG 'T'
6321#endif
6322
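/* For example (a sketch of the output; the exact flag characters depend on
   the SECTION_* bits passed in): a writable TLS BSS-style section comes
   out as

       .section .tbss,"awT",@nobits

   while a mergeable string section additionally gets 'M', 'S' and an
   entsize suffix.  */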
6323void
6324aarch64_elf_asm_named_section (const char *name, unsigned int flags,
6325 tree decl ATTRIBUTE_UNUSED)
6326{
6327 char flagchars[10], *f = flagchars;
6328
6329 /* If we have already declared this section, we can use an
6330 abbreviated form to switch back to it -- unless this section is
6331 part of a COMDAT group, in which case GAS requires the full
6332 declaration every time. */
6333 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6334 && (flags & SECTION_DECLARED))
6335 {
6336 fprintf (asm_out_file, "\t.section\t%s\n", name);
6337 return;
6338 }
6339
6340 if (!(flags & SECTION_DEBUG))
6341 *f++ = 'a';
6342 if (flags & SECTION_WRITE)
6343 *f++ = 'w';
6344 if (flags & SECTION_CODE)
6345 *f++ = 'x';
6346 if (flags & SECTION_SMALL)
6347 *f++ = 's';
6348 if (flags & SECTION_MERGE)
6349 *f++ = 'M';
6350 if (flags & SECTION_STRINGS)
6351 *f++ = 'S';
6352 if (flags & SECTION_TLS)
6353 *f++ = TLS_SECTION_ASM_FLAG;
6354 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6355 *f++ = 'G';
6356 *f = '\0';
6357
6358 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
6359
6360 if (!(flags & SECTION_NOTYPE))
6361 {
6362 const char *type;
6363 const char *format;
6364
6365 if (flags & SECTION_BSS)
6366 type = "nobits";
6367 else
6368 type = "progbits";
6369
6370#ifdef TYPE_OPERAND_FMT
6371 format = "," TYPE_OPERAND_FMT;
6372#else
6373 format = ",@%s";
6374#endif
6375
6376 fprintf (asm_out_file, format, type);
6377
6378 if (flags & SECTION_ENTSIZE)
6379 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
6380 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6381 {
6382 if (TREE_CODE (decl) == IDENTIFIER_NODE)
6383 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
6384 else
6385 fprintf (asm_out_file, ",%s,comdat",
6386 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
6387 }
6388 }
6389
6390 putc ('\n', asm_out_file);
6391}
6392
6393/* Select a format to encode pointers in exception handling data. */
6394int
6395aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
6396{
6397 int type;
6398 switch (aarch64_cmodel)
6399 {
6400 case AARCH64_CMODEL_TINY:
6401 case AARCH64_CMODEL_TINY_PIC:
6402 case AARCH64_CMODEL_SMALL:
6403 case AARCH64_CMODEL_SMALL_PIC:
6404 /* text+got+data < 4GB.  4-byte signed relocs are sufficient
6405 for everything. */
6406 type = DW_EH_PE_sdata4;
6407 break;
6408 default:
6409 /* No assumptions here. 8-byte relocs required. */
6410 type = DW_EH_PE_sdata8;
6411 break;
6412 }
6413 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
6414}
6415
6416/* Emit load exclusive. */
6417
6418static void
6419aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
6420 rtx mem, rtx model_rtx)
6421{
6422 rtx (*gen) (rtx, rtx, rtx);
6423
6424 switch (mode)
6425 {
6426 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
6427 case HImode: gen = gen_aarch64_load_exclusivehi; break;
6428 case SImode: gen = gen_aarch64_load_exclusivesi; break;
6429 case DImode: gen = gen_aarch64_load_exclusivedi; break;
6430 default:
6431 gcc_unreachable ();
6432 }
6433
6434 emit_insn (gen (rval, mem, model_rtx));
6435}
6436
6437/* Emit store exclusive. */
6438
6439static void
6440aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
6441 rtx rval, rtx mem, rtx model_rtx)
6442{
6443 rtx (*gen) (rtx, rtx, rtx, rtx);
6444
6445 switch (mode)
6446 {
6447 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
6448 case HImode: gen = gen_aarch64_store_exclusivehi; break;
6449 case SImode: gen = gen_aarch64_store_exclusivesi; break;
6450 case DImode: gen = gen_aarch64_store_exclusivedi; break;
6451 default:
6452 gcc_unreachable ();
6453 }
6454
6455 emit_insn (gen (bval, rval, mem, model_rtx));
6456}
6457
6458/* Emit jump pattern INSN as a jump insn and mark it as very unlikely to be taken. */
6459
6460static void
6461aarch64_emit_unlikely_jump (rtx insn)
6462{
6463 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
6464
6465 insn = emit_jump_insn (insn);
6466 add_reg_note (insn, REG_BR_PROB, very_unlikely);
6467}
6468
6469/* Expand a compare and swap pattern. */
6470
6471void
6472aarch64_expand_compare_and_swap (rtx operands[])
6473{
6474 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
6475 enum machine_mode mode, cmp_mode;
6476 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
6477
6478 bval = operands[0];
6479 rval = operands[1];
6480 mem = operands[2];
6481 oldval = operands[3];
6482 newval = operands[4];
6483 is_weak = operands[5];
6484 mod_s = operands[6];
6485 mod_f = operands[7];
6486 mode = GET_MODE (mem);
6487 cmp_mode = mode;
6488
6489 /* Normally the succ memory model must be stronger than fail, but in the
6490 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
6491 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
6492
6493 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
6494 && INTVAL (mod_s) == MEMMODEL_RELEASE)
6495 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
6496
6497 switch (mode)
6498 {
6499 case QImode:
6500 case HImode:
6501 /* For short modes, we're going to perform the comparison in SImode,
6502 so do the zero-extension now. */
6503 cmp_mode = SImode;
6504 rval = gen_reg_rtx (SImode);
6505 oldval = convert_modes (SImode, mode, oldval, true);
6506 /* Fall through. */
6507
6508 case SImode:
6509 case DImode:
6510 /* Force the value into a register if needed. */
6511 if (!aarch64_plus_operand (oldval, mode))
6512 oldval = force_reg (cmp_mode, oldval);
6513 break;
6514
6515 default:
6516 gcc_unreachable ();
6517 }
6518
6519 switch (mode)
6520 {
6521 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
6522 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
6523 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
6524 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
6525 default:
6526 gcc_unreachable ();
6527 }
6528
6529 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
6530
6531 if (mode == QImode || mode == HImode)
6532 emit_move_insn (operands[1], gen_lowpart (mode, rval));
6533
6534 x = gen_rtx_REG (CCmode, CC_REGNUM);
6535 x = gen_rtx_EQ (SImode, x, const0_rtx);
6536 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
6537}
6538
6539/* Split a compare and swap pattern. */
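/* Roughly, a strong SImode compare-and-swap is split into a loop of this
   shape (a sketch; register names are arbitrary):

       .Lretry:
         ldaxr  w0, [xaddr]
         cmp    w0, w_oldval
         b.ne   .Ldone
         stlxr  w1, w_newval, [xaddr]
         cbnz   w1, .Lretry
       .Ldone:

   with the acquire/release variants of the exclusives chosen according to
   the memory model operands.  */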
6540
6541void
6542aarch64_split_compare_and_swap (rtx operands[])
6543{
6544 rtx rval, mem, oldval, newval, scratch;
6545 enum machine_mode mode;
6546 enum memmodel mod_s;
6547 bool is_weak;
6548 rtx label1, label2, x, cond;
6549
6550 rval = operands[0];
6551 mem = operands[1];
6552 oldval = operands[2];
6553 newval = operands[3];
6554 is_weak = (operands[4] != const0_rtx);
6555 mod_s = (enum memmodel) INTVAL (operands[5]);
6556 scratch = operands[7];
6557 mode = GET_MODE (mem);
6558
6559 label1 = NULL_RTX;
6560 if (!is_weak)
6561 {
6562 label1 = gen_label_rtx ();
6563 emit_label (label1);
6564 }
6565 label2 = gen_label_rtx ();
6566
6567 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
6568
6569 cond = aarch64_gen_compare_reg (NE, rval, oldval);
6570 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
6571 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
6572 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
6573 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
6574
6575 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
6576
6577 if (!is_weak)
6578 {
6579 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
6580 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
6581 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
6582 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
6583 }
6584 else
6585 {
6586 cond = gen_rtx_REG (CCmode, CC_REGNUM);
6587 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
6588 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
6589 }
6590
6591 emit_label (label2);
6592}
6593
6594/* Split an atomic operation. */
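/* Roughly, an atomic fetch-and-add on an SImode location is split into a
   loop of this shape (a sketch; register names are arbitrary):

       .Lretry:
         ldxr   w0, [xaddr]
         add    w1, w0, w_val
         stxr   w2, w1, [xaddr]
         cbnz   w2, .Lretry

   with acquire/release variants of the exclusives selected according to
   MODEL_RTX.  */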
6595
6596void
6597aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
6598 rtx value, rtx model_rtx, rtx cond)
6599{
6600 enum machine_mode mode = GET_MODE (mem);
6601 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
6602 rtx label, x;
6603
6604 label = gen_label_rtx ();
6605 emit_label (label);
6606
6607 if (new_out)
6608 new_out = gen_lowpart (wmode, new_out);
6609 if (old_out)
6610 old_out = gen_lowpart (wmode, old_out);
6611 else
6612 old_out = new_out;
6613 value = simplify_gen_subreg (wmode, value, mode, 0);
6614
6615 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
6616
6617 switch (code)
6618 {
6619 case SET:
6620 new_out = value;
6621 break;
6622
6623 case NOT:
6624 x = gen_rtx_AND (wmode, old_out, value);
6625 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
6626 x = gen_rtx_NOT (wmode, new_out);
6627 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
6628 break;
6629
6630 case MINUS:
6631 if (CONST_INT_P (value))
6632 {
6633 value = GEN_INT (-INTVAL (value));
6634 code = PLUS;
6635 }
6636 /* Fall through. */
6637
6638 default:
6639 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
6640 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
6641 break;
6642 }
6643
6644 aarch64_emit_store_exclusive (mode, cond, mem,
6645 gen_lowpart (mode, new_out), model_rtx);
6646
6647 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
6648 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
6649 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
6650 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
6651}
6652
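/* Implement TARGET_ASM_FILE_START.  Emit a .arch or .cpu directive for the
   selected architecture or CPU ahead of the default file prologue.  */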
6653static void
6654aarch64_start_file (void)
6655{
6656 if (selected_arch)
6657 asm_fprintf (asm_out_file, "\t.arch %s\n", selected_arch->name);
6658 else if (selected_cpu)
6659 asm_fprintf (asm_out_file, "\t.cpu %s\n", selected_cpu->name);
6660 default_file_start ();
6661}
6662
6663/* Target hook for c_mode_for_suffix. */
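/* For example, this is what lets the C front end give a constant written
   as 1.0q the mode TFmode, the 128-bit IEEE quad format used for long
   double on AArch64.  */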
6664static enum machine_mode
6665aarch64_c_mode_for_suffix (char suffix)
6666{
6667 if (suffix == 'q')
6668 return TFmode;
6669
6670 return VOIDmode;
6671}
6672
6673#undef TARGET_ADDRESS_COST
6674#define TARGET_ADDRESS_COST aarch64_address_cost
6675
6676/* This hook determines whether unnamed bitfields affect the alignment
6677 of the containing structure. The hook returns true if the structure
6678 should inherit the alignment requirements of an unnamed bitfield's
6679 type. */
6680#undef TARGET_ALIGN_ANON_BITFIELD
6681#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
6682
6683#undef TARGET_ASM_ALIGNED_DI_OP
6684#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
6685
6686#undef TARGET_ASM_ALIGNED_HI_OP
6687#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
6688
6689#undef TARGET_ASM_ALIGNED_SI_OP
6690#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
6691
6692#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
6693#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
6694 hook_bool_const_tree_hwi_hwi_const_tree_true
6695
6696#undef TARGET_ASM_FILE_START
6697#define TARGET_ASM_FILE_START aarch64_start_file
6698
6699#undef TARGET_ASM_OUTPUT_MI_THUNK
6700#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
6701
6702#undef TARGET_ASM_SELECT_RTX_SECTION
6703#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
6704
6705#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
6706#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
6707
6708#undef TARGET_BUILD_BUILTIN_VA_LIST
6709#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
6710
6711#undef TARGET_CALLEE_COPIES
6712#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
6713
6714#undef TARGET_CAN_ELIMINATE
6715#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
6716
6717#undef TARGET_CANNOT_FORCE_CONST_MEM
6718#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
6719
6720#undef TARGET_CONDITIONAL_REGISTER_USAGE
6721#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
6722
6723/* Only the least significant bit is used for initialization guard
6724 variables. */
6725#undef TARGET_CXX_GUARD_MASK_BIT
6726#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
6727
6728#undef TARGET_C_MODE_FOR_SUFFIX
6729#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
6730
6731#ifdef TARGET_BIG_ENDIAN_DEFAULT
6732#undef TARGET_DEFAULT_TARGET_FLAGS
6733#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
6734#endif
6735
6736#undef TARGET_CLASS_MAX_NREGS
6737#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
6738
6739#undef TARGET_BUILTIN_DECL
6740#define TARGET_BUILTIN_DECL aarch64_builtin_decl
6741
6742#undef TARGET_EXPAND_BUILTIN
6743#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
6744
6745#undef TARGET_EXPAND_BUILTIN_VA_START
6746#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
6747
6748#undef TARGET_FUNCTION_ARG
6749#define TARGET_FUNCTION_ARG aarch64_function_arg
6750
6751#undef TARGET_FUNCTION_ARG_ADVANCE
6752#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
6753
6754#undef TARGET_FUNCTION_ARG_BOUNDARY
6755#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
6756
6757#undef TARGET_FUNCTION_OK_FOR_SIBCALL
6758#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
6759
6760#undef TARGET_FUNCTION_VALUE
6761#define TARGET_FUNCTION_VALUE aarch64_function_value
6762
6763#undef TARGET_FUNCTION_VALUE_REGNO_P
6764#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
6765
6766#undef TARGET_FRAME_POINTER_REQUIRED
6767#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
6768
6769#undef TARGET_GIMPLIFY_VA_ARG_EXPR
6770#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
6771
6772#undef TARGET_INIT_BUILTINS
6773#define TARGET_INIT_BUILTINS aarch64_init_builtins
6774
6775#undef TARGET_LEGITIMATE_ADDRESS_P
6776#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
6777
6778#undef TARGET_LEGITIMATE_CONSTANT_P
6779#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
6780
6781#undef TARGET_LIBGCC_CMP_RETURN_MODE
6782#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
6783
6784#undef TARGET_MEMORY_MOVE_COST
6785#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
6786
6787#undef TARGET_MUST_PASS_IN_STACK
6788#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
6789
6790/* This target hook should return true if accesses to volatile bitfields
6791 should use the narrowest mode possible. It should return false if these
6792 accesses should use the bitfield container type. */
6793#undef TARGET_NARROW_VOLATILE_BITFIELD
6794#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
6795
6796#undef TARGET_OPTION_OVERRIDE
6797#define TARGET_OPTION_OVERRIDE aarch64_override_options
6798
6799#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
6800#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
6801 aarch64_override_options_after_change
6802
6803#undef TARGET_PASS_BY_REFERENCE
6804#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
6805
6806#undef TARGET_PREFERRED_RELOAD_CLASS
6807#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
6808
6809#undef TARGET_SECONDARY_RELOAD
6810#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
6811
6812#undef TARGET_SHIFT_TRUNCATION_MASK
6813#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
6814
6815#undef TARGET_SETUP_INCOMING_VARARGS
6816#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
6817
6818#undef TARGET_STRUCT_VALUE_RTX
6819#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
6820
6821#undef TARGET_REGISTER_MOVE_COST
6822#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
6823
6824#undef TARGET_RETURN_IN_MEMORY
6825#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
6826
6827#undef TARGET_RETURN_IN_MSB
6828#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
6829
6830#undef TARGET_RTX_COSTS
6831#define TARGET_RTX_COSTS aarch64_rtx_costs
6832
6833#undef TARGET_TRAMPOLINE_INIT
6834#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
6835
6836#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
6837#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
6838
6839#undef TARGET_VECTOR_MODE_SUPPORTED_P
6840#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
6841
6842#undef TARGET_ARRAY_MODE_SUPPORTED_P
6843#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
6844
6845#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
6846#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
6847
6848/* Section anchor support. */
6849
6850#undef TARGET_MIN_ANCHOR_OFFSET
6851#define TARGET_MIN_ANCHOR_OFFSET -256
6852
6853/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
6854 byte offset; we can do much more for larger data types, but have no way
6855 to determine the size of the access. We assume accesses are aligned. */
6856#undef TARGET_MAX_ANCHOR_OFFSET
6857#define TARGET_MAX_ANCHOR_OFFSET 4095
6858
6859struct gcc_target targetm = TARGET_INITIALIZER;
6860
6861#include "gt-aarch64.h"