/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2014 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "insn-codes.h"
#include "rtl.h"
#include "insn-attr.h"
#include "tree.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "regs.h"
#include "df.h"
#include "hard-reg-set.h"
#include "output.h"
#include "expr.h"
#include "reload.h"
#include "toplev.h"
#include "target.h"
#include "target-def.h"
#include "targhooks.h"
#include "ggc.h"
#include "function.h"
#include "tm_p.h"
#include "recog.h"
#include "langhooks.h"
#include "diagnostic-core.h"
#include "pointer-set.h"
#include "hash-table.h"
#include "vec.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "optabs.h"
#include "dwarf2.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "config/arm/aarch-cost-tables.h"

/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)

/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC:
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};

struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};

struct simd_immediate_info
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
  bool msl;
};

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif

static bool aarch64_lra_p (void);
static bool aarch64_composite_type_p (const_tree, enum machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
                                                     const_tree,
                                                     enum machine_mode *, int *,
                                                     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (enum machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_const_vec_all_same_int_p (rtx,
                                              HOST_WIDE_INT, HOST_WIDE_INT);

static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
                                                 const unsigned char *sel);

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* The current tuning set.  */
const struct tune_params *aarch64_tune_params;

/* Mask to specify which instructions we are allowed to generate.  */
unsigned long aarch64_isa_flags = 0;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;

/* Tuning parameters.  */

#if HAVE_DESIGNATED_INITIALIZERS
#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
#else
#define NAMED_PARAM(NAME, VAL) (VAL)
#endif

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_addrcost_table generic_addrcost_table =
{
  NAMED_PARAM (pre_modify, 0),
  NAMED_PARAM (post_modify, 0),
  NAMED_PARAM (register_offset, 0),
  NAMED_PARAM (register_extend, 0),
  NAMED_PARAM (imm_offset, 0)
};

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_regmove_cost generic_regmove_cost =
{
  NAMED_PARAM (GP2GP, 1),
  NAMED_PARAM (GP2FP, 2),
  NAMED_PARAM (FP2GP, 2),
  /* We currently do not provide direct support for TFmode Q->Q move.
     Therefore we need to raise the cost above 2 in order to have
     reload handle the situation.  */
  NAMED_PARAM (FP2FP, 4)
};

/* Generic costs for vector insn classes.  */
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_vector_cost generic_vector_cost =
{
  NAMED_PARAM (scalar_stmt_cost, 1),
  NAMED_PARAM (scalar_load_cost, 1),
  NAMED_PARAM (scalar_store_cost, 1),
  NAMED_PARAM (vec_stmt_cost, 1),
  NAMED_PARAM (vec_to_scalar_cost, 1),
  NAMED_PARAM (scalar_to_vec_cost, 1),
  NAMED_PARAM (vec_align_load_cost, 1),
  NAMED_PARAM (vec_unalign_load_cost, 1),
  NAMED_PARAM (vec_unalign_store_cost, 1),
  NAMED_PARAM (vec_store_cost, 1),
  NAMED_PARAM (cond_taken_branch_cost, 3),
  NAMED_PARAM (cond_not_taken_branch_cost, 1)
};

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct tune_params generic_tunings =
{
  &generic_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  NAMED_PARAM (memmov_cost, 4),
  NAMED_PARAM (issue_rate, 2)
};

static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  NAMED_PARAM (memmov_cost, 4),
  NAMED_PARAM (issue_rate, 2)
};

static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  NAMED_PARAM (memmov_cost, 4),
  NAMED_PARAM (issue_rate, 3)
};

/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
  {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, NULL, 0, NULL}
};

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
  {NULL, aarch64_none, NULL, 0, NULL}
};
278
279/* Target specification. These are populated as commandline arguments
280 are processed, or NULL if not specified. */
281static const struct processor *selected_arch;
282static const struct processor *selected_cpu;
283static const struct processor *selected_tune;
284
285#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
286
287/* An ISA extension in the co-processor and main instruction set space. */
288struct aarch64_option_extension
289{
290 const char *const name;
291 const unsigned long flags_on;
292 const unsigned long flags_off;
293};
294
295/* ISA extensions in AArch64. */
296static const struct aarch64_option_extension all_extensions[] =
297{
298#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
299 {NAME, FLAGS_ON, FLAGS_OFF},
300#include "aarch64-option-extensions.def"
301#undef AARCH64_OPT_EXTENSION
302 {NULL, 0, 0}
303};
304
305/* Used to track the size of an address when generating a pre/post
306 increment address. */
307static enum machine_mode aarch64_memory_reference_mode;
308
309/* Used to force GTY into this file. */
310static GTY(()) int gty_dummy;
311
312/* A table of valid AArch64 "bitmask immediate" values for
313 logical instructions. */
314
315#define AARCH64_NUM_BITMASKS 5334
316static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
317
318/* Did we set flag_omit_frame_pointer just so
319 aarch64_frame_pointer_required would be called? */
320static bool faked_omit_frame_pointer;
321
322typedef enum aarch64_cond_code
323{
324 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
325 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
326 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
327}
328aarch64_cc;
329
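/* The condition codes are laid out in complementary pairs (EQ/NE, CS/CC,
   and so on), so inverting a condition is simply a matter of flipping
   its least significant bit.  */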
330#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
331
332/* The condition codes of the processor, and the inverse function. */
333static const char * const aarch64_condition_codes[] =
334{
335 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
336 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
337};
338
339/* Provide a mapping from gcc register numbers to dwarf register numbers. */
340unsigned
341aarch64_dbx_register_number (unsigned regno)
342{
343 if (GP_REGNUM_P (regno))
344 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
345 else if (regno == SP_REGNUM)
346 return AARCH64_DWARF_SP;
347 else if (FP_REGNUM_P (regno))
348 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
349
350 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
351 equivalent DWARF register. */
352 return DWARF_FRAME_REGISTERS;
353}
354
355/* Return TRUE if MODE is any of the large INT modes. */
356static bool
357aarch64_vect_struct_mode_p (enum machine_mode mode)
358{
359 return mode == OImode || mode == CImode || mode == XImode;
360}
361
362/* Return TRUE if MODE is any of the vector modes. */
363static bool
364aarch64_vector_mode_p (enum machine_mode mode)
365{
366 return aarch64_vector_mode_supported_p (mode)
367 || aarch64_vect_struct_mode_p (mode);
368}
369
370/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
371static bool
372aarch64_array_mode_supported_p (enum machine_mode mode,
373 unsigned HOST_WIDE_INT nelems)
374{
375 if (TARGET_SIMD
376 && AARCH64_VALID_SIMD_QREG_MODE (mode)
377 && (nelems >= 2 && nelems <= 4))
378 return true;
379
380 return false;
381}
382
383/* Implement HARD_REGNO_NREGS. */
384
385int
386aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
387{
388 switch (aarch64_regno_regclass (regno))
389 {
390 case FP_REGS:
391 case FP_LO_REGS:
392 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
393 default:
394 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
395 }
396 gcc_unreachable ();
397}
398
399/* Implement HARD_REGNO_MODE_OK. */
400
401int
402aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
403{
404 if (GET_MODE_CLASS (mode) == MODE_CC)
405 return regno == CC_REGNUM;
406
407 if (regno == SP_REGNUM)
408 /* The purpose of comparing with ptr_mode is to support the
409 global register variable associated with the stack pointer
410 register via the syntax of asm ("wsp") in ILP32. */
411 return mode == Pmode || mode == ptr_mode;
412
413 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
414 return mode == Pmode;
415
416 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
417 return 1;
418
419 if (FP_REGNUM_P (regno))
420 {
421 if (aarch64_vect_struct_mode_p (mode))
422 return
423 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
424 else
425 return 1;
426 }
427
428 return 0;
429}
430
/* Return true if calls to DECL should be treated as
   long-calls (i.e. called via a register).  */
433static bool
434aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
435{
436 return false;
437}
438
/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (i.e. called via a register).  */
441bool
442aarch64_is_long_call_p (rtx sym)
443{
444 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
445}
446
/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

     (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)).  */
452bool
453aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
454 rtx extract_imm)
455{
456 HOST_WIDE_INT mult_val, extract_val;
457
458 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
459 return false;
460
461 mult_val = INTVAL (mult_imm);
462 extract_val = INTVAL (extract_imm);
463
464 if (extract_val > 8
465 && extract_val < GET_MODE_BITSIZE (mode)
466 && exact_log2 (extract_val & ~7) > 0
467 && (extract_val & 7) <= 4
468 && mult_val == (1 << (extract_val & 7)))
469 return true;
470
471 return false;
472}
473
474/* Emit an insn that's a simple single-set. Both the operands must be
475 known to be valid. */
476inline static rtx
477emit_set_insn (rtx x, rtx y)
478{
479 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
480}
481
482/* X and Y are two things to compare using CODE. Emit the compare insn and
483 return the rtx for register 0 in the proper mode. */
484rtx
485aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
486{
487 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
488 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
489
490 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
491 return cc_reg;
492}
493
494/* Build the SYMBOL_REF for __tls_get_addr. */
495
496static GTY(()) rtx tls_get_addr_libfunc;
497
498rtx
499aarch64_tls_get_addr (void)
500{
501 if (!tls_get_addr_libfunc)
502 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
503 return tls_get_addr_libfunc;
504}
505
506/* Return the TLS model to use for ADDR. */
507
508static enum tls_model
509tls_symbolic_operand_type (rtx addr)
510{
511 enum tls_model tls_kind = TLS_MODEL_NONE;
512 rtx sym, addend;
513
514 if (GET_CODE (addr) == CONST)
515 {
516 split_const (addr, &sym, &addend);
517 if (GET_CODE (sym) == SYMBOL_REF)
518 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
519 }
520 else if (GET_CODE (addr) == SYMBOL_REF)
521 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
522
523 return tls_kind;
524}
525
/* We'll allow lo_sum's in our legitimate addresses so that combine
   would take care of combining addresses where necessary, but for
   generation purposes, we'll generate the address as:
530 RTL Absolute
531 tmp = hi (symbol_ref); adrp x1, foo
532 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
533 nop
534
535 PIC TLS
536 adrp x1, :got:foo adrp tmp, :tlsgd:foo
537 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
538 bl __tls_get_addr
539 nop
540
541 Load TLS symbol, depending on TLS mechanism and TLS access model.
542
543 Global Dynamic - Traditional TLS:
544 adrp tmp, :tlsgd:imm
545 add dest, tmp, #:tlsgd_lo12:imm
546 bl __tls_get_addr
547
548 Global Dynamic - TLS Descriptors:
549 adrp dest, :tlsdesc:imm
550 ldr tmp, [dest, #:tlsdesc_lo12:imm]
551 add dest, dest, #:tlsdesc_lo12:imm
552 blr tmp
553 mrs tp, tpidr_el0
554 add dest, dest, tp
555
556 Initial Exec:
557 mrs tp, tpidr_el0
558 adrp tmp, :gottprel:imm
559 ldr dest, [tmp, #:gottprel_lo12:imm]
560 add dest, dest, tp
561
562 Local Exec:
563 mrs tp, tpidr_el0
564 add t0, tp, #:tprel_hi12:imm
565 add t0, #:tprel_lo12_nc:imm
566*/
567
568static void
569aarch64_load_symref_appropriately (rtx dest, rtx imm,
570 enum aarch64_symbol_type type)
571{
572 switch (type)
573 {
574 case SYMBOL_SMALL_ABSOLUTE:
575 {
28514dda 576 /* In ILP32, the mode of dest can be either SImode or DImode. */
43e9d192 577 rtx tmp_reg = dest;
578 enum machine_mode mode = GET_MODE (dest);
579
580 gcc_assert (mode == Pmode || mode == ptr_mode);
581
43e9d192 582 if (can_create_pseudo_p ())
28514dda 583 tmp_reg = gen_reg_rtx (mode);
43e9d192 584
28514dda 585 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
586 emit_insn (gen_add_losym (dest, tmp_reg, imm));
587 return;
588 }
589
590 case SYMBOL_TINY_ABSOLUTE:
591 emit_insn (gen_rtx_SET (Pmode, dest, imm));
592 return;
593
594 case SYMBOL_SMALL_GOT:
595 {
        /* In ILP32, the mode of dest can be either SImode or DImode,
           while the GOT entry is always of SImode size.  The mode of
           dest depends on how dest is used: if dest is assigned to a
           pointer (e.g. in the memory), it has SImode; it may have
           DImode if dest is dereferenced to access the memory.
           This is why we have to handle three different ldr_got_small
           patterns here (two patterns for ILP32).  */
43e9d192 603 rtx tmp_reg = dest;
604 enum machine_mode mode = GET_MODE (dest);
605
43e9d192 606 if (can_create_pseudo_p ())
607 tmp_reg = gen_reg_rtx (mode);
608
609 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
610 if (mode == ptr_mode)
611 {
612 if (mode == DImode)
613 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
614 else
615 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
616 }
617 else
618 {
619 gcc_assert (mode == Pmode);
620 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
621 }
622
623 return;
624 }
625
626 case SYMBOL_SMALL_TLSGD:
627 {
628 rtx insns;
629 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
630
631 start_sequence ();
632 emit_call_insn (gen_tlsgd_small (result, imm));
633 insns = get_insns ();
634 end_sequence ();
635
636 RTL_CONST_CALL_P (insns) = 1;
637 emit_libcall_block (insns, dest, result, imm);
638 return;
639 }
640
641 case SYMBOL_SMALL_TLSDESC:
642 {
643 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
644 rtx tp;
645
646 emit_insn (gen_tlsdesc_small (imm));
647 tp = aarch64_load_tp (NULL);
648 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
649 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
650 return;
651 }
652
653 case SYMBOL_SMALL_GOTTPREL:
654 {
655 rtx tmp_reg = gen_reg_rtx (Pmode);
656 rtx tp = aarch64_load_tp (NULL);
657 emit_insn (gen_tlsie_small (tmp_reg, imm));
658 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
659 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
660 return;
661 }
662
663 case SYMBOL_SMALL_TPREL:
664 {
665 rtx tp = aarch64_load_tp (NULL);
666 emit_insn (gen_tlsle_small (dest, tp, imm));
667 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
668 return;
669 }
670
671 case SYMBOL_TINY_GOT:
672 emit_insn (gen_ldr_got_tiny (dest, imm));
673 return;
674
675 default:
676 gcc_unreachable ();
677 }
678}
679
680/* Emit a move from SRC to DEST. Assume that the move expanders can
681 handle all moves if !can_create_pseudo_p (). The distinction is
682 important because, unlike emit_move_insn, the move expanders know
683 how to force Pmode objects into the constant pool even when the
684 constant pool address is not itself legitimate. */
685static rtx
686aarch64_emit_move (rtx dest, rtx src)
687{
688 return (can_create_pseudo_p ()
689 ? emit_move_insn (dest, src)
690 : emit_move_insn_1 (dest, src));
691}
692
693/* Split a 128-bit move operation into two 64-bit move operations,
694 taking care to handle partial overlap of register to register
695 copies. Special cases are needed when moving between GP regs and
696 FP regs. SRC can be a register, constant or memory; DST a register
697 or memory. If either operand is memory it must not have any side
698 effects. */
699void
700aarch64_split_128bit_move (rtx dst, rtx src)
701{
702 rtx dst_lo, dst_hi;
703 rtx src_lo, src_hi;
43e9d192 704
030d03b8 705 enum machine_mode mode = GET_MODE (dst);
12dc6974 706
707 gcc_assert (mode == TImode || mode == TFmode);
708 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
709 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
710
711 if (REG_P (dst) && REG_P (src))
712 {
713 int src_regno = REGNO (src);
714 int dst_regno = REGNO (dst);
43e9d192 715
030d03b8 716 /* Handle FP <-> GP regs. */
717 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
718 {
030d03b8
RE
719 src_lo = gen_lowpart (word_mode, src);
720 src_hi = gen_highpart (word_mode, src);
721
722 if (mode == TImode)
723 {
724 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
725 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
726 }
727 else
728 {
729 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
730 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
731 }
732 return;
733 }
734 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
735 {
736 dst_lo = gen_lowpart (word_mode, dst);
737 dst_hi = gen_highpart (word_mode, dst);
738
739 if (mode == TImode)
740 {
741 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
742 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
743 }
744 else
745 {
746 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
747 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
748 }
749 return;
43e9d192 750 }
751 }
752
753 dst_lo = gen_lowpart (word_mode, dst);
754 dst_hi = gen_highpart (word_mode, dst);
755 src_lo = gen_lowpart (word_mode, src);
756 src_hi = gen_highpart_mode (word_mode, mode, src);
757
758 /* At most one pairing may overlap. */
759 if (reg_overlap_mentioned_p (dst_lo, src_hi))
760 {
761 aarch64_emit_move (dst_hi, src_hi);
762 aarch64_emit_move (dst_lo, src_lo);
763 }
764 else
765 {
766 aarch64_emit_move (dst_lo, src_lo);
767 aarch64_emit_move (dst_hi, src_hi);
768 }
769}
770
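/* Return true if a 128-bit move from SRC to DST needs to be split into
   two word-sized moves; only a direct FP-register to FP-register copy
   can be left as a single move.  */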
771bool
772aarch64_split_128bit_move_p (rtx dst, rtx src)
773{
774 return (! REG_P (src)
775 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
776}
777
778/* Split a complex SIMD combine. */
779
780void
781aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
782{
783 enum machine_mode src_mode = GET_MODE (src1);
784 enum machine_mode dst_mode = GET_MODE (dst);
785
786 gcc_assert (VECTOR_MODE_P (dst_mode));
787
788 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
789 {
790 rtx (*gen) (rtx, rtx, rtx);
791
792 switch (src_mode)
793 {
794 case V8QImode:
795 gen = gen_aarch64_simd_combinev8qi;
796 break;
797 case V4HImode:
798 gen = gen_aarch64_simd_combinev4hi;
799 break;
800 case V2SImode:
801 gen = gen_aarch64_simd_combinev2si;
802 break;
803 case V2SFmode:
804 gen = gen_aarch64_simd_combinev2sf;
805 break;
806 case DImode:
807 gen = gen_aarch64_simd_combinedi;
808 break;
809 case DFmode:
810 gen = gen_aarch64_simd_combinedf;
811 break;
812 default:
813 gcc_unreachable ();
814 }
815
816 emit_insn (gen (dst, src1, src2));
817 return;
818 }
819}
820
821/* Split a complex SIMD move. */
822
823void
824aarch64_split_simd_move (rtx dst, rtx src)
825{
826 enum machine_mode src_mode = GET_MODE (src);
827 enum machine_mode dst_mode = GET_MODE (dst);
828
829 gcc_assert (VECTOR_MODE_P (dst_mode));
830
831 if (REG_P (dst) && REG_P (src))
832 {
833 rtx (*gen) (rtx, rtx);
834
835 gcc_assert (VECTOR_MODE_P (src_mode));
836
837 switch (src_mode)
838 {
839 case V16QImode:
c59b7e28 840 gen = gen_aarch64_split_simd_movv16qi;
841 break;
842 case V8HImode:
c59b7e28 843 gen = gen_aarch64_split_simd_movv8hi;
844 break;
845 case V4SImode:
c59b7e28 846 gen = gen_aarch64_split_simd_movv4si;
847 break;
848 case V2DImode:
c59b7e28 849 gen = gen_aarch64_split_simd_movv2di;
850 break;
851 case V4SFmode:
c59b7e28 852 gen = gen_aarch64_split_simd_movv4sf;
853 break;
854 case V2DFmode:
c59b7e28 855 gen = gen_aarch64_split_simd_movv2df;
856 break;
857 default:
858 gcc_unreachable ();
859 }
860
861 emit_insn (gen (dst, src));
862 return;
863 }
864}
865
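/* Force VALUE into a register of mode MODE, creating a fresh pseudo
   when that is possible and otherwise reusing register X.  */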
43e9d192 866static rtx
e18b4a81 867aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
868{
869 if (can_create_pseudo_p ())
e18b4a81 870 return force_reg (mode, value);
871 else
872 {
873 x = aarch64_emit_move (x, value);
874 return x;
875 }
876}
877
878
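/* Return a legitimate expression for REG + OFFSET in mode MODE; when
   OFFSET is not a valid add immediate it is first loaded into the
   temporary TEMP and added to REG separately.  */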
879static rtx
880aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
881{
9c023bf0 882 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
883 {
884 rtx high;
885 /* Load the full offset into a register. This
886 might be improvable in the future. */
887 high = GEN_INT (offset);
888 offset = 0;
889 high = aarch64_force_temporary (mode, temp, high);
890 reg = aarch64_force_temporary (mode, temp,
891 gen_rtx_PLUS (mode, high, reg));
892 }
893 return plus_constant (mode, reg, offset);
894}
895
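/* Expand a move of constant IMM (a symbolic constant or an integer)
   into register DEST, which must be an SImode or DImode register.  */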
896void
897aarch64_expand_mov_immediate (rtx dest, rtx imm)
898{
899 enum machine_mode mode = GET_MODE (dest);
900 unsigned HOST_WIDE_INT mask;
901 int i;
902 bool first;
903 unsigned HOST_WIDE_INT val;
904 bool subtargets;
905 rtx subtarget;
906 int one_match, zero_match;
907
908 gcc_assert (mode == SImode || mode == DImode);
909
910 /* Check on what type of symbol it is. */
911 if (GET_CODE (imm) == SYMBOL_REF
912 || GET_CODE (imm) == LABEL_REF
913 || GET_CODE (imm) == CONST)
914 {
915 rtx mem, base, offset;
916 enum aarch64_symbol_type sty;
917
918 /* If we have (const (plus symbol offset)), separate out the offset
919 before we start classifying the symbol. */
920 split_const (imm, &base, &offset);
921
922 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
923 switch (sty)
924 {
925 case SYMBOL_FORCE_TO_MEM:
926 if (offset != const0_rtx
927 && targetm.cannot_force_const_mem (mode, imm))
928 {
aef66c94 929 gcc_assert (can_create_pseudo_p ());
e18b4a81 930 base = aarch64_force_temporary (mode, dest, base);
931 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
932 aarch64_emit_move (dest, base);
933 return;
934 }
28514dda 935 mem = force_const_mem (ptr_mode, imm);
43e9d192 936 gcc_assert (mem);
937 if (mode != ptr_mode)
938 mem = gen_rtx_ZERO_EXTEND (mode, mem);
939 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
940 return;
941
942 case SYMBOL_SMALL_TLSGD:
943 case SYMBOL_SMALL_TLSDESC:
944 case SYMBOL_SMALL_GOTTPREL:
945 case SYMBOL_SMALL_GOT:
87dd8ab0 946 case SYMBOL_TINY_GOT:
947 if (offset != const0_rtx)
948 {
949 gcc_assert(can_create_pseudo_p ());
e18b4a81 950 base = aarch64_force_temporary (mode, dest, base);
951 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
952 aarch64_emit_move (dest, base);
953 return;
954 }
955 /* FALLTHRU */
956
957 case SYMBOL_SMALL_TPREL:
958 case SYMBOL_SMALL_ABSOLUTE:
a5350ddc 959 case SYMBOL_TINY_ABSOLUTE:
960 aarch64_load_symref_appropriately (dest, imm, sty);
961 return;
962
963 default:
964 gcc_unreachable ();
965 }
966 }
967
968 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
969 {
970 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
971 return;
972 }
973
974 if (!CONST_INT_P (imm))
975 {
976 if (GET_CODE (imm) == HIGH)
977 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
978 else
979 {
980 rtx mem = force_const_mem (mode, imm);
981 gcc_assert (mem);
982 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
983 }
984
985 return;
986 }
987
988 if (mode == SImode)
989 {
990 /* We know we can't do this in 1 insn, and we must be able to do it
991 in two; so don't mess around looking for sequences that don't buy
992 us anything. */
993 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
994 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
995 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
996 return;
997 }
998
999 /* Remaining cases are all for DImode. */
1000
1001 val = INTVAL (imm);
1002 subtargets = optimize && can_create_pseudo_p ();
1003
1004 one_match = 0;
1005 zero_match = 0;
1006 mask = 0xffff;
1007
1008 for (i = 0; i < 64; i += 16, mask <<= 16)
1009 {
1010 if ((val & mask) == 0)
1011 zero_match++;
1012 else if ((val & mask) == mask)
1013 one_match++;
1014 }
1015
1016 if (one_match == 2)
1017 {
1018 mask = 0xffff;
1019 for (i = 0; i < 64; i += 16, mask <<= 16)
1020 {
1021 if ((val & mask) != mask)
1022 {
1023 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1024 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1025 GEN_INT ((val >> i) & 0xffff)));
1026 return;
1027 }
1028 }
1029 gcc_unreachable ();
1030 }
1031
1032 if (zero_match == 2)
1033 goto simple_sequence;
1034
1035 mask = 0x0ffff0000UL;
1036 for (i = 16; i < 64; i += 16, mask <<= 16)
1037 {
1038 HOST_WIDE_INT comp = mask & ~(mask - 1);
1039
1040 if (aarch64_uimm12_shift (val - (val & mask)))
1041 {
1042 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1043
1044 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1045 emit_insn (gen_adddi3 (dest, subtarget,
1046 GEN_INT (val - (val & mask))));
1047 return;
1048 }
1049 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1050 {
1051 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1052
1053 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1054 GEN_INT ((val + comp) & mask)));
1055 emit_insn (gen_adddi3 (dest, subtarget,
1056 GEN_INT (val - ((val + comp) & mask))));
1057 return;
1058 }
1059 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1060 {
1061 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1062
1063 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1064 GEN_INT ((val - comp) | ~mask)));
1065 emit_insn (gen_adddi3 (dest, subtarget,
1066 GEN_INT (val - ((val - comp) | ~mask))));
1067 return;
1068 }
1069 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1070 {
1071 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1072
1073 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1074 GEN_INT (val | ~mask)));
1075 emit_insn (gen_adddi3 (dest, subtarget,
1076 GEN_INT (val - (val | ~mask))));
1077 return;
1078 }
1079 }
1080
1081 /* See if we can do it by arithmetically combining two
1082 immediates. */
1083 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1084 {
1085 int j;
1086 mask = 0xffff;
1087
1088 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1089 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1090 {
1091 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1092 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1093 GEN_INT (aarch64_bitmasks[i])));
1094 emit_insn (gen_adddi3 (dest, subtarget,
1095 GEN_INT (val - aarch64_bitmasks[i])));
1096 return;
1097 }
1098
1099 for (j = 0; j < 64; j += 16, mask <<= 16)
1100 {
1101 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1102 {
1103 emit_insn (gen_rtx_SET (VOIDmode, dest,
1104 GEN_INT (aarch64_bitmasks[i])));
1105 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1106 GEN_INT ((val >> j) & 0xffff)));
1107 return;
1108 }
1109 }
1110 }
1111
1112 /* See if we can do it by logically combining two immediates. */
1113 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1114 {
1115 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1116 {
1117 int j;
1118
1119 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1120 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1121 {
1122 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1123 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1124 GEN_INT (aarch64_bitmasks[i])));
1125 emit_insn (gen_iordi3 (dest, subtarget,
1126 GEN_INT (aarch64_bitmasks[j])));
1127 return;
1128 }
1129 }
1130 else if ((val & aarch64_bitmasks[i]) == val)
1131 {
1132 int j;
1133
1134 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1135 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1136 {
1137
1138 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1139 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1140 GEN_INT (aarch64_bitmasks[j])));
1141 emit_insn (gen_anddi3 (dest, subtarget,
1142 GEN_INT (aarch64_bitmasks[i])));
1143 return;
1144 }
1145 }
1146 }
1147
1148 simple_sequence:
1149 first = true;
1150 mask = 0xffff;
1151 for (i = 0; i < 64; i += 16, mask <<= 16)
1152 {
1153 if ((val & mask) != 0)
1154 {
1155 if (first)
1156 {
1157 emit_insn (gen_rtx_SET (VOIDmode, dest,
1158 GEN_INT (val & mask)));
1159 first = false;
1160 }
1161 else
1162 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1163 GEN_INT ((val >> i) & 0xffff)));
1164 }
1165 }
1166}
1167
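/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL.  */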
1168static bool
1169aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1170{
1171 /* Indirect calls are not currently supported. */
1172 if (decl == NULL)
1173 return false;
1174
  /* Cannot tail-call to long-calls, since these are outside of the
     range of a branch instruction (we could handle this if we added
     support for indirect tail-calls).  */
1178 if (aarch64_decl_is_long_call_p (decl))
1179 return false;
1180
1181 return true;
1182}
1183
1184/* Implement TARGET_PASS_BY_REFERENCE. */
1185
1186static bool
1187aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1188 enum machine_mode mode,
1189 const_tree type,
1190 bool named ATTRIBUTE_UNUSED)
1191{
1192 HOST_WIDE_INT size;
1193 enum machine_mode dummymode;
1194 int nregs;
1195
1196 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1197 size = (mode == BLKmode && type)
1198 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1199
1200 if (type)
1201 {
1202 /* Arrays always passed by reference. */
1203 if (TREE_CODE (type) == ARRAY_TYPE)
1204 return true;
1205 /* Other aggregates based on their size. */
1206 if (AGGREGATE_TYPE_P (type))
1207 size = int_size_in_bytes (type);
1208 }
1209
  /* Variable-sized arguments are always passed by reference.  */
1211 if (size < 0)
1212 return true;
1213
1214 /* Can this be a candidate to be passed in fp/simd register(s)? */
1215 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1216 &dummymode, &nregs,
1217 NULL))
1218 return false;
1219
  /* Arguments which are variable sized or larger than 2 registers are
     passed by reference unless they are a homogeneous floating-point
     aggregate.  */
1223 return size > 2 * UNITS_PER_WORD;
1224}
1225
1226/* Return TRUE if VALTYPE is padded to its least significant bits. */
1227static bool
1228aarch64_return_in_msb (const_tree valtype)
1229{
1230 enum machine_mode dummy_mode;
1231 int dummy_int;
1232
1233 /* Never happens in little-endian mode. */
1234 if (!BYTES_BIG_ENDIAN)
1235 return false;
1236
1237 /* Only composite types smaller than or equal to 16 bytes can
1238 be potentially returned in registers. */
1239 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1240 || int_size_in_bytes (valtype) <= 0
1241 || int_size_in_bytes (valtype) > 16)
1242 return false;
1243
1244 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1245 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1246 is always passed/returned in the least significant bits of fp/simd
1247 register(s). */
1248 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1249 &dummy_mode, &dummy_int, NULL))
1250 return false;
1251
1252 return true;
1253}
1254
1255/* Implement TARGET_FUNCTION_VALUE.
1256 Define how to find the value returned by a function. */
1257
1258static rtx
1259aarch64_function_value (const_tree type, const_tree func,
1260 bool outgoing ATTRIBUTE_UNUSED)
1261{
1262 enum machine_mode mode;
1263 int unsignedp;
1264 int count;
1265 enum machine_mode ag_mode;
1266
1267 mode = TYPE_MODE (type);
1268 if (INTEGRAL_TYPE_P (type))
1269 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1270
1271 if (aarch64_return_in_msb (type))
1272 {
1273 HOST_WIDE_INT size = int_size_in_bytes (type);
1274
1275 if (size % UNITS_PER_WORD != 0)
1276 {
1277 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1278 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1279 }
1280 }
1281
1282 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1283 &ag_mode, &count, NULL))
1284 {
1285 if (!aarch64_composite_type_p (type, mode))
1286 {
1287 gcc_assert (count == 1 && mode == ag_mode);
1288 return gen_rtx_REG (mode, V0_REGNUM);
1289 }
1290 else
1291 {
1292 int i;
1293 rtx par;
1294
1295 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1296 for (i = 0; i < count; i++)
1297 {
1298 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1299 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1300 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1301 XVECEXP (par, 0, i) = tmp;
1302 }
1303 return par;
1304 }
1305 }
1306 else
1307 return gen_rtx_REG (mode, R0_REGNUM);
1308}
1309
1310/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1311 Return true if REGNO is the number of a hard register in which the values
1312 of called function may come back. */
1313
1314static bool
1315aarch64_function_value_regno_p (const unsigned int regno)
1316{
1317 /* Maximum of 16 bytes can be returned in the general registers. Examples
1318 of 16-byte return values are: 128-bit integers and 16-byte small
1319 structures (excluding homogeneous floating-point aggregates). */
1320 if (regno == R0_REGNUM || regno == R1_REGNUM)
1321 return true;
1322
1323 /* Up to four fp/simd registers can return a function value, e.g. a
1324 homogeneous floating-point aggregate having four members. */
1325 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1326 return !TARGET_GENERAL_REGS_ONLY;
1327
1328 return false;
1329}
1330
1331/* Implement TARGET_RETURN_IN_MEMORY.
1332
1333 If the type T of the result of a function is such that
1334 void func (T arg)
1335 would require that arg be passed as a value in a register (or set of
1336 registers) according to the parameter passing rules, then the result
1337 is returned in the same registers as would be used for such an
1338 argument. */
1339
1340static bool
1341aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1342{
1343 HOST_WIDE_INT size;
1344 enum machine_mode ag_mode;
1345 int count;
1346
1347 if (!AGGREGATE_TYPE_P (type)
1348 && TREE_CODE (type) != COMPLEX_TYPE
1349 && TREE_CODE (type) != VECTOR_TYPE)
1350 /* Simple scalar types always returned in registers. */
1351 return false;
1352
1353 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1354 type,
1355 &ag_mode,
1356 &count,
1357 NULL))
1358 return false;
1359
1360 /* Types larger than 2 registers returned in memory. */
1361 size = int_size_in_bytes (type);
1362 return (size < 0 || size > 2 * UNITS_PER_WORD);
1363}
1364
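/* Return true if an argument of mode MODE and type TYPE is a candidate
   for passing in SIMD/FP registers, setting *NREGS to the number of
   registers required and recording the element mode in PCUM.  */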
1365static bool
1366aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1367 const_tree type, int *nregs)
1368{
1369 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1370 return aarch64_vfp_is_call_or_return_candidate (mode,
1371 type,
1372 &pcum->aapcs_vfp_rmode,
1373 nregs,
1374 NULL);
1375}
1376
1377/* Given MODE and TYPE of a function argument, return the alignment in
1378 bits. The idea is to suppress any stronger alignment requested by
1379 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1380 This is a helper function for local use only. */
1381
1382static unsigned int
1383aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1384{
1385 unsigned int alignment;
1386
1387 if (type)
1388 {
1389 if (!integer_zerop (TYPE_SIZE (type)))
1390 {
1391 if (TYPE_MODE (type) == mode)
1392 alignment = TYPE_ALIGN (type);
1393 else
1394 alignment = GET_MODE_ALIGNMENT (mode);
1395 }
1396 else
1397 alignment = 0;
1398 }
1399 else
1400 alignment = GET_MODE_ALIGNMENT (mode);
1401
1402 return alignment;
1403}
1404
1405/* Layout a function argument according to the AAPCS64 rules. The rule
1406 numbers refer to the rule numbers in the AAPCS64. */
1407
1408static void
1409aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1410 const_tree type,
1411 bool named ATTRIBUTE_UNUSED)
1412{
1413 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1414 int ncrn, nvrn, nregs;
1415 bool allocate_ncrn, allocate_nvrn;
1416
1417 /* We need to do this once per argument. */
1418 if (pcum->aapcs_arg_processed)
1419 return;
1420
1421 pcum->aapcs_arg_processed = true;
1422
1423 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1424 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1425 mode,
1426 type,
1427 &nregs);
1428
1429 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1430 The following code thus handles passing by SIMD/FP registers first. */
1431
1432 nvrn = pcum->aapcs_nvrn;
1433
  /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
     and homogeneous short-vector aggregates (HVA).  */
1436 if (allocate_nvrn)
1437 {
1438 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1439 {
1440 pcum->aapcs_nextnvrn = nvrn + nregs;
1441 if (!aarch64_composite_type_p (type, mode))
1442 {
1443 gcc_assert (nregs == 1);
1444 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1445 }
1446 else
1447 {
1448 rtx par;
1449 int i;
1450 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1451 for (i = 0; i < nregs; i++)
1452 {
1453 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1454 V0_REGNUM + nvrn + i);
1455 tmp = gen_rtx_EXPR_LIST
1456 (VOIDmode, tmp,
1457 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1458 XVECEXP (par, 0, i) = tmp;
1459 }
1460 pcum->aapcs_reg = par;
1461 }
1462 return;
1463 }
1464 else
1465 {
1466 /* C.3 NSRN is set to 8. */
1467 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1468 goto on_stack;
1469 }
1470 }
1471
1472 ncrn = pcum->aapcs_ncrn;
1473 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1474 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1475
1476
  /* C6 - C9, though the sign and zero extension semantics are
     handled elsewhere.  This is the case where the argument fits
     entirely in general registers.  */
1480 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1481 {
1482 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1483
1484 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1485
1486 /* C.8 if the argument has an alignment of 16 then the NGRN is
1487 rounded up to the next even number. */
1488 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1489 {
1490 ++ncrn;
1491 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1492 }
1493 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1494 A reg is still generated for it, but the caller should be smart
1495 enough not to use it. */
1496 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1497 {
1498 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1499 }
1500 else
1501 {
1502 rtx par;
1503 int i;
1504
1505 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1506 for (i = 0; i < nregs; i++)
1507 {
1508 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1509 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1510 GEN_INT (i * UNITS_PER_WORD));
1511 XVECEXP (par, 0, i) = tmp;
1512 }
1513 pcum->aapcs_reg = par;
1514 }
1515
1516 pcum->aapcs_nextncrn = ncrn + nregs;
1517 return;
1518 }
1519
1520 /* C.11 */
1521 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1522
1523 /* The argument is passed on stack; record the needed number of words for
1524 this argument (we can re-use NREGS) and align the total size if
1525 necessary. */
1526on_stack:
1527 pcum->aapcs_stack_words = nregs;
1528 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1529 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1530 16 / UNITS_PER_WORD) + 1;
1531 return;
1532}
1533
1534/* Implement TARGET_FUNCTION_ARG. */
1535
1536static rtx
1537aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1538 const_tree type, bool named)
1539{
1540 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1541 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1542
1543 if (mode == VOIDmode)
1544 return NULL_RTX;
1545
1546 aarch64_layout_arg (pcum_v, mode, type, named);
1547 return pcum->aapcs_reg;
1548}
1549
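/* Initialise the argument-passing state in PCUM before the arguments
   of a call are laid out.  */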
1550void
1551aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1552 const_tree fntype ATTRIBUTE_UNUSED,
1553 rtx libname ATTRIBUTE_UNUSED,
1554 const_tree fndecl ATTRIBUTE_UNUSED,
1555 unsigned n_named ATTRIBUTE_UNUSED)
1556{
1557 pcum->aapcs_ncrn = 0;
1558 pcum->aapcs_nvrn = 0;
1559 pcum->aapcs_nextncrn = 0;
1560 pcum->aapcs_nextnvrn = 0;
1561 pcum->pcs_variant = ARM_PCS_AAPCS64;
1562 pcum->aapcs_reg = NULL_RTX;
1563 pcum->aapcs_arg_processed = false;
1564 pcum->aapcs_stack_words = 0;
1565 pcum->aapcs_stack_size = 0;
1566
1567 return;
1568}
1569
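/* Implement TARGET_FUNCTION_ARG_ADVANCE.  */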
1570static void
1571aarch64_function_arg_advance (cumulative_args_t pcum_v,
1572 enum machine_mode mode,
1573 const_tree type,
1574 bool named)
1575{
1576 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1577 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1578 {
1579 aarch64_layout_arg (pcum_v, mode, type, named);
1580 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1581 != (pcum->aapcs_stack_words != 0));
1582 pcum->aapcs_arg_processed = false;
1583 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1584 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1585 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1586 pcum->aapcs_stack_words = 0;
1587 pcum->aapcs_reg = NULL_RTX;
1588 }
1589}
1590
1591bool
1592aarch64_function_arg_regno_p (unsigned regno)
1593{
1594 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1595 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1596}
1597
1598/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1599 PARM_BOUNDARY bits of alignment, but will be given anything up
1600 to STACK_BOUNDARY bits if the type requires it. This makes sure
1601 that both before and after the layout of each argument, the Next
1602 Stacked Argument Address (NSAA) will have a minimum alignment of
1603 8 bytes. */
1604
1605static unsigned int
1606aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1607{
1608 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1609
1610 if (alignment < PARM_BOUNDARY)
1611 alignment = PARM_BOUNDARY;
1612 if (alignment > STACK_BOUNDARY)
1613 alignment = STACK_BOUNDARY;
1614 return alignment;
1615}
1616
1617/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1618
1619 Return true if an argument passed on the stack should be padded upwards,
1620 i.e. if the least-significant byte of the stack slot has useful data.
1621
1622 Small aggregate types are placed in the lowest memory address.
1623
1624 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1625
1626bool
1627aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1628{
1629 /* On little-endian targets, the least significant byte of every stack
1630 argument is passed at the lowest byte address of the stack slot. */
1631 if (!BYTES_BIG_ENDIAN)
1632 return true;
1633
00edcfbe 1634 /* Otherwise, integral, floating-point and pointer types are padded downward:
1635 the least significant byte of a stack argument is passed at the highest
1636 byte address of the stack slot. */
1637 if (type
1638 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1639 || POINTER_TYPE_P (type))
1640 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1641 return false;
1642
1643 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1644 return true;
1645}
1646
1647/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1648
   It specifies padding for the last (and possibly the only)
   element of a block move between registers and memory.  If
   the block is assumed to be in memory, padding upward means that
   the last element is padded after its most significant byte,
   while with downward padding the last element is padded at
   its least significant byte side.
1655
1656 Small aggregates and small complex types are always padded
1657 upwards.
1658
1659 We don't need to worry about homogeneous floating-point or
1660 short-vector aggregates; their move is not affected by the
1661 padding direction determined here. Regardless of endianness,
1662 each element of such an aggregate is put in the least
1663 significant bits of a fp/simd register.
1664
1665 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1666 register has useful data, and return the opposite if the most
1667 significant byte does. */
1668
1669bool
1670aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1671 bool first ATTRIBUTE_UNUSED)
1672{
1673
1674 /* Small composite types are always padded upward. */
1675 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1676 {
1677 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1678 : GET_MODE_SIZE (mode));
1679 if (size < 2 * UNITS_PER_WORD)
1680 return true;
1681 }
1682
1683 /* Otherwise, use the default padding. */
1684 return !BYTES_BIG_ENDIAN;
1685}
1686
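/* Implement TARGET_LIBGCC_CMP_RETURN_MODE.  */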
1687static enum machine_mode
1688aarch64_libgcc_cmp_return_mode (void)
1689{
1690 return SImode;
1691}
1692
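/* Implement TARGET_FRAME_POINTER_REQUIRED.  */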
1693static bool
1694aarch64_frame_pointer_required (void)
1695{
1696 /* If the function contains dynamic stack allocations, we need to
1697 use the frame pointer to access the static parts of the frame. */
1698 if (cfun->calls_alloca)
1699 return true;
1700
1701 /* We may have turned flag_omit_frame_pointer on in order to have this
1702 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1703 and we'll check it here.
1704 If we really did set flag_omit_frame_pointer normally, then we return false
1705 (no frame pointer required) in all cases. */
1706
1707 if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1708 return false;
1709 else if (flag_omit_leaf_frame_pointer)
77436791 1710 return !crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM);
1711 return true;
1712}
1713
1714/* Mark the registers that need to be saved by the callee and calculate
1715 the size of the callee-saved registers area and frame record (both FP
1716 and LR may be omitted). */
1717static void
1718aarch64_layout_frame (void)
1719{
1720 HOST_WIDE_INT offset = 0;
1721 int regno;
1722
1723 if (reload_completed && cfun->machine->frame.laid_out)
1724 return;
1725
1726 cfun->machine->frame.fp_lr_offset = 0;
1727
1728 /* First mark all the registers that really need to be saved... */
1729 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1730 cfun->machine->frame.reg_offset[regno] = -1;
1731
1732 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1733 cfun->machine->frame.reg_offset[regno] = -1;
1734
1735 /* ... that includes the eh data registers (if needed)... */
1736 if (crtl->calls_eh_return)
1737 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1738 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1739
1740 /* ... and any callee saved register that dataflow says is live. */
1741 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1742 if (df_regs_ever_live_p (regno)
1743 && !call_used_regs[regno])
1744 cfun->machine->frame.reg_offset[regno] = 0;
1745
1746 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1747 if (df_regs_ever_live_p (regno)
1748 && !call_used_regs[regno])
1749 cfun->machine->frame.reg_offset[regno] = 0;
1750
1751 if (frame_pointer_needed)
1752 {
1753 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1754 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1755 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1756 }
1757
1758 /* Now assign stack slots for them. */
1759 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1760 if (cfun->machine->frame.reg_offset[regno] != -1)
1761 {
1762 cfun->machine->frame.reg_offset[regno] = offset;
1763 offset += UNITS_PER_WORD;
1764 }
1765
1766 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1767 if (cfun->machine->frame.reg_offset[regno] != -1)
1768 {
1769 cfun->machine->frame.reg_offset[regno] = offset;
1770 offset += UNITS_PER_WORD;
1771 }
1772
1773 if (frame_pointer_needed)
1774 {
1775 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1776 offset += UNITS_PER_WORD;
1777 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1778 }
1779
1780 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1781 {
1782 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1783 offset += UNITS_PER_WORD;
1784 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1785 }
1786
1787 cfun->machine->frame.padding0 =
1788 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1789 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1790
1791 cfun->machine->frame.saved_regs_size = offset;
1792 cfun->machine->frame.laid_out = true;
1793}
1794
1795/* Make the last instruction frame-related and note that it performs
1796 the operation described by FRAME_PATTERN. */
1797
1798static void
1799aarch64_set_frame_expr (rtx frame_pattern)
1800{
1801 rtx insn;
1802
1803 insn = get_last_insn ();
1804 RTX_FRAME_RELATED_P (insn) = 1;
1805 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1806 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1807 frame_pattern,
1808 REG_NOTES (insn));
1809}
1810
1811static bool
1812aarch64_register_saved_on_entry (int regno)
1813{
1814 return cfun->machine->frame.reg_offset[regno] != -1;
1815}
1816
1817
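/* Emit code to save (when RESTORE is false) or restore (when RESTORE is
   true) the callee-saved FP/SIMD registers, addressed at START_OFFSET
   from BASE_RTX and advancing by INCREMENT per register; adjacent saved
   registers are combined into store-pair/load-pair instructions where
   possible.  */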
1818static void
1819aarch64_save_or_restore_fprs (int start_offset, int increment,
1820 bool restore, rtx base_rtx)
1821
1822{
1823 unsigned regno;
1824 unsigned regno2;
1825 rtx insn;
1826 rtx (*gen_mem_ref)(enum machine_mode, rtx)
1827 = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1828
1829
1830 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1831 {
1832 if (aarch64_register_saved_on_entry (regno))
1833 {
1834 rtx mem;
1835 mem = gen_mem_ref (DFmode,
1836 plus_constant (Pmode,
1837 base_rtx,
1838 start_offset));
1839
1840 for (regno2 = regno + 1;
1841 regno2 <= V31_REGNUM
1842 && !aarch64_register_saved_on_entry (regno2);
1843 regno2++)
1844 {
1845 /* Empty loop. */
1846 }
1847 if (regno2 <= V31_REGNUM &&
1848 aarch64_register_saved_on_entry (regno2))
1849 {
1850 rtx mem2;
1851 /* Next highest register to be saved. */
1852 mem2 = gen_mem_ref (DFmode,
1853 plus_constant
1854 (Pmode,
1855 base_rtx,
1856 start_offset + increment));
1857 if (restore == false)
1858 {
1859 insn = emit_insn
1860 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1861 mem2, gen_rtx_REG (DFmode, regno2)));
1862
1863 }
1864 else
1865 {
1866 insn = emit_insn
1867 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1868 gen_rtx_REG (DFmode, regno2), mem2));
1869
1870 add_reg_note (insn, REG_CFA_RESTORE,
1871 gen_rtx_REG (DFmode, regno));
1872 add_reg_note (insn, REG_CFA_RESTORE,
1873 gen_rtx_REG (DFmode, regno2));
1874 }
1875
          /* The first part of a frame-related parallel insn
             is always assumed to be relevant to the frame
             calculations; subsequent parts are only
             frame-related if explicitly marked.  */
e0f396bc 1880 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1881 regno = regno2;
1882 start_offset += increment * 2;
1883 }
1884 else
1885 {
1886 if (restore == false)
1887 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1888 else
1889 {
1890 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1891 add_reg_note (insn, REG_CFA_RESTORE,
1892 gen_rtx_REG (DImode, regno));
1893 }
1894 start_offset += increment;
1895 }
1896 RTX_FRAME_RELATED_P (insn) = 1;
1897 }
1898 }
1899
1900}
1901
1902
/* OFFSET is the offset from the stack pointer at which the register
   saves and restores have to happen.  */
1905static void
1906aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1907 bool restore)
1908{
1909 rtx insn;
1910 rtx base_rtx = stack_pointer_rtx;
1911 HOST_WIDE_INT start_offset = offset;
1912 HOST_WIDE_INT increment = UNITS_PER_WORD;
1913 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1914 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1915 unsigned regno;
1916 unsigned regno2;
1917
1918 for (regno = R0_REGNUM; regno <= limit; regno++)
1919 {
1920 if (aarch64_register_saved_on_entry (regno))
1921 {
1922 rtx mem;
1923 mem = gen_mem_ref (Pmode,
1924 plus_constant (Pmode,
1925 base_rtx,
1926 start_offset));
1927
1928 for (regno2 = regno + 1;
1929 regno2 <= limit
1930 && !aarch64_register_saved_on_entry (regno2);
1931 regno2++)
1932 {
1933 /* Empty loop. */
1934 }
1935 if (regno2 <= limit &&
1936 aarch64_register_saved_on_entry (regno2))
1937 {
1938 rtx mem2;
1939 /* Next highest register to be saved. */
1940 mem2 = gen_mem_ref (Pmode,
1941 plus_constant
1942 (Pmode,
1943 base_rtx,
1944 start_offset + increment));
1945 if (restore == false)
1946 {
1947 insn = emit_insn
1948 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1949 mem2, gen_rtx_REG (DImode, regno2)));
1950
1951 }
1952 else
1953 {
1954 insn = emit_insn
1955 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1956 gen_rtx_REG (DImode, regno2), mem2));
1957
1958 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1959 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1960 }
1961
1962 /* The first part of a frame-related parallel insn
1963 is always assumed to be relevant to the frame
1964 calculations; subsequent parts are only
1965 frame-related if explicitly marked. */
1966 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1967 1)) = 1;
1968 regno = regno2;
1969 start_offset += increment * 2;
1970 }
1971 else
1972 {
1973 if (restore == false)
1974 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1975 else
1976 {
1977 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1978 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1979 }
1980 start_offset += increment;
1981 }
1982 RTX_FRAME_RELATED_P (insn) = 1;
1983 }
1984 }
1985
1986 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1987
1988}
1989
1990/* AArch64 stack frames generated by this compiler look like:
1991
1992 +-------------------------------+
1993 | |
1994 | incoming stack arguments |
1995 | |
1996 +-------------------------------+ <-- arg_pointer_rtx
1997 | |
1998 | callee-allocated save area |
1999 | for register varargs |
2000 | |
2001 +-------------------------------+
2002 | |
2003 | local variables |
2004 | |
2005 +-------------------------------+ <-- frame_pointer_rtx
2006 | |
2007 | callee-saved registers |
2008 | |
2009 +-------------------------------+
2010 | LR' |
2011 +-------------------------------+
2012 | FP' |
2013 P +-------------------------------+ <-- hard_frame_pointer_rtx
2014 | dynamic allocation |
2015 +-------------------------------+
2016 | |
2017 | outgoing stack arguments |
2018 | |
2019 +-------------------------------+ <-- stack_pointer_rtx
2020
2021 Dynamic stack allocations such as alloca insert data at point P.
2022 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2023 hard_frame_pointer_rtx unchanged. */
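
/* As a hedged, illustrative example of the arithmetic used below: for
   a hypothetical function with 24 bytes of local variables, no
   outgoing arguments and only FP and LR to save (saved_regs_size of
   16), frame_size is 24 + 16 = 40, rounded up to 48 for the 16-byte
   stack boundary, and fp_offset is 48 - 24 - 16 = 8.  The prologue is
   then expected to be roughly:

	sub	sp, sp, #48
	stp	x29, x30, [sp, #8]
	add	x29, sp, #8  */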
2024
2025/* Generate the prologue instructions for entry into a function.
2026 Establish the stack frame by decreasing the stack pointer with a
2027 properly calculated size and, if necessary, create a frame record
2028 filled with the values of LR and previous frame pointer. The
2029 current FP is also set up if it is in use. */
2030
2031void
2032aarch64_expand_prologue (void)
2033{
2034 /* sub sp, sp, #<frame_size>
2035 stp {fp, lr}, [sp, #<frame_size> - 16]
2036 add fp, sp, #<frame_size> - hardfp_offset
2037 stp {cs_reg}, [fp, #-16] etc.
2038
2039 sub sp, sp, <final_adjustment_if_any>
2040 */
2041 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2042 HOST_WIDE_INT frame_size, offset;
2043 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2044 rtx insn;
2045
2046 aarch64_layout_frame ();
2047 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2048 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2049 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2050 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2051 + crtl->outgoing_args_size);
2052 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2053 STACK_BOUNDARY / BITS_PER_UNIT);
2054
2055 if (flag_stack_usage_info)
2056 current_function_static_stack_size = frame_size;
2057
2058 fp_offset = (offset
2059 - original_frame_size
2060 - cfun->machine->frame.saved_regs_size);
2061
2062 /* Store pairs and load pairs have a range of only -512 to 504. */
2063 if (offset >= 512)
2064 {
2065 /* When the frame has a large size, an initial decrease is done on
2066 the stack pointer to jump over the callee-allocated save area for
2067 register varargs, the local variable area and/or the callee-saved
2068 register area. This will allow the pre-index write-back
2069 store pair instructions to be used for setting up the stack frame
2070 efficiently. */
2071 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2072 if (offset >= 512)
2073 offset = cfun->machine->frame.saved_regs_size;
2074
2075 frame_size -= (offset + crtl->outgoing_args_size);
2076 fp_offset = 0;
2077
2078 if (frame_size >= 0x1000000)
2079 {
2080 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2081 emit_move_insn (op0, GEN_INT (-frame_size));
2082 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2083 aarch64_set_frame_expr (gen_rtx_SET
2084 (Pmode, stack_pointer_rtx,
2085 plus_constant (Pmode,
2086 stack_pointer_rtx,
2087 -frame_size)));
2088 }
2089 else if (frame_size > 0)
2090 {
2091 if ((frame_size & 0xfff) != frame_size)
2092 {
2093 insn = emit_insn (gen_add2_insn
2094 (stack_pointer_rtx,
2095 GEN_INT (-(frame_size
2096 & ~(HOST_WIDE_INT)0xfff))));
2097 RTX_FRAME_RELATED_P (insn) = 1;
2098 }
2099 if ((frame_size & 0xfff) != 0)
2100 {
2101 insn = emit_insn (gen_add2_insn
2102 (stack_pointer_rtx,
2103 GEN_INT (-(frame_size
2104 & (HOST_WIDE_INT)0xfff))));
2105 RTX_FRAME_RELATED_P (insn) = 1;
2106 }
2107 }
2108 }
2109 else
2110 frame_size = -1;
2111
2112 if (offset > 0)
2113 {
2114 /* Save the frame pointer and lr first, if the frame pointer is
2115 needed. Make the frame pointer point to the location of the
2116 old frame pointer on the stack. */
2117 if (frame_pointer_needed)
2118 {
2119 rtx mem_fp, mem_lr;
2120
2121 if (fp_offset)
2122 {
2123 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2124 GEN_INT (-offset)));
2125 RTX_FRAME_RELATED_P (insn) = 1;
2126 aarch64_set_frame_expr (gen_rtx_SET
2127 (Pmode, stack_pointer_rtx,
2128 gen_rtx_MINUS (Pmode,
2129 stack_pointer_rtx,
2130 GEN_INT (offset))));
2131 mem_fp = gen_frame_mem (DImode,
2132 plus_constant (Pmode,
2133 stack_pointer_rtx,
2134 fp_offset));
2135 mem_lr = gen_frame_mem (DImode,
2136 plus_constant (Pmode,
2137 stack_pointer_rtx,
2138 fp_offset
2139 + UNITS_PER_WORD));
2140 insn = emit_insn (gen_store_pairdi (mem_fp,
2141 hard_frame_pointer_rtx,
2142 mem_lr,
2143 gen_rtx_REG (DImode,
2144 LR_REGNUM)));
2145 }
2146 else
2147 {
2148 insn = emit_insn (gen_storewb_pairdi_di
2149 (stack_pointer_rtx, stack_pointer_rtx,
2150 hard_frame_pointer_rtx,
2151 gen_rtx_REG (DImode, LR_REGNUM),
2152 GEN_INT (-offset),
2153 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2154 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2155 }
2156
2157 /* The first part of a frame-related parallel insn is always
2158 assumed to be relevant to the frame calculations;
2159 subsequent parts are only frame-related if explicitly
2160 marked. */
2161 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2162 RTX_FRAME_RELATED_P (insn) = 1;
2163
2164 /* Set up frame pointer to point to the location of the
2165 previous frame pointer on the stack. */
2166 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2167 stack_pointer_rtx,
2168 GEN_INT (fp_offset)));
2169 aarch64_set_frame_expr (gen_rtx_SET
2170 (Pmode, hard_frame_pointer_rtx,
2171 plus_constant (Pmode,
2172 stack_pointer_rtx,
2173 fp_offset)));
2174 RTX_FRAME_RELATED_P (insn) = 1;
2175 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2176 hard_frame_pointer_rtx));
2177 }
2178 else
2179 {
2180 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2181 GEN_INT (-offset)));
2182 RTX_FRAME_RELATED_P (insn) = 1;
2183 }
2184
2185 aarch64_save_or_restore_callee_save_registers
2186 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2187 }
2188
2189 /* when offset >= 512,
2190 sub sp, sp, #<outgoing_args_size> */
2191 if (frame_size > -1)
2192 {
2193 if (crtl->outgoing_args_size > 0)
2194 {
2195 insn = emit_insn (gen_add2_insn
2196 (stack_pointer_rtx,
2197 GEN_INT (- crtl->outgoing_args_size)));
2198 RTX_FRAME_RELATED_P (insn) = 1;
2199 }
2200 }
2201}
2202
2203/* Generate the epilogue instructions for returning from a function. */
2204void
2205aarch64_expand_epilogue (bool for_sibcall)
2206{
2207 HOST_WIDE_INT original_frame_size, frame_size, offset;
2208 HOST_WIDE_INT fp_offset;
2209 rtx insn;
2210 rtx cfa_reg;
2211
2212 aarch64_layout_frame ();
2213 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2214 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2215 + crtl->outgoing_args_size);
2216 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2217 STACK_BOUNDARY / BITS_PER_UNIT);
2218
2219 fp_offset = (offset
2220 - original_frame_size
2221 - cfun->machine->frame.saved_regs_size);
2222
2223 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2224
2225 /* Store pairs and load pairs have a range of only -512 to 504. */
2226 if (offset >= 512)
2227 {
2228 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2229 if (offset >= 512)
2230 offset = cfun->machine->frame.saved_regs_size;
2231
2232 frame_size -= (offset + crtl->outgoing_args_size);
2233 fp_offset = 0;
2234 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2235 {
2236 insn = emit_insn (gen_add2_insn
2237 (stack_pointer_rtx,
2238 GEN_INT (crtl->outgoing_args_size)));
2239 RTX_FRAME_RELATED_P (insn) = 1;
2240 }
2241 }
2242 else
2243 frame_size = -1;
2244
2245 /* If there were outgoing arguments or we've done dynamic stack
2246 allocation, then restore the stack pointer from the frame
2247 pointer. This is at most one insn and more efficient than using
2248 GCC's internal mechanism. */
2249 if (frame_pointer_needed
2250 && (crtl->outgoing_args_size || cfun->calls_alloca))
2251 {
2252 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2253 hard_frame_pointer_rtx,
2254 GEN_INT (- fp_offset)));
2255 RTX_FRAME_RELATED_P (insn) = 1;
2256 /* As SP is set to (FP - fp_offset), according to the rules in
2257 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2258 from the value of SP from now on. */
2259 cfa_reg = stack_pointer_rtx;
2260 }
2261
2262 aarch64_save_or_restore_callee_save_registers
2263 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2264
2265 /* Restore the frame pointer and lr if the frame pointer is needed. */
2266 if (offset > 0)
2267 {
2268 if (frame_pointer_needed)
2269 {
2270 rtx mem_fp, mem_lr;
2271
2272 if (fp_offset)
2273 {
2274 mem_fp = gen_frame_mem (DImode,
2275 plus_constant (Pmode,
2276 stack_pointer_rtx,
2277 fp_offset));
2278 mem_lr = gen_frame_mem (DImode,
2279 plus_constant (Pmode,
2280 stack_pointer_rtx,
2281 fp_offset
2282 + UNITS_PER_WORD));
2283 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2284 mem_fp,
2285 gen_rtx_REG (DImode,
2286 LR_REGNUM),
2287 mem_lr));
2288 }
2289 else
2290 {
2291 insn = emit_insn (gen_loadwb_pairdi_di
2292 (stack_pointer_rtx,
2293 stack_pointer_rtx,
2294 hard_frame_pointer_rtx,
2295 gen_rtx_REG (DImode, LR_REGNUM),
2296 GEN_INT (offset),
2297 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2298 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2299 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2300 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2301 plus_constant (Pmode, cfa_reg,
2302 offset))));
2303 }
2304
2305 /* The first part of a frame-related parallel insn
2306 is always assumed to be relevant to the frame
2307 calculations; subsequent parts are only
2308 frame-related if explicitly marked. */
2309 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2310 RTX_FRAME_RELATED_P (insn) = 1;
2311 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2312 add_reg_note (insn, REG_CFA_RESTORE,
2313 gen_rtx_REG (DImode, LR_REGNUM));
2314
2315 if (fp_offset)
2316 {
2317 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2318 GEN_INT (offset)));
2319 RTX_FRAME_RELATED_P (insn) = 1;
2320 }
2321 }
2322 else
2323 {
2324 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2325 GEN_INT (offset)));
2326 RTX_FRAME_RELATED_P (insn) = 1;
2327 }
2328 }
2329
2330 /* Stack adjustment for exception handler. */
2331 if (crtl->calls_eh_return)
2332 {
2333 /* We need to unwind the stack by the offset computed by
2334 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2335 based on SP. Ideally we would update the SP and define the
2336 CFA along the lines of:
2337
2338 SP = SP + EH_RETURN_STACKADJ_RTX
2339 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2340
2341 However the dwarf emitter only understands a constant
2342 register offset.
2343
2344 The solution chosen here is to use the otherwise unused IP0
2345 as a temporary register to hold the current SP value. The
2346 CFA is described using IP0 then SP is modified. */
2347
2348 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2349
2350 insn = emit_move_insn (ip0, stack_pointer_rtx);
2351 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2352 RTX_FRAME_RELATED_P (insn) = 1;
2353
2354 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2355
2356 /* Ensure the assignment to IP0 does not get optimized away. */
2357 emit_use (ip0);
2358 }
2359
2360 if (frame_size > -1)
2361 {
2362 if (frame_size >= 0x1000000)
2363 {
2364 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2365 emit_move_insn (op0, GEN_INT (frame_size));
2366 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2367 aarch64_set_frame_expr (gen_rtx_SET
2368 (Pmode, stack_pointer_rtx,
2369 plus_constant (Pmode,
2370 stack_pointer_rtx,
2371 frame_size)));
2372 }
2373 else if (frame_size > 0)
2374 {
2375 if ((frame_size & 0xfff) != 0)
2376 {
2377 insn = emit_insn (gen_add2_insn
2378 (stack_pointer_rtx,
2379 GEN_INT ((frame_size
2380 & (HOST_WIDE_INT) 0xfff))));
2381 RTX_FRAME_RELATED_P (insn) = 1;
2382 }
2383 if ((frame_size & 0xfff) != frame_size)
2384 {
2385 insn = emit_insn (gen_add2_insn
2386 (stack_pointer_rtx,
2387 GEN_INT ((frame_size
2388 & ~ (HOST_WIDE_INT) 0xfff))));
2389 RTX_FRAME_RELATED_P (insn) = 1;
2390 }
2391 }
2392
2393 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2394 plus_constant (Pmode,
2395 stack_pointer_rtx,
2396 offset)));
2397 }
2398
2399 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2400 if (!for_sibcall)
2401 emit_jump_insn (ret_rtx);
2402}
2403
2404/* Return the place to copy the exception unwinding return address to.
2405 This will probably be a stack slot, but could (in theory) be the
2406 return register. */
2407rtx
2408aarch64_final_eh_return_addr (void)
2409{
2410 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2411 aarch64_layout_frame ();
2412 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2413 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2414 + crtl->outgoing_args_size);
2415 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2416 STACK_BOUNDARY / BITS_PER_UNIT);
2417 fp_offset = offset
2418 - original_frame_size
2419 - cfun->machine->frame.saved_regs_size;
2420
2421 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2422 return gen_rtx_REG (DImode, LR_REGNUM);
2423
2424 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2425 result in a store to save LR introduced by builtin_eh_return () being
2426 incorrectly deleted because the alias is not detected.
2427 So in the calculation of the address to copy the exception unwinding
2428 return address to, we note 2 cases.
2429 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2430 we return a SP-relative location since all the addresses are SP-relative
2431 in this case. This prevents the store from being optimized away.
2432 If the fp_offset is not 0, then the addresses will be FP-relative and
2433 therefore we return a FP-relative location. */
2434
2435 if (frame_pointer_needed)
2436 {
2437 if (fp_offset)
2438 return gen_frame_mem (DImode,
2439 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2440 else
2441 return gen_frame_mem (DImode,
2442 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2443 }
2444
2445 /* If FP is not needed, we calculate the location of LR, which would be
2446 at the top of the saved registers block. */
2447
2448 return gen_frame_mem (DImode,
2449 plus_constant (Pmode,
2450 stack_pointer_rtx,
2451 fp_offset
2452 + cfun->machine->frame.saved_regs_size
2453 - 2 * UNITS_PER_WORD));
2454}
2455
2456/* Output code to build up a constant in a register. */
2457static void
2458aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2459{
2460 if (aarch64_bitmask_imm (val, DImode))
2461 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2462 else
2463 {
2464 int i;
2465 int ncount = 0;
2466 int zcount = 0;
2467 HOST_WIDE_INT valp = val >> 16;
2468 HOST_WIDE_INT valm;
2469 HOST_WIDE_INT tval;
2470
2471 for (i = 16; i < 64; i += 16)
2472 {
2473 valm = (valp & 0xffff);
2474
2475 if (valm != 0)
2476 ++ zcount;
2477
2478 if (valm != 0xffff)
2479 ++ ncount;
2480
2481 valp >>= 16;
2482 }
2483
2484 /* zcount contains the number of additional MOVK instructions
2485 required if the constant is built up with an initial MOVZ instruction,
2486 while ncount is the number of MOVK instructions required if starting
2487 with a MOVN instruction. Choose the sequence that yields the fewest
2488 number of instructions, preferring MOVZ instructions when they are both
2489 the same. */
2490 if (ncount < zcount)
2491 {
2492 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2493 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2494 tval = 0xffff;
2495 }
2496 else
2497 {
2498 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2499 GEN_INT (val & 0xffff));
2500 tval = 0;
2501 }
2502
2503 val >>= 16;
2504
2505 for (i = 16; i < 64; i += 16)
2506 {
2507 if ((val & 0xffff) != tval)
2508 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2509 GEN_INT (i), GEN_INT (val & 0xffff)));
2510 val >>= 16;
2511 }
2512 }
2513}
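
/* Illustrative sketch only (the helper name is invented and the code
   is not used by the backend): the MOVZ/MOVN trade-off that the
   zcount/ncount heuristic above approximates.  Count the 16-bit
   chunks that a MOVZ-based and a MOVN-based sequence would each have
   to materialize and return the shorter sequence length.  */

static int ATTRIBUTE_UNUSED
aarch64_example_mov_sequence_length (unsigned HOST_WIDE_INT val)
{
  int i, nonzero = 0, notones = 0;
  int movz_len, movn_len;

  for (i = 0; i < 64; i += 16)
    {
      unsigned HOST_WIDE_INT chunk = (val >> i) & 0xffff;
      if (chunk != 0)
	nonzero++;	/* Chunk must be materialized when starting from 0.  */
      if (chunk != 0xffff)
	notones++;	/* Chunk must be materialized when starting from ~0.  */
    }

  movz_len = nonzero == 0 ? 1 : nonzero;
  movn_len = notones == 0 ? 1 : notones;
  return movz_len < movn_len ? movz_len : movn_len;
}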
2514
2515static void
2516aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2517{
2518 HOST_WIDE_INT mdelta = delta;
2519 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2520 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2521
2522 if (mdelta < 0)
2523 mdelta = -mdelta;
2524
2525 if (mdelta >= 4096 * 4096)
2526 {
2527 aarch64_build_constant (scratchreg, delta);
2528 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2529 }
2530 else if (mdelta > 0)
2531 {
2532 if (mdelta >= 4096)
2533 {
2534 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2535 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2536 if (delta < 0)
2537 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2538 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2539 else
2540 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2541 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2542 }
2543 if (mdelta % 4096 != 0)
2544 {
2545 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2546 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2547 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2548 }
2549 }
2550}
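
/* Illustrative sketch only (invented helper name): how a delta with
   0 < |delta| < 4096 * 4096 is split by aarch64_add_constant above
   into a part that fits "add ..., #imm, lsl #12" and a plain 12-bit
   part.  */

static void ATTRIBUTE_UNUSED
aarch64_example_split_add_immediate (HOST_WIDE_INT delta,
				     HOST_WIDE_INT *hi, HOST_WIDE_INT *lo)
{
  HOST_WIDE_INT mdelta = delta < 0 ? -delta : delta;

  /* E.g. delta == 5000 gives *hi == 1 and *lo == 904, i.e. roughly
     "add x, x, #1, lsl 12" followed by "add x, x, #904".  */
  *hi = mdelta / 4096;
  *lo = mdelta % 4096;
}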
2551
2552/* Output code to add DELTA to the first argument, and then jump
2553 to FUNCTION. Used for C++ multiple inheritance. */
2554static void
2555aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2556 HOST_WIDE_INT delta,
2557 HOST_WIDE_INT vcall_offset,
2558 tree function)
2559{
2560 /* The this pointer is always in x0. Note that this differs from
2561 Arm where the this pointer may be bumped to r1 if r0 is required
2562 to return a pointer to an aggregate. On AArch64 a result value
2563 pointer will be in x8. */
2564 int this_regno = R0_REGNUM;
2565 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2566
2567 reload_completed = 1;
2568 emit_note (NOTE_INSN_PROLOGUE_END);
2569
2570 if (vcall_offset == 0)
2571 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2572 else
2573 {
2574 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2575
2576 this_rtx = gen_rtx_REG (Pmode, this_regno);
2577 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2578 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2579
2580 addr = this_rtx;
2581 if (delta != 0)
2582 {
2583 if (delta >= -256 && delta < 256)
2584 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2585 plus_constant (Pmode, this_rtx, delta));
2586 else
2587 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2588 }
2589
2590 if (Pmode == ptr_mode)
2591 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2592 else
2593 aarch64_emit_move (temp0,
2594 gen_rtx_ZERO_EXTEND (Pmode,
2595 gen_rtx_MEM (ptr_mode, addr)));
2596
2597 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2598 addr = plus_constant (Pmode, temp0, vcall_offset);
2599 else
2600 {
2601 aarch64_build_constant (IP1_REGNUM, vcall_offset);
2602 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2603 }
2604
2605 if (Pmode == ptr_mode)
2606 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2607 else
2608 aarch64_emit_move (temp1,
2609 gen_rtx_SIGN_EXTEND (Pmode,
2610 gen_rtx_MEM (ptr_mode, addr)));
2611
2612 emit_insn (gen_add2_insn (this_rtx, temp1));
2613 }
2614
2615 /* Generate a tail call to the target function. */
2616 if (!TREE_USED (function))
2617 {
2618 assemble_external (function);
2619 TREE_USED (function) = 1;
2620 }
2621 funexp = XEXP (DECL_RTL (function), 0);
2622 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2623 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2624 SIBLING_CALL_P (insn) = 1;
2625
2626 insn = get_insns ();
2627 shorten_branches (insn);
2628 final_start_function (insn, file, 1);
2629 final (insn, file, 1);
2630 final_end_function ();
2631
2632 /* Stop pretending to be a post-reload pass. */
2633 reload_completed = 0;
2634}
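
/* As a hedged illustration of the thunks emitted above: for a thunk
   that only has to bump the this pointer by a small constant (say
   DELTA == 8 and VCALL_OFFSET == 0), the expected output is roughly

	add	x0, x0, #8
	b	<target function>

   while a non-zero VCALL_OFFSET additionally loads the adjustment from
   the vtable through the IP0/IP1 scratch registers before the tail
   call.  */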
2635
2636static int
2637aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2638{
2639 if (GET_CODE (*x) == SYMBOL_REF)
2640 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2641
2642 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2643 TLS offsets, not real symbol references. */
2644 if (GET_CODE (*x) == UNSPEC
2645 && XINT (*x, 1) == UNSPEC_TLS)
2646 return -1;
2647
2648 return 0;
2649}
2650
2651static bool
2652aarch64_tls_referenced_p (rtx x)
2653{
2654 if (!TARGET_HAVE_TLS)
2655 return false;
2656
2657 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2658}
2659
2660
2661static int
2662aarch64_bitmasks_cmp (const void *i1, const void *i2)
2663{
2664 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2665 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2666
2667 if (*imm1 < *imm2)
2668 return -1;
2669 if (*imm1 > *imm2)
2670 return +1;
2671 return 0;
2672}
2673
2674
2675static void
2676aarch64_build_bitmask_table (void)
2677{
2678 unsigned HOST_WIDE_INT mask, imm;
2679 unsigned int log_e, e, s, r;
2680 unsigned int nimms = 0;
2681
2682 for (log_e = 1; log_e <= 6; log_e++)
2683 {
2684 e = 1 << log_e;
2685 if (e == 64)
2686 mask = ~(HOST_WIDE_INT) 0;
2687 else
2688 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2689 for (s = 1; s < e; s++)
2690 {
2691 for (r = 0; r < e; r++)
2692 {
2693 /* set s consecutive bits to 1 (s < 64) */
2694 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2695 /* rotate right by r */
2696 if (r != 0)
2697 imm = ((imm >> r) | (imm << (e - r))) & mask;
2698 /* replicate the constant depending on SIMD size */
2699 switch (log_e) {
2700 case 1: imm |= (imm << 2);
2701 case 2: imm |= (imm << 4);
2702 case 3: imm |= (imm << 8);
2703 case 4: imm |= (imm << 16);
2704 case 5: imm |= (imm << 32);
2705 case 6:
2706 break;
2707 default:
2708 gcc_unreachable ();
2709 }
2710 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2711 aarch64_bitmasks[nimms++] = imm;
2712 }
2713 }
2714 }
2715
2716 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2717 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2718 aarch64_bitmasks_cmp);
2719}
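
/* Illustrative sketch only (invented helper name): construct a single
   bitmask immediate from an element of 1 << LOG_E bits, S consecutive
   set bits and a right rotation R, mirroring the table builder above.
   Assumes 1 <= LOG_E <= 6, 0 < S < the element size and R < the
   element size.  */

static unsigned HOST_WIDE_INT ATTRIBUTE_UNUSED
aarch64_example_bitmask_imm (unsigned int log_e, unsigned int s,
			     unsigned int r)
{
  unsigned int e = 1u << log_e;
  unsigned HOST_WIDE_INT mask
    = (e == 64 ? ~(unsigned HOST_WIDE_INT) 0
       : ((unsigned HOST_WIDE_INT) 1 << e) - 1);
  unsigned HOST_WIDE_INT imm = ((unsigned HOST_WIDE_INT) 1 << s) - 1;

  /* Rotate the S set bits right by R within the element.  */
  if (r != 0)
    imm = ((imm >> r) | (imm << (e - r))) & mask;

  /* Replicate the element across all 64 bits; e.g. LOG_E == 3, S == 4,
     R == 0 yields 0x0f0f0f0f0f0f0f0f.  */
  while (e < 64)
    {
      imm |= imm << e;
      e *= 2;
    }
  return imm;
}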
2720
2721
2722/* Return true if val can be encoded as a 12-bit unsigned immediate with
2723 a left shift of 0 or 12 bits. */
2724bool
2725aarch64_uimm12_shift (HOST_WIDE_INT val)
2726{
2727 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2728 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2729 );
2730}
2731
2732
2733/* Return true if val is an immediate that can be loaded into a
2734 register by a MOVZ instruction. */
2735static bool
2736aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2737{
2738 if (GET_MODE_SIZE (mode) > 4)
2739 {
2740 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2741 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2742 return 1;
2743 }
2744 else
2745 {
2746 /* Ignore sign extension. */
2747 val &= (HOST_WIDE_INT) 0xffffffff;
2748 }
2749 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2750 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2751}
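
/* Illustrative examples for the test above: 0x2a, 0x2a0000,
   0x2a00000000 and 0x2a000000000000 each occupy a single 16-bit
   halfword and are therefore MOVZ-loadable, while 0x10001 spans two
   halfwords and is not.  */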
2752
2753
2754/* Return true if val is a valid bitmask immediate. */
2755bool
2756aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2757{
2758 if (GET_MODE_SIZE (mode) < 8)
2759 {
2760 /* Replicate bit pattern. */
2761 val &= (HOST_WIDE_INT) 0xffffffff;
2762 val |= val << 32;
2763 }
2764 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2765 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2766}
2767
2768
2769/* Return true if val is an immediate that can be loaded into a
2770 register in a single instruction. */
2771bool
2772aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2773{
2774 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2775 return 1;
2776 return aarch64_bitmask_imm (val, mode);
2777}
2778
2779static bool
2780aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2781{
2782 rtx base, offset;
2783
2784 if (GET_CODE (x) == HIGH)
2785 return true;
2786
2787 split_const (x, &base, &offset);
2788 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2789 {
2790 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2791 != SYMBOL_FORCE_TO_MEM)
2792 return true;
2793 else
2794 /* Avoid generating a 64-bit relocation in ILP32; leave
2795 to aarch64_expand_mov_immediate to handle it properly. */
2796 return mode != ptr_mode;
2797 }
2798
2799 return aarch64_tls_referenced_p (x);
2800}
2801
2802/* Return true if register REGNO is a valid index register.
2803 STRICT_P is true if REG_OK_STRICT is in effect. */
2804
2805bool
2806aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2807{
2808 if (!HARD_REGISTER_NUM_P (regno))
2809 {
2810 if (!strict_p)
2811 return true;
2812
2813 if (!reg_renumber)
2814 return false;
2815
2816 regno = reg_renumber[regno];
2817 }
2818 return GP_REGNUM_P (regno);
2819}
2820
2821/* Return true if register REGNO is a valid base register for mode MODE.
2822 STRICT_P is true if REG_OK_STRICT is in effect. */
2823
2824bool
2825aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2826{
2827 if (!HARD_REGISTER_NUM_P (regno))
2828 {
2829 if (!strict_p)
2830 return true;
2831
2832 if (!reg_renumber)
2833 return false;
2834
2835 regno = reg_renumber[regno];
2836 }
2837
2838 /* The fake registers will be eliminated to either the stack or
2839 hard frame pointer, both of which are usually valid base registers.
2840 Reload deals with the cases where the eliminated form isn't valid. */
2841 return (GP_REGNUM_P (regno)
2842 || regno == SP_REGNUM
2843 || regno == FRAME_POINTER_REGNUM
2844 || regno == ARG_POINTER_REGNUM);
2845}
2846
2847/* Return true if X is a valid base register for mode MODE.
2848 STRICT_P is true if REG_OK_STRICT is in effect. */
2849
2850static bool
2851aarch64_base_register_rtx_p (rtx x, bool strict_p)
2852{
2853 if (!strict_p && GET_CODE (x) == SUBREG)
2854 x = SUBREG_REG (x);
2855
2856 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2857}
2858
2859/* Return true if address offset is a valid index. If it is, fill in INFO
2860 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2861
2862static bool
2863aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2864 enum machine_mode mode, bool strict_p)
2865{
2866 enum aarch64_address_type type;
2867 rtx index;
2868 int shift;
2869
2870 /* (reg:P) */
2871 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2872 && GET_MODE (x) == Pmode)
2873 {
2874 type = ADDRESS_REG_REG;
2875 index = x;
2876 shift = 0;
2877 }
2878 /* (sign_extend:DI (reg:SI)) */
2879 else if ((GET_CODE (x) == SIGN_EXTEND
2880 || GET_CODE (x) == ZERO_EXTEND)
2881 && GET_MODE (x) == DImode
2882 && GET_MODE (XEXP (x, 0)) == SImode)
2883 {
2884 type = (GET_CODE (x) == SIGN_EXTEND)
2885 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2886 index = XEXP (x, 0);
2887 shift = 0;
2888 }
2889 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2890 else if (GET_CODE (x) == MULT
2891 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2892 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2893 && GET_MODE (XEXP (x, 0)) == DImode
2894 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2895 && CONST_INT_P (XEXP (x, 1)))
2896 {
2897 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2898 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2899 index = XEXP (XEXP (x, 0), 0);
2900 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2901 }
2902 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2903 else if (GET_CODE (x) == ASHIFT
2904 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2905 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2906 && GET_MODE (XEXP (x, 0)) == DImode
2907 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2908 && CONST_INT_P (XEXP (x, 1)))
2909 {
2910 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2911 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2912 index = XEXP (XEXP (x, 0), 0);
2913 shift = INTVAL (XEXP (x, 1));
2914 }
2915 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2916 else if ((GET_CODE (x) == SIGN_EXTRACT
2917 || GET_CODE (x) == ZERO_EXTRACT)
2918 && GET_MODE (x) == DImode
2919 && GET_CODE (XEXP (x, 0)) == MULT
2920 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2921 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2922 {
2923 type = (GET_CODE (x) == SIGN_EXTRACT)
2924 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2925 index = XEXP (XEXP (x, 0), 0);
2926 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2927 if (INTVAL (XEXP (x, 1)) != 32 + shift
2928 || INTVAL (XEXP (x, 2)) != 0)
2929 shift = -1;
2930 }
2931 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2932 (const_int 0xffffffff<<shift)) */
2933 else if (GET_CODE (x) == AND
2934 && GET_MODE (x) == DImode
2935 && GET_CODE (XEXP (x, 0)) == MULT
2936 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2937 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2938 && CONST_INT_P (XEXP (x, 1)))
2939 {
2940 type = ADDRESS_REG_UXTW;
2941 index = XEXP (XEXP (x, 0), 0);
2942 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2943 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2944 shift = -1;
2945 }
2946 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2947 else if ((GET_CODE (x) == SIGN_EXTRACT
2948 || GET_CODE (x) == ZERO_EXTRACT)
2949 && GET_MODE (x) == DImode
2950 && GET_CODE (XEXP (x, 0)) == ASHIFT
2951 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2952 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2953 {
2954 type = (GET_CODE (x) == SIGN_EXTRACT)
2955 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2956 index = XEXP (XEXP (x, 0), 0);
2957 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2958 if (INTVAL (XEXP (x, 1)) != 32 + shift
2959 || INTVAL (XEXP (x, 2)) != 0)
2960 shift = -1;
2961 }
2962 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2963 (const_int 0xffffffff<<shift)) */
2964 else if (GET_CODE (x) == AND
2965 && GET_MODE (x) == DImode
2966 && GET_CODE (XEXP (x, 0)) == ASHIFT
2967 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2968 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2969 && CONST_INT_P (XEXP (x, 1)))
2970 {
2971 type = ADDRESS_REG_UXTW;
2972 index = XEXP (XEXP (x, 0), 0);
2973 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2974 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2975 shift = -1;
2976 }
2977 /* (mult:P (reg:P) (const_int scale)) */
2978 else if (GET_CODE (x) == MULT
2979 && GET_MODE (x) == Pmode
2980 && GET_MODE (XEXP (x, 0)) == Pmode
2981 && CONST_INT_P (XEXP (x, 1)))
2982 {
2983 type = ADDRESS_REG_REG;
2984 index = XEXP (x, 0);
2985 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2986 }
2987 /* (ashift:P (reg:P) (const_int shift)) */
2988 else if (GET_CODE (x) == ASHIFT
2989 && GET_MODE (x) == Pmode
2990 && GET_MODE (XEXP (x, 0)) == Pmode
2991 && CONST_INT_P (XEXP (x, 1)))
2992 {
2993 type = ADDRESS_REG_REG;
2994 index = XEXP (x, 0);
2995 shift = INTVAL (XEXP (x, 1));
2996 }
2997 else
2998 return false;
2999
3000 if (GET_CODE (index) == SUBREG)
3001 index = SUBREG_REG (index);
3002
3003 if ((shift == 0 ||
3004 (shift > 0 && shift <= 3
3005 && (1 << shift) == GET_MODE_SIZE (mode)))
3006 && REG_P (index)
3007 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3008 {
3009 info->type = type;
3010 info->offset = index;
3011 info->shift = shift;
3012 return true;
3013 }
3014
3015 return false;
3016}
3017
3018static inline bool
3019offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3020{
3021 return (offset >= -64 * GET_MODE_SIZE (mode)
3022 && offset < 64 * GET_MODE_SIZE (mode)
3023 && offset % GET_MODE_SIZE (mode) == 0);
3024}
3025
3026static inline bool
3027offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3028 HOST_WIDE_INT offset)
3029{
3030 return offset >= -256 && offset < 256;
3031}
3032
3033static inline bool
3034offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3035{
3036 return (offset >= 0
3037 && offset < 4096 * GET_MODE_SIZE (mode)
3038 && offset % GET_MODE_SIZE (mode) == 0);
3039}
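
/* Illustrative examples for the three predicates above, for an 8-byte
   (DImode) access: the 7-bit signed scaled form accepts multiples of 8
   in [-512, 504] (the load/store-pair range), the 9-bit unscaled form
   accepts any offset in [-256, 255], and the 12-bit unsigned scaled
   form accepts multiples of 8 in [0, 32760].  */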
3040
3041/* Return true if X is a valid address for machine mode MODE. If it is,
3042 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3043 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3044
3045static bool
3046aarch64_classify_address (struct aarch64_address_info *info,
3047 rtx x, enum machine_mode mode,
3048 RTX_CODE outer_code, bool strict_p)
3049{
3050 enum rtx_code code = GET_CODE (x);
3051 rtx op0, op1;
3052 bool allow_reg_index_p =
3053 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
3054
3055 /* Don't support anything other than POST_INC or REG addressing for
3056 AdvSIMD. */
3057 if (aarch64_vector_mode_p (mode)
3058 && (code != POST_INC && code != REG))
3059 return false;
3060
3061 switch (code)
3062 {
3063 case REG:
3064 case SUBREG:
3065 info->type = ADDRESS_REG_IMM;
3066 info->base = x;
3067 info->offset = const0_rtx;
3068 return aarch64_base_register_rtx_p (x, strict_p);
3069
3070 case PLUS:
3071 op0 = XEXP (x, 0);
3072 op1 = XEXP (x, 1);
3073 if (GET_MODE_SIZE (mode) != 0
3074 && CONST_INT_P (op1)
3075 && aarch64_base_register_rtx_p (op0, strict_p))
3076 {
3077 HOST_WIDE_INT offset = INTVAL (op1);
3078
3079 info->type = ADDRESS_REG_IMM;
3080 info->base = op0;
3081 info->offset = op1;
3082
3083 /* TImode and TFmode values are allowed in both pairs of X
3084 registers and individual Q registers. The available
3085 address modes are:
3086 X,X: 7-bit signed scaled offset
3087 Q: 9-bit signed offset
3088 We conservatively require an offset representable in either mode.
3089 */
3090 if (mode == TImode || mode == TFmode)
3091 return (offset_7bit_signed_scaled_p (mode, offset)
3092 && offset_9bit_signed_unscaled_p (mode, offset));
3093
3094 if (outer_code == PARALLEL)
3095 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3096 && offset_7bit_signed_scaled_p (mode, offset));
3097 else
3098 return (offset_9bit_signed_unscaled_p (mode, offset)
3099 || offset_12bit_unsigned_scaled_p (mode, offset));
3100 }
3101
3102 if (allow_reg_index_p)
3103 {
3104 /* Look for base + (scaled/extended) index register. */
3105 if (aarch64_base_register_rtx_p (op0, strict_p)
3106 && aarch64_classify_index (info, op1, mode, strict_p))
3107 {
3108 info->base = op0;
3109 return true;
3110 }
3111 if (aarch64_base_register_rtx_p (op1, strict_p)
3112 && aarch64_classify_index (info, op0, mode, strict_p))
3113 {
3114 info->base = op1;
3115 return true;
3116 }
3117 }
3118
3119 return false;
3120
3121 case POST_INC:
3122 case POST_DEC:
3123 case PRE_INC:
3124 case PRE_DEC:
3125 info->type = ADDRESS_REG_WB;
3126 info->base = XEXP (x, 0);
3127 info->offset = NULL_RTX;
3128 return aarch64_base_register_rtx_p (info->base, strict_p);
3129
3130 case POST_MODIFY:
3131 case PRE_MODIFY:
3132 info->type = ADDRESS_REG_WB;
3133 info->base = XEXP (x, 0);
3134 if (GET_CODE (XEXP (x, 1)) == PLUS
3135 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3136 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3137 && aarch64_base_register_rtx_p (info->base, strict_p))
3138 {
3139 HOST_WIDE_INT offset;
3140 info->offset = XEXP (XEXP (x, 1), 1);
3141 offset = INTVAL (info->offset);
3142
3143 /* TImode and TFmode values are allowed in both pairs of X
3144 registers and individual Q registers. The available
3145 address modes are:
3146 X,X: 7-bit signed scaled offset
3147 Q: 9-bit signed offset
3148 We conservatively require an offset representable in either mode.
3149 */
3150 if (mode == TImode || mode == TFmode)
3151 return (offset_7bit_signed_scaled_p (mode, offset)
3152 && offset_9bit_signed_unscaled_p (mode, offset));
3153
3154 if (outer_code == PARALLEL)
3155 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3156 && offset_7bit_signed_scaled_p (mode, offset));
3157 else
3158 return offset_9bit_signed_unscaled_p (mode, offset);
3159 }
3160 return false;
3161
3162 case CONST:
3163 case SYMBOL_REF:
3164 case LABEL_REF:
3165 /* load literal: pc-relative constant pool entry. Only supported
3166 for SI mode or larger. */
3167 info->type = ADDRESS_SYMBOLIC;
3168 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3169 {
3170 rtx sym, addend;
3171
3172 split_const (x, &sym, &addend);
3173 return (GET_CODE (sym) == LABEL_REF
3174 || (GET_CODE (sym) == SYMBOL_REF
3175 && CONSTANT_POOL_ADDRESS_P (sym)));
3176 }
3177 return false;
3178
3179 case LO_SUM:
3180 info->type = ADDRESS_LO_SUM;
3181 info->base = XEXP (x, 0);
3182 info->offset = XEXP (x, 1);
3183 if (allow_reg_index_p
3184 && aarch64_base_register_rtx_p (info->base, strict_p))
3185 {
3186 rtx sym, offs;
3187 split_const (info->offset, &sym, &offs);
3188 if (GET_CODE (sym) == SYMBOL_REF
3189 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3190 == SYMBOL_SMALL_ABSOLUTE))
3191 {
3192 /* The symbol and offset must be aligned to the access size. */
3193 unsigned int align;
3194 unsigned int ref_size;
3195
3196 if (CONSTANT_POOL_ADDRESS_P (sym))
3197 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3198 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3199 {
3200 tree exp = SYMBOL_REF_DECL (sym);
3201 align = TYPE_ALIGN (TREE_TYPE (exp));
3202 align = CONSTANT_ALIGNMENT (exp, align);
3203 }
3204 else if (SYMBOL_REF_DECL (sym))
3205 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3206 else
3207 align = BITS_PER_UNIT;
3208
3209 ref_size = GET_MODE_SIZE (mode);
3210 if (ref_size == 0)
3211 ref_size = GET_MODE_SIZE (DImode);
3212
3213 return ((INTVAL (offs) & (ref_size - 1)) == 0
3214 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3215 }
3216 }
3217 return false;
3218
3219 default:
3220 return false;
3221 }
3222}
3223
3224bool
3225aarch64_symbolic_address_p (rtx x)
3226{
3227 rtx offset;
3228
3229 split_const (x, &x, &offset);
3230 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3231}
3232
3233/* Classify the base of symbolic expression X, given that X appears in
3234 context CONTEXT. */
3235
3236enum aarch64_symbol_type
3237aarch64_classify_symbolic_expression (rtx x,
3238 enum aarch64_symbol_context context)
3239{
3240 rtx offset;
3241
3242 split_const (x, &x, &offset);
3243 return aarch64_classify_symbol (x, context);
3244}
3245
3246
3247/* Return TRUE if X is a legitimate address for accessing memory in
3248 mode MODE. */
3249static bool
3250aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3251{
3252 struct aarch64_address_info addr;
3253
3254 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3255}
3256
3257/* Return TRUE if X is a legitimate address for accessing memory in
3258 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3259 pair operation. */
3260bool
3261aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3262 RTX_CODE outer_code, bool strict_p)
3263{
3264 struct aarch64_address_info addr;
3265
3266 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3267}
3268
3269/* Return TRUE if rtx X is immediate constant 0.0 */
3270bool
3271aarch64_float_const_zero_rtx_p (rtx x)
3272{
3273 REAL_VALUE_TYPE r;
3274
3275 if (GET_MODE (x) == VOIDmode)
3276 return false;
3277
3278 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3279 if (REAL_VALUE_MINUS_ZERO (r))
3280 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3281 return REAL_VALUES_EQUAL (r, dconst0);
3282}
3283
3284/* Return the fixed registers used for condition codes. */
3285
3286static bool
3287aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3288{
3289 *p1 = CC_REGNUM;
3290 *p2 = INVALID_REGNUM;
3291 return true;
3292}
3293
3294enum machine_mode
3295aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3296{
3297 /* All floating point compares return CCFP if it is an equality
3298 comparison, and CCFPE otherwise. */
3299 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3300 {
3301 switch (code)
3302 {
3303 case EQ:
3304 case NE:
3305 case UNORDERED:
3306 case ORDERED:
3307 case UNLT:
3308 case UNLE:
3309 case UNGT:
3310 case UNGE:
3311 case UNEQ:
3312 case LTGT:
3313 return CCFPmode;
3314
3315 case LT:
3316 case LE:
3317 case GT:
3318 case GE:
3319 return CCFPEmode;
3320
3321 default:
3322 gcc_unreachable ();
3323 }
3324 }
3325
3326 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3327 && y == const0_rtx
3328 && (code == EQ || code == NE || code == LT || code == GE)
3329 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3330 || GET_CODE (x) == NEG))
3331 return CC_NZmode;
3332
3333 /* A compare with a shifted operand. Because of canonicalization,
3334 the comparison will have to be swapped when we emit the assembly
3335 code. */
3336 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3337 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3338 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3339 || GET_CODE (x) == LSHIFTRT
3340 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3341 return CC_SWPmode;
3342
3343 /* Similarly for a negated operand, but we can only do this for
3344 equalities. */
3345 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3346 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3347 && (code == EQ || code == NE)
3348 && GET_CODE (x) == NEG)
3349 return CC_Zmode;
3350
43e9d192
IB
3351 /* A compare of a mode narrower than SI mode against zero can be done
3352 by extending the value in the comparison. */
3353 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3354 && y == const0_rtx)
3355 /* Only use sign-extension if we really need it. */
3356 return ((code == GT || code == GE || code == LE || code == LT)
3357 ? CC_SESWPmode : CC_ZESWPmode);
3358
3359 /* For everything else, return CCmode. */
3360 return CCmode;
3361}
3362
3363static unsigned
3364aarch64_get_condition_code (rtx x)
3365{
3366 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3367 enum rtx_code comp_code = GET_CODE (x);
3368
3369 if (GET_MODE_CLASS (mode) != MODE_CC)
3370 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3371
3372 switch (mode)
3373 {
3374 case CCFPmode:
3375 case CCFPEmode:
3376 switch (comp_code)
3377 {
3378 case GE: return AARCH64_GE;
3379 case GT: return AARCH64_GT;
3380 case LE: return AARCH64_LS;
3381 case LT: return AARCH64_MI;
3382 case NE: return AARCH64_NE;
3383 case EQ: return AARCH64_EQ;
3384 case ORDERED: return AARCH64_VC;
3385 case UNORDERED: return AARCH64_VS;
3386 case UNLT: return AARCH64_LT;
3387 case UNLE: return AARCH64_LE;
3388 case UNGT: return AARCH64_HI;
3389 case UNGE: return AARCH64_PL;
3390 default: gcc_unreachable ();
3391 }
3392 break;
3393
3394 case CCmode:
3395 switch (comp_code)
3396 {
3397 case NE: return AARCH64_NE;
3398 case EQ: return AARCH64_EQ;
3399 case GE: return AARCH64_GE;
3400 case GT: return AARCH64_GT;
3401 case LE: return AARCH64_LE;
3402 case LT: return AARCH64_LT;
3403 case GEU: return AARCH64_CS;
3404 case GTU: return AARCH64_HI;
3405 case LEU: return AARCH64_LS;
3406 case LTU: return AARCH64_CC;
3407 default: gcc_unreachable ();
3408 }
3409 break;
3410
3411 case CC_SWPmode:
3412 case CC_ZESWPmode:
3413 case CC_SESWPmode:
3414 switch (comp_code)
3415 {
3416 case NE: return AARCH64_NE;
3417 case EQ: return AARCH64_EQ;
3418 case GE: return AARCH64_LE;
3419 case GT: return AARCH64_LT;
3420 case LE: return AARCH64_GE;
3421 case LT: return AARCH64_GT;
3422 case GEU: return AARCH64_LS;
3423 case GTU: return AARCH64_CC;
3424 case LEU: return AARCH64_CS;
3425 case LTU: return AARCH64_HI;
3426 default: gcc_unreachable ();
3427 }
3428 break;
3429
3430 case CC_NZmode:
3431 switch (comp_code)
3432 {
3433 case NE: return AARCH64_NE;
3434 case EQ: return AARCH64_EQ;
3435 case GE: return AARCH64_PL;
3436 case LT: return AARCH64_MI;
3437 default: gcc_unreachable ();
3438 }
3439 break;
3440
3441 case CC_Zmode:
3442 switch (comp_code)
3443 {
3444 case NE: return AARCH64_NE;
3445 case EQ: return AARCH64_EQ;
3446 default: gcc_unreachable ();
3447 }
3448 break;
3449
3450 default:
3451 gcc_unreachable ();
3452 break;
3453 }
3454}
3455
3456static unsigned
3457bit_count (unsigned HOST_WIDE_INT value)
3458{
3459 unsigned count = 0;
3460
3461 while (value)
3462 {
3463 count++;
3464 value &= value - 1;
3465 }
3466
3467 return count;
3468}
3469
3470void
3471aarch64_print_operand (FILE *f, rtx x, char code)
3472{
3473 switch (code)
3474 {
3475 /* An integer or symbol address without a preceding # sign. */
3476 case 'c':
3477 switch (GET_CODE (x))
3478 {
3479 case CONST_INT:
3480 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3481 break;
3482
3483 case SYMBOL_REF:
3484 output_addr_const (f, x);
3485 break;
3486
3487 case CONST:
3488 if (GET_CODE (XEXP (x, 0)) == PLUS
3489 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3490 {
3491 output_addr_const (f, x);
3492 break;
3493 }
3494 /* Fall through. */
3495
3496 default:
3497 output_operand_lossage ("Unsupported operand for code '%c'", code);
3498 }
3499 break;
3500
3501 case 'e':
3502 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3503 {
3504 int n;
3505
3506 if (GET_CODE (x) != CONST_INT
3507 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3508 {
3509 output_operand_lossage ("invalid operand for '%%%c'", code);
3510 return;
3511 }
3512
3513 switch (n)
3514 {
3515 case 3:
3516 fputc ('b', f);
3517 break;
3518 case 4:
3519 fputc ('h', f);
3520 break;
3521 case 5:
3522 fputc ('w', f);
3523 break;
3524 default:
3525 output_operand_lossage ("invalid operand for '%%%c'", code);
3526 return;
3527 }
3528 }
3529 break;
3530
3531 case 'p':
3532 {
3533 int n;
3534
3535 /* Print N such that 2^N == X. */
3536 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3537 {
3538 output_operand_lossage ("invalid operand for '%%%c'", code);
3539 return;
3540 }
3541
3542 asm_fprintf (f, "%d", n);
3543 }
3544 break;
3545
3546 case 'P':
3547 /* Print the number of non-zero bits in X (a const_int). */
3548 if (GET_CODE (x) != CONST_INT)
3549 {
3550 output_operand_lossage ("invalid operand for '%%%c'", code);
3551 return;
3552 }
3553
3554 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3555 break;
3556
3557 case 'H':
3558 /* Print the higher numbered register of a pair (TImode) of regs. */
3559 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3560 {
3561 output_operand_lossage ("invalid operand for '%%%c'", code);
3562 return;
3563 }
3564
3565 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3566 break;
3567
3568 case 'm':
3569 /* Print a condition (eq, ne, etc). */
3570
3571 /* CONST_TRUE_RTX means always -- that's the default. */
3572 if (x == const_true_rtx)
3573 return;
3574
3575 if (!COMPARISON_P (x))
3576 {
3577 output_operand_lossage ("invalid operand for '%%%c'", code);
3578 return;
3579 }
3580
3581 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3582 break;
3583
3584 case 'M':
3585 /* Print the inverse of a condition (eq <-> ne, etc). */
3586
3587 /* CONST_TRUE_RTX means never -- that's the default. */
3588 if (x == const_true_rtx)
3589 {
3590 fputs ("nv", f);
3591 return;
3592 }
3593
3594 if (!COMPARISON_P (x))
3595 {
3596 output_operand_lossage ("invalid operand for '%%%c'", code);
3597 return;
3598 }
3599
3600 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3601 (aarch64_get_condition_code (x))], f);
3602 break;
3603
3604 case 'b':
3605 case 'h':
3606 case 's':
3607 case 'd':
3608 case 'q':
3609 /* Print a scalar FP/SIMD register name. */
3610 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3611 {
3612 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3613 return;
3614 }
3615 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3616 break;
3617
3618 case 'S':
3619 case 'T':
3620 case 'U':
3621 case 'V':
3622 /* Print the first FP/SIMD register name in a list. */
3623 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3624 {
3625 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3626 return;
3627 }
3628 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3629 break;
3630
3631 case 'X':
3632 /* Print bottom 16 bits of integer constant in hex. */
3633 if (GET_CODE (x) != CONST_INT)
3634 {
3635 output_operand_lossage ("invalid operand for '%%%c'", code);
3636 return;
3637 }
3638 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3639 break;
3640
3641 case 'w':
3642 case 'x':
3643 /* Print a general register name or the zero register (32-bit or
3644 64-bit). */
3645 if (x == const0_rtx
3646 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3647 {
3648 asm_fprintf (f, "%czr", code);
3649 break;
3650 }
3651
3652 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3653 {
3654 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3655 break;
3656 }
3657
3658 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3659 {
3660 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3661 break;
3662 }
3663
3664 /* Fall through */
3665
3666 case 0:
3667 /* Print a normal operand, if it's a general register, then we
3668 assume DImode. */
3669 if (x == NULL)
3670 {
3671 output_operand_lossage ("missing operand");
3672 return;
3673 }
3674
3675 switch (GET_CODE (x))
3676 {
3677 case REG:
3678 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3679 break;
3680
3681 case MEM:
3682 aarch64_memory_reference_mode = GET_MODE (x);
3683 output_address (XEXP (x, 0));
3684 break;
3685
3686 case LABEL_REF:
3687 case SYMBOL_REF:
3688 output_addr_const (asm_out_file, x);
3689 break;
3690
3691 case CONST_INT:
3692 asm_fprintf (f, "%wd", INTVAL (x));
3693 break;
3694
3695 case CONST_VECTOR:
3696 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3697 {
3698 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3699 HOST_WIDE_INT_MIN,
3700 HOST_WIDE_INT_MAX));
3701 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3702 }
3703 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3704 {
3705 fputc ('0', f);
3706 }
3707 else
3708 gcc_unreachable ();
3709 break;
3710
3711 case CONST_DOUBLE:
3712 /* CONST_DOUBLE can represent a double-width integer.
3713 In this case, the mode of x is VOIDmode. */
3714 if (GET_MODE (x) == VOIDmode)
3715 ; /* Do Nothing. */
3716 else if (aarch64_float_const_zero_rtx_p (x))
3717 {
3718 fputc ('0', f);
3719 break;
3720 }
3721 else if (aarch64_float_const_representable_p (x))
3722 {
3723#define buf_size 20
3724 char float_buf[buf_size] = {'\0'};
3725 REAL_VALUE_TYPE r;
3726 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3727 real_to_decimal_for_mode (float_buf, &r,
3728 buf_size, buf_size,
3729 1, GET_MODE (x));
3730 asm_fprintf (asm_out_file, "%s", float_buf);
3731 break;
3732#undef buf_size
3733 }
3734 output_operand_lossage ("invalid constant");
3735 return;
3736 default:
3737 output_operand_lossage ("invalid operand");
3738 return;
3739 }
3740 break;
3741
3742 case 'A':
3743 if (GET_CODE (x) == HIGH)
3744 x = XEXP (x, 0);
3745
3746 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3747 {
3748 case SYMBOL_SMALL_GOT:
3749 asm_fprintf (asm_out_file, ":got:");
3750 break;
3751
3752 case SYMBOL_SMALL_TLSGD:
3753 asm_fprintf (asm_out_file, ":tlsgd:");
3754 break;
3755
3756 case SYMBOL_SMALL_TLSDESC:
3757 asm_fprintf (asm_out_file, ":tlsdesc:");
3758 break;
3759
3760 case SYMBOL_SMALL_GOTTPREL:
3761 asm_fprintf (asm_out_file, ":gottprel:");
3762 break;
3763
3764 case SYMBOL_SMALL_TPREL:
3765 asm_fprintf (asm_out_file, ":tprel:");
3766 break;
3767
3768 case SYMBOL_TINY_GOT:
3769 gcc_unreachable ();
3770 break;
3771
3772 default:
3773 break;
3774 }
3775 output_addr_const (asm_out_file, x);
3776 break;
3777
3778 case 'L':
3779 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3780 {
3781 case SYMBOL_SMALL_GOT:
3782 asm_fprintf (asm_out_file, ":lo12:");
3783 break;
3784
3785 case SYMBOL_SMALL_TLSGD:
3786 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3787 break;
3788
3789 case SYMBOL_SMALL_TLSDESC:
3790 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3791 break;
3792
3793 case SYMBOL_SMALL_GOTTPREL:
3794 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3795 break;
3796
3797 case SYMBOL_SMALL_TPREL:
3798 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3799 break;
3800
3801 case SYMBOL_TINY_GOT:
3802 asm_fprintf (asm_out_file, ":got:");
3803 break;
3804
3805 default:
3806 break;
3807 }
3808 output_addr_const (asm_out_file, x);
3809 break;
3810
3811 case 'G':
3812
3813 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3814 {
3815 case SYMBOL_SMALL_TPREL:
3816 asm_fprintf (asm_out_file, ":tprel_hi12:");
3817 break;
3818 default:
3819 break;
3820 }
3821 output_addr_const (asm_out_file, x);
3822 break;
3823
3824 default:
3825 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3826 return;
3827 }
3828}
3829
3830void
3831aarch64_print_operand_address (FILE *f, rtx x)
3832{
3833 struct aarch64_address_info addr;
3834
3835 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3836 MEM, true))
3837 switch (addr.type)
3838 {
3839 case ADDRESS_REG_IMM:
3840 if (addr.offset == const0_rtx)
01a3a324 3841 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43e9d192 3842 else
01a3a324 3843 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
43e9d192
IB
3844 INTVAL (addr.offset));
3845 return;
3846
3847 case ADDRESS_REG_REG:
3848 if (addr.shift == 0)
01a3a324
N
3849 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3850 reg_names [REGNO (addr.offset)]);
43e9d192 3851 else
01a3a324
N
3852 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3853 reg_names [REGNO (addr.offset)], addr.shift);
43e9d192
IB
3854 return;
3855
3856 case ADDRESS_REG_UXTW:
3857 if (addr.shift == 0)
01a3a324 3858 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
3859 REGNO (addr.offset) - R0_REGNUM);
3860 else
01a3a324 3861 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
3862 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3863 return;
3864
3865 case ADDRESS_REG_SXTW:
3866 if (addr.shift == 0)
01a3a324 3867 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
3868 REGNO (addr.offset) - R0_REGNUM);
3869 else
01a3a324 3870 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
3871 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3872 return;
3873
3874 case ADDRESS_REG_WB:
3875 switch (GET_CODE (x))
3876 {
3877 case PRE_INC:
01a3a324 3878 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
3879 GET_MODE_SIZE (aarch64_memory_reference_mode));
3880 return;
3881 case POST_INC:
01a3a324 3882 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
43e9d192
IB
3883 GET_MODE_SIZE (aarch64_memory_reference_mode));
3884 return;
3885 case PRE_DEC:
01a3a324 3886 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
3887 GET_MODE_SIZE (aarch64_memory_reference_mode));
3888 return;
3889 case POST_DEC:
01a3a324 3890 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
43e9d192
IB
3891 GET_MODE_SIZE (aarch64_memory_reference_mode));
3892 return;
3893 case PRE_MODIFY:
01a3a324 3894 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
43e9d192
IB
3895 INTVAL (addr.offset));
3896 return;
3897 case POST_MODIFY:
01a3a324 3898 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
43e9d192
IB
3899 INTVAL (addr.offset));
3900 return;
3901 default:
3902 break;
3903 }
3904 break;
3905
3906 case ADDRESS_LO_SUM:
01a3a324 3907 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
3908 output_addr_const (f, addr.offset);
3909 asm_fprintf (f, "]");
3910 return;
3911
3912 case ADDRESS_SYMBOLIC:
3913 break;
3914 }
3915
3916 output_addr_const (f, x);
3917}
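/* Typical strings produced by aarch64_print_operand_address above, with
   x0/x1/w1 and "sym" assumed purely for illustration:
     [x0]                register + zero offset
     [x0,16]             register + immediate offset
     [x0,x1,lsl 3]       register + scaled register
     [x0,w1,uxtw 2]      register + zero-extended, scaled register
     [x0,8]!   [x0],8    pre-/post-indexed writeback forms
     [x0,#:lo12:sym]     LO_SUM low-part relocation  */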
3918
43e9d192
IB
3919bool
3920aarch64_label_mentioned_p (rtx x)
3921{
3922 const char *fmt;
3923 int i;
3924
3925 if (GET_CODE (x) == LABEL_REF)
3926 return true;
3927
3928 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3929 referencing instruction, but they are constant offsets, not
3930 symbols. */
3931 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3932 return false;
3933
3934 fmt = GET_RTX_FORMAT (GET_CODE (x));
3935 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3936 {
3937 if (fmt[i] == 'E')
3938 {
3939 int j;
3940
3941 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3942 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3943 return 1;
3944 }
3945 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3946 return 1;
3947 }
3948
3949 return 0;
3950}
3951
3952/* Implement REGNO_REG_CLASS. */
3953
3954enum reg_class
3955aarch64_regno_regclass (unsigned regno)
3956{
3957 if (GP_REGNUM_P (regno))
3958 return CORE_REGS;
3959
3960 if (regno == SP_REGNUM)
3961 return STACK_REG;
3962
3963 if (regno == FRAME_POINTER_REGNUM
3964 || regno == ARG_POINTER_REGNUM)
f24bb080 3965 return POINTER_REGS;
43e9d192
IB
3966
3967 if (FP_REGNUM_P (regno))
3968 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3969
3970 return NO_REGS;
3971}
3972
3973/* Try a machine-dependent way of reloading an illegitimate address
3974 operand. If we find one, push the reload and return the new rtx. */
3975
3976rtx
3977aarch64_legitimize_reload_address (rtx *x_p,
3978 enum machine_mode mode,
3979 int opnum, int type,
3980 int ind_levels ATTRIBUTE_UNUSED)
3981{
3982 rtx x = *x_p;
3983
3984 /* Do not allow mem (plus (reg, const)) if vector mode. */
3985 if (aarch64_vector_mode_p (mode)
3986 && GET_CODE (x) == PLUS
3987 && REG_P (XEXP (x, 0))
3988 && CONST_INT_P (XEXP (x, 1)))
3989 {
3990 rtx orig_rtx = x;
3991 x = copy_rtx (x);
3992 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3993 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3994 opnum, (enum reload_type) type);
3995 return x;
3996 }
3997
3998 /* We must recognize output that we have already generated ourselves. */
3999 if (GET_CODE (x) == PLUS
4000 && GET_CODE (XEXP (x, 0)) == PLUS
4001 && REG_P (XEXP (XEXP (x, 0), 0))
4002 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4003 && CONST_INT_P (XEXP (x, 1)))
4004 {
4005 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4006 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4007 opnum, (enum reload_type) type);
4008 return x;
4009 }
4010
4011 /* We wish to handle large displacements off a base register by splitting
4012 the addend across an add and the mem insn. This can cut the number of
4013 extra insns needed from 3 to 1. It is only useful for load/store of a
4014 single register with 12 bit offset field. */
4015 if (GET_CODE (x) == PLUS
4016 && REG_P (XEXP (x, 0))
4017 && CONST_INT_P (XEXP (x, 1))
4018 && HARD_REGISTER_P (XEXP (x, 0))
4019 && mode != TImode
4020 && mode != TFmode
4021 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4022 {
4023 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4024 HOST_WIDE_INT low = val & 0xfff;
4025 HOST_WIDE_INT high = val - low;
4026 HOST_WIDE_INT offs;
4027 rtx cst;
28514dda
YZ
4028 enum machine_mode xmode = GET_MODE (x);
4029
4030 /* In ILP32, xmode can be either DImode or SImode. */
4031 gcc_assert (xmode == DImode || xmode == SImode);
43e9d192
IB
4032
4033 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4034 BLKmode alignment. */
4035 if (GET_MODE_SIZE (mode) == 0)
4036 return NULL_RTX;
4037
4038 offs = low % GET_MODE_SIZE (mode);
4039
4040 /* Align misaligned offset by adjusting high part to compensate. */
4041 if (offs != 0)
4042 {
4043 if (aarch64_uimm12_shift (high + offs))
4044 {
4045 /* Align down. */
4046 low = low - offs;
4047 high = high + offs;
4048 }
4049 else
4050 {
4051 /* Align up. */
4052 offs = GET_MODE_SIZE (mode) - offs;
4053 low = low + offs;
4054 high = high + (low & 0x1000) - offs;
4055 low &= 0xfff;
4056 }
4057 }
4058
4059 /* Check for overflow. */
4060 if (high + low != val)
4061 return NULL_RTX;
4062
4063 cst = GEN_INT (high);
4064 if (!aarch64_uimm12_shift (high))
28514dda 4065 cst = force_const_mem (xmode, cst);
43e9d192
IB
4066
4067 /* Reload high part into base reg, leaving the low part
acf67b8e
KT
4068 in the mem instruction.
4069 Note that replacing this gen_rtx_PLUS with plus_constant is
4070 wrong in this case because we rely on the
4071 (plus (plus reg c1) c2) structure being preserved so that
4072 XEXP (*p, 0) in push_reload below uses the correct term. */
4d0e5b54
JG
4073 x = gen_rtx_PLUS (xmode,
4074 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4075 GEN_INT (low));
43e9d192
IB
4076
4077 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
28514dda 4078 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
43e9d192
IB
4079 opnum, (enum reload_type) type);
4080 return x;
4081 }
4082
4083 return NULL_RTX;
4084}
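/* Worked example for aarch64_legitimize_reload_address above (values
   assumed): for a DImode reference to (plus (reg) (const_int 0x12008)),
   low = 0x12008 & 0xfff = 0x8 and high = 0x12000.  The low part is already
   a multiple of GET_MODE_SIZE (DImode) and 0x12000 satisfies
   aarch64_uimm12_shift, so the address is rewritten as
   (plus (plus (reg) 0x12000) 0x8): the inner PLUS is reloaded into a base
   register with a single ADD and the memory access keeps the offset 8.  */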
4085
4086
4087static reg_class_t
4088aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4089 reg_class_t rclass,
4090 enum machine_mode mode,
4091 secondary_reload_info *sri)
4092{
43e9d192
IB
4093 /* Without the TARGET_SIMD instructions we cannot move a Q register
4094 to a Q register directly. We need a scratch. */
4095 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4096 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4097 && reg_class_subset_p (rclass, FP_REGS))
4098 {
4099 if (mode == TFmode)
4100 sri->icode = CODE_FOR_aarch64_reload_movtf;
4101 else if (mode == TImode)
4102 sri->icode = CODE_FOR_aarch64_reload_movti;
4103 return NO_REGS;
4104 }
4105
 4106 /* A TFmode or TImode memory access should be handled via an FP register
4107 because AArch64 has richer addressing modes for LDR/STR instructions
4108 than LDP/STP instructions. */
4109 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4110 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4111 return FP_REGS;
4112
4113 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4114 return CORE_REGS;
4115
4116 return NO_REGS;
4117}
4118
4119static bool
4120aarch64_can_eliminate (const int from, const int to)
4121{
4122 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4123 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4124
4125 if (frame_pointer_needed)
4126 {
4127 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4128 return true;
4129 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4130 return false;
4131 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4132 && !cfun->calls_alloca)
4133 return true;
4134 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4135 return true;
4136 return false;
4137 }
4138 else
4139 {
777e6976
IB
4140 /* If we decided that we didn't need a leaf frame pointer but then used
4141 LR in the function, then we'll want a frame pointer after all, so
4142 prevent this elimination to ensure a frame pointer is used.
4143
4144 NOTE: the original value of flag_omit_frame_pointer gets trashed
4145 IFF flag_omit_leaf_frame_pointer is true, so we check the value
4146 of faked_omit_frame_pointer here (which is true when we always
4147 wish to keep non-leaf frame pointers but only wish to keep leaf frame
4148 pointers when LR is clobbered). */
77436791 4149 if (to == STACK_POINTER_REGNUM
777e6976
IB
4150 && df_regs_ever_live_p (LR_REGNUM)
4151 && faked_omit_frame_pointer)
43e9d192
IB
4152 return false;
4153 }
777e6976 4154
43e9d192
IB
4155 return true;
4156}
4157
4158HOST_WIDE_INT
4159aarch64_initial_elimination_offset (unsigned from, unsigned to)
4160{
4161 HOST_WIDE_INT frame_size;
4162 HOST_WIDE_INT offset;
4163
4164 aarch64_layout_frame ();
4165 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4166 + crtl->outgoing_args_size
4167 + cfun->machine->saved_varargs_size);
4168
4169 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4170 offset = frame_size;
4171
4172 if (to == HARD_FRAME_POINTER_REGNUM)
4173 {
4174 if (from == ARG_POINTER_REGNUM)
4175 return offset - crtl->outgoing_args_size;
4176
4177 if (from == FRAME_POINTER_REGNUM)
6991c977 4178 return cfun->machine->frame.saved_regs_size + get_frame_size ();
43e9d192
IB
4179 }
4180
4181 if (to == STACK_POINTER_REGNUM)
4182 {
4183 if (from == FRAME_POINTER_REGNUM)
4184 {
4185 HOST_WIDE_INT elim = crtl->outgoing_args_size
4186 + cfun->machine->frame.saved_regs_size
6991c977 4187 + get_frame_size ()
43e9d192
IB
4188 - cfun->machine->frame.fp_lr_offset;
4189 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4190 return elim;
4191 }
4192 }
4193
4194 return offset;
4195}
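/* Worked example for aarch64_initial_elimination_offset above, assuming a
   16-byte STACK_BOUNDARY, saved_regs_size = 16, no outgoing args, no saved
   varargs and get_frame_size () = 24: frame_size = 40 rounds up to 48, so
   ARG_POINTER -> HARD_FRAME_POINTER yields 48, while
   FRAME_POINTER -> HARD_FRAME_POINTER yields 16 + 24 = 40.  */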
4196
4197
4198/* Implement RETURN_ADDR_RTX. We do not support moving back to a
4199 previous frame. */
4200
4201rtx
4202aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4203{
4204 if (count != 0)
4205 return const0_rtx;
4206 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4207}
4208
4209
4210static void
4211aarch64_asm_trampoline_template (FILE *f)
4212{
28514dda
YZ
4213 if (TARGET_ILP32)
4214 {
4215 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4216 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4217 }
4218 else
4219 {
4220 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4221 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4222 }
01a3a324 4223 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 4224 assemble_aligned_integer (4, const0_rtx);
28514dda
YZ
4225 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4226 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
4227}
4228
4229static void
4230aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4231{
4232 rtx fnaddr, mem, a_tramp;
28514dda 4233 const int tramp_code_sz = 16;
43e9d192
IB
4234
 4235 /* We don't need to copy the trailing D-words; we fill those in below. */
4236 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
4237 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4238 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 4239 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
4240 if (GET_MODE (fnaddr) != ptr_mode)
4241 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
4242 emit_move_insn (mem, fnaddr);
4243
28514dda 4244 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
4245 emit_move_insn (mem, chain_value);
4246
4247 /* XXX We should really define a "clear_cache" pattern and use
4248 gen_clear_cache(). */
4249 a_tramp = XEXP (m_tramp, 0);
4250 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
28514dda
YZ
4251 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4252 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4253 ptr_mode);
43e9d192
IB
4254}
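/* Sketch of the trampoline laid out by the two routines above, assuming
   LP64 so that POINTER_BYTES == 8:
     bytes  0..15   code copied from the template (two literal loads, a BR
                    through IP1, and one word of padding)
     bytes 16..23   address of the target function (fnaddr)
     bytes 24..31   static chain value
   followed by a __clear_cache call over the whole block so the copied code
   is visible to the instruction stream.  */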
4255
4256static unsigned char
4257aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4258{
4259 switch (regclass)
4260 {
4261 case CORE_REGS:
4262 case POINTER_REGS:
4263 case GENERAL_REGS:
4264 case ALL_REGS:
4265 case FP_REGS:
4266 case FP_LO_REGS:
4267 return
4268 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
aef66c94 4269 (GET_MODE_SIZE (mode) + 7) / 8;
43e9d192
IB
4270 case STACK_REG:
4271 return 1;
4272
4273 case NO_REGS:
4274 return 0;
4275
4276 default:
4277 break;
4278 }
4279 gcc_unreachable ();
4280}
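/* Examples for aarch64_class_max_nregs above: DImode in GENERAL_REGS needs
   (8 + 7) / 8 = 1 register and TImode needs 2, while a 16-byte vector mode
   such as V4SImode needs (16 + 15) / 16 = 1 (a single Q register);
   STACK_REG always reports 1.  */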
4281
4282static reg_class_t
78d8b9f0 4283aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 4284{
51bb310d 4285 if (regclass == POINTER_REGS)
78d8b9f0
IB
4286 return GENERAL_REGS;
4287
51bb310d
MS
4288 if (regclass == STACK_REG)
4289 {
4290 if (REG_P(x)
4291 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4292 return regclass;
4293
4294 return NO_REGS;
4295 }
4296
78d8b9f0
IB
4297 /* If it's an integer immediate that MOVI can't handle, then
4298 FP_REGS is not an option, so we return NO_REGS instead. */
4299 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4300 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4301 return NO_REGS;
4302
27bd251b
IB
 4303 /* Register elimination can result in a request for
 4304 SP+constant->FP_REGS. We cannot support such operations, which
 4305 use SP as source and an FP_REG as destination, so reject them
 4306 outright. */
4307 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4308 {
4309 rtx lhs = XEXP (x, 0);
4310
4311 /* Look through a possible SUBREG introduced by ILP32. */
4312 if (GET_CODE (lhs) == SUBREG)
4313 lhs = SUBREG_REG (lhs);
4314
4315 gcc_assert (REG_P (lhs));
4316 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4317 POINTER_REGS));
4318 return NO_REGS;
4319 }
4320
78d8b9f0 4321 return regclass;
43e9d192
IB
4322}
4323
4324void
4325aarch64_asm_output_labelref (FILE* f, const char *name)
4326{
4327 asm_fprintf (f, "%U%s", name);
4328}
4329
4330static void
4331aarch64_elf_asm_constructor (rtx symbol, int priority)
4332{
4333 if (priority == DEFAULT_INIT_PRIORITY)
4334 default_ctor_section_asm_out_constructor (symbol, priority);
4335 else
4336 {
4337 section *s;
4338 char buf[18];
4339 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4340 s = get_section (buf, SECTION_WRITE, NULL);
4341 switch_to_section (s);
4342 assemble_align (POINTER_SIZE);
28514dda 4343 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
4344 }
4345}
4346
4347static void
4348aarch64_elf_asm_destructor (rtx symbol, int priority)
4349{
4350 if (priority == DEFAULT_INIT_PRIORITY)
4351 default_dtor_section_asm_out_destructor (symbol, priority);
4352 else
4353 {
4354 section *s;
4355 char buf[18];
4356 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4357 s = get_section (buf, SECTION_WRITE, NULL);
4358 switch_to_section (s);
4359 assemble_align (POINTER_SIZE);
28514dda 4360 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
4361 }
4362}
4363
4364const char*
4365aarch64_output_casesi (rtx *operands)
4366{
4367 char buf[100];
4368 char label[100];
592a16fc 4369 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
43e9d192
IB
4370 int index;
4371 static const char *const patterns[4][2] =
4372 {
4373 {
4374 "ldrb\t%w3, [%0,%w1,uxtw]",
4375 "add\t%3, %4, %w3, sxtb #2"
4376 },
4377 {
4378 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4379 "add\t%3, %4, %w3, sxth #2"
4380 },
4381 {
4382 "ldr\t%w3, [%0,%w1,uxtw #2]",
4383 "add\t%3, %4, %w3, sxtw #2"
4384 },
4385 /* We assume that DImode is only generated when not optimizing and
4386 that we don't really need 64-bit address offsets. That would
4387 imply an object file with 8GB of code in a single function! */
4388 {
4389 "ldr\t%w3, [%0,%w1,uxtw #2]",
4390 "add\t%3, %4, %w3, sxtw #2"
4391 }
4392 };
4393
4394 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4395
4396 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4397
4398 gcc_assert (index >= 0 && index <= 3);
4399
 4400 /* Need to implement table size reduction, by changing the code below. */
4401 output_asm_insn (patterns[index][0], operands);
4402 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4403 snprintf (buf, sizeof (buf),
4404 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4405 output_asm_insn (buf, operands);
4406 output_asm_insn (patterns[index][1], operands);
4407 output_asm_insn ("br\t%3", operands);
4408 assemble_label (asm_out_file, label);
4409 return "";
4410}
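/* Rough shape of the code emitted by aarch64_output_casesi above for the
   HImode table entry size, with operands assumed to be x0 = table base,
   w1 = index and x3/x4 scratch registers:
       ldrh  w3, [x0,w1,uxtw #1]
       adr   x4, .Lrtx<N>
       add   x3, x4, w3, sxth #2
       br    x3
     .Lrtx<N>:
   i.e. a table load, the table anchor, and an indirect branch.  */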
4411
4412
4413/* Return size in bits of an arithmetic operand which is shifted/scaled and
4414 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4415 operator. */
4416
4417int
4418aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4419{
4420 if (shift >= 0 && shift <= 3)
4421 {
4422 int size;
4423 for (size = 8; size <= 32; size *= 2)
4424 {
4425 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4426 if (mask == bits << shift)
4427 return size;
4428 }
4429 }
4430 return 0;
4431}
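/* Examples for aarch64_uxt_size above: shift 0 with mask 0xff gives 8
   (UXTB), shift 2 with mask 0x3fc (0xff << 2) also gives 8, and shift 1
   with mask 0x1fffffffe (0xffffffff << 1) gives 32 (UXTW); any other
   shift/mask combination returns 0.  */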
4432
4433static bool
4434aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4435 const_rtx x ATTRIBUTE_UNUSED)
4436{
4437 /* We can't use blocks for constants when we're using a per-function
4438 constant pool. */
4439 return false;
4440}
4441
4442static section *
4443aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4444 rtx x ATTRIBUTE_UNUSED,
4445 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4446{
4447 /* Force all constant pool entries into the current function section. */
4448 return function_section (current_function_decl);
4449}
4450
4451
4452/* Costs. */
4453
4454/* Helper function for rtx cost calculation. Strip a shift expression
4455 from X. Returns the inner operand if successful, or the original
4456 expression on failure. */
4457static rtx
4458aarch64_strip_shift (rtx x)
4459{
4460 rtx op = x;
4461
4462 if ((GET_CODE (op) == ASHIFT
4463 || GET_CODE (op) == ASHIFTRT
4464 || GET_CODE (op) == LSHIFTRT)
4465 && CONST_INT_P (XEXP (op, 1)))
4466 return XEXP (op, 0);
4467
4468 if (GET_CODE (op) == MULT
4469 && CONST_INT_P (XEXP (op, 1))
4470 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4471 return XEXP (op, 0);
4472
4473 return x;
4474}
4475
4476/* Helper function for rtx cost calculation. Strip a shift or extend
4477 expression from X. Returns the inner operand if successful, or the
4478 original expression on failure. We deal with a number of possible
4479 canonicalization variations here. */
4480static rtx
4481aarch64_strip_shift_or_extend (rtx x)
4482{
4483 rtx op = x;
4484
4485 /* Zero and sign extraction of a widened value. */
4486 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4487 && XEXP (op, 2) == const0_rtx
4488 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4489 XEXP (op, 1)))
4490 return XEXP (XEXP (op, 0), 0);
4491
4492 /* It can also be represented (for zero-extend) as an AND with an
4493 immediate. */
4494 if (GET_CODE (op) == AND
4495 && GET_CODE (XEXP (op, 0)) == MULT
4496 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4497 && CONST_INT_P (XEXP (op, 1))
4498 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4499 INTVAL (XEXP (op, 1))) != 0)
4500 return XEXP (XEXP (op, 0), 0);
4501
4502 /* Now handle extended register, as this may also have an optional
4503 left shift by 1..4. */
4504 if (GET_CODE (op) == ASHIFT
4505 && CONST_INT_P (XEXP (op, 1))
4506 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4507 op = XEXP (op, 0);
4508
4509 if (GET_CODE (op) == ZERO_EXTEND
4510 || GET_CODE (op) == SIGN_EXTEND)
4511 op = XEXP (op, 0);
4512
4513 if (op != x)
4514 return op;
4515
4516 return aarch64_strip_shift (x);
4517}
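/* Illustrative inputs to aarch64_strip_shift_or_extend above (register
   operands assumed):
     (ashift (reg) (const_int 3))                        -> (reg)
     (mult (reg) (const_int 8))                          -> (reg)
     (zero_extend (reg))                                 -> (reg)
     (and (mult (reg) (const_int 4)) (const_int 0x3fc))  -> (reg)
   anything that does not match one of the shift/extend shapes is returned
   unchanged.  */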
4518
4519/* Calculate the cost of calculating X, storing it in *COST. Result
4520 is true if the total cost of the operation has now been calculated. */
4521static bool
4522aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4523 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4524{
4525 rtx op0, op1;
73250c4c 4526 const struct cpu_cost_table *extra_cost
43e9d192
IB
4527 = aarch64_tune_params->insn_extra_cost;
4528
4529 switch (code)
4530 {
4531 case SET:
4532 op0 = SET_DEST (x);
4533 op1 = SET_SRC (x);
4534
4535 switch (GET_CODE (op0))
4536 {
4537 case MEM:
4538 if (speed)
73250c4c 4539 *cost += extra_cost->ldst.store;
43e9d192
IB
4540
4541 if (op1 != const0_rtx)
4542 *cost += rtx_cost (op1, SET, 1, speed);
4543 return true;
4544
4545 case SUBREG:
4546 if (! REG_P (SUBREG_REG (op0)))
4547 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4548 /* Fall through. */
4549 case REG:
4550 /* Cost is just the cost of the RHS of the set. */
4551 *cost += rtx_cost (op1, SET, 1, true);
4552 return true;
4553
4554 case ZERO_EXTRACT: /* Bit-field insertion. */
4555 case SIGN_EXTRACT:
4556 /* Strip any redundant widening of the RHS to meet the width of
4557 the target. */
4558 if (GET_CODE (op1) == SUBREG)
4559 op1 = SUBREG_REG (op1);
4560 if ((GET_CODE (op1) == ZERO_EXTEND
4561 || GET_CODE (op1) == SIGN_EXTEND)
4562 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4563 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4564 >= INTVAL (XEXP (op0, 1))))
4565 op1 = XEXP (op1, 0);
4566 *cost += rtx_cost (op1, SET, 1, speed);
4567 return true;
4568
4569 default:
4570 break;
4571 }
4572 return false;
4573
4574 case MEM:
4575 if (speed)
73250c4c 4576 *cost += extra_cost->ldst.load;
43e9d192
IB
4577
4578 return true;
4579
4580 case NEG:
4581 op0 = CONST0_RTX (GET_MODE (x));
4582 op1 = XEXP (x, 0);
4583 goto cost_minus;
4584
4585 case COMPARE:
4586 op0 = XEXP (x, 0);
4587 op1 = XEXP (x, 1);
4588
4589 if (op1 == const0_rtx
4590 && GET_CODE (op0) == AND)
4591 {
4592 x = op0;
4593 goto cost_logic;
4594 }
4595
4596 /* Comparisons can work if the order is swapped.
4597 Canonicalization puts the more complex operation first, but
4598 we want it in op1. */
4599 if (! (REG_P (op0)
4600 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4601 {
4602 op0 = XEXP (x, 1);
4603 op1 = XEXP (x, 0);
4604 }
4605 goto cost_minus;
4606
4607 case MINUS:
4608 op0 = XEXP (x, 0);
4609 op1 = XEXP (x, 1);
4610
4611 cost_minus:
4612 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4613 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4614 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4615 {
4616 if (op0 != const0_rtx)
4617 *cost += rtx_cost (op0, MINUS, 0, speed);
4618
4619 if (CONST_INT_P (op1))
4620 {
4621 if (!aarch64_uimm12_shift (INTVAL (op1)))
4622 *cost += rtx_cost (op1, MINUS, 1, speed);
4623 }
4624 else
4625 {
4626 op1 = aarch64_strip_shift_or_extend (op1);
4627 *cost += rtx_cost (op1, MINUS, 1, speed);
4628 }
4629 return true;
4630 }
4631
4632 return false;
4633
4634 case PLUS:
4635 op0 = XEXP (x, 0);
4636 op1 = XEXP (x, 1);
4637
4638 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4639 {
4640 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4641 {
4642 *cost += rtx_cost (op0, PLUS, 0, speed);
4643 }
4644 else
4645 {
4646 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4647
4648 if (new_op0 == op0
4649 && GET_CODE (op0) == MULT)
4650 {
4651 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4652 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4653 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4654 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4655 {
4656 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4657 speed)
4658 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4659 speed)
4660 + rtx_cost (op1, PLUS, 1, speed));
4661 if (speed)
73250c4c
KT
4662 *cost +=
4663 extra_cost->mult[GET_MODE (x) == DImode].extend_add;
43e9d192
IB
4664 return true;
4665 }
328402a9 4666
43e9d192
IB
4667 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4668 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4669 + rtx_cost (op1, PLUS, 1, speed));
4670
4671 if (speed)
73250c4c 4672 *cost += extra_cost->mult[GET_MODE (x) == DImode].add;
328402a9
RE
4673
4674 return true;
43e9d192
IB
4675 }
4676
4677 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4678 + rtx_cost (op1, PLUS, 1, speed));
4679 }
4680 return true;
4681 }
4682
4683 return false;
4684
4685 case IOR:
4686 case XOR:
4687 case AND:
4688 cost_logic:
4689 op0 = XEXP (x, 0);
4690 op1 = XEXP (x, 1);
4691
4692 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4693 {
4694 if (CONST_INT_P (op1)
4695 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4696 {
4697 *cost += rtx_cost (op0, AND, 0, speed);
4698 }
4699 else
4700 {
4701 if (GET_CODE (op0) == NOT)
4702 op0 = XEXP (op0, 0);
4703 op0 = aarch64_strip_shift (op0);
4704 *cost += (rtx_cost (op0, AND, 0, speed)
4705 + rtx_cost (op1, AND, 1, speed));
4706 }
4707 return true;
4708 }
4709 return false;
4710
4711 case ZERO_EXTEND:
4712 if ((GET_MODE (x) == DImode
4713 && GET_MODE (XEXP (x, 0)) == SImode)
4714 || GET_CODE (XEXP (x, 0)) == MEM)
4715 {
4716 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4717 return true;
4718 }
4719 return false;
4720
4721 case SIGN_EXTEND:
4722 if (GET_CODE (XEXP (x, 0)) == MEM)
4723 {
4724 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4725 return true;
4726 }
4727 return false;
4728
4729 case ROTATE:
4730 if (!CONST_INT_P (XEXP (x, 1)))
4731 *cost += COSTS_N_INSNS (2);
4732 /* Fall through. */
4733 case ROTATERT:
4734 case LSHIFTRT:
4735 case ASHIFT:
4736 case ASHIFTRT:
4737
4738 /* Shifting by a register often takes an extra cycle. */
4739 if (speed && !CONST_INT_P (XEXP (x, 1)))
73250c4c 4740 *cost += extra_cost->alu.arith_shift_reg;
43e9d192
IB
4741
4742 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4743 return true;
4744
4745 case HIGH:
4746 if (!CONSTANT_P (XEXP (x, 0)))
4747 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4748 return true;
4749
4750 case LO_SUM:
4751 if (!CONSTANT_P (XEXP (x, 1)))
4752 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4753 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4754 return true;
4755
4756 case ZERO_EXTRACT:
4757 case SIGN_EXTRACT:
4758 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4759 return true;
4760
4761 case MULT:
4762 op0 = XEXP (x, 0);
4763 op1 = XEXP (x, 1);
4764
4765 *cost = COSTS_N_INSNS (1);
4766 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4767 {
4768 if (CONST_INT_P (op1)
4769 && exact_log2 (INTVAL (op1)) > 0)
4770 {
4771 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4772 return true;
4773 }
4774
4775 if ((GET_CODE (op0) == ZERO_EXTEND
4776 && GET_CODE (op1) == ZERO_EXTEND)
4777 || (GET_CODE (op0) == SIGN_EXTEND
4778 && GET_CODE (op1) == SIGN_EXTEND))
4779 {
4780 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4781 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4782 if (speed)
73250c4c 4783 *cost += extra_cost->mult[GET_MODE (x) == DImode].extend;
43e9d192
IB
4784 return true;
4785 }
4786
4787 if (speed)
73250c4c 4788 *cost += extra_cost->mult[GET_MODE (x) == DImode].simple;
43e9d192
IB
4789 }
4790 else if (speed)
4791 {
4792 if (GET_MODE (x) == DFmode)
73250c4c 4793 *cost += extra_cost->fp[1].mult;
43e9d192 4794 else if (GET_MODE (x) == SFmode)
73250c4c 4795 *cost += extra_cost->fp[0].mult;
43e9d192
IB
4796 }
4797
4798 return false; /* All arguments need to be in registers. */
4799
4800 case MOD:
4801 case UMOD:
4802 *cost = COSTS_N_INSNS (2);
4803 if (speed)
4804 {
4805 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
73250c4c
KT
4806 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
4807 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
43e9d192 4808 else if (GET_MODE (x) == DFmode)
73250c4c
KT
4809 *cost += (extra_cost->fp[1].mult
4810 + extra_cost->fp[1].div);
43e9d192 4811 else if (GET_MODE (x) == SFmode)
73250c4c
KT
4812 *cost += (extra_cost->fp[0].mult
4813 + extra_cost->fp[0].div);
43e9d192
IB
4814 }
4815 return false; /* All arguments need to be in registers. */
4816
4817 case DIV:
4818 case UDIV:
4819 *cost = COSTS_N_INSNS (1);
4820 if (speed)
4821 {
4822 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
73250c4c 4823 *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
43e9d192 4824 else if (GET_MODE (x) == DFmode)
73250c4c 4825 *cost += extra_cost->fp[1].div;
43e9d192 4826 else if (GET_MODE (x) == SFmode)
73250c4c 4827 *cost += extra_cost->fp[0].div;
43e9d192
IB
4828 }
4829 return false; /* All arguments need to be in registers. */
4830
4831 default:
4832 break;
4833 }
4834 return false;
4835}
4836
4837static int
4838aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4839 enum machine_mode mode ATTRIBUTE_UNUSED,
4840 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4841{
4842 enum rtx_code c = GET_CODE (x);
4843 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4844
4845 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4846 return addr_cost->pre_modify;
4847
4848 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4849 return addr_cost->post_modify;
4850
4851 if (c == PLUS)
4852 {
4853 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4854 return addr_cost->imm_offset;
4855 else if (GET_CODE (XEXP (x, 0)) == MULT
4856 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4857 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4858 return addr_cost->register_extend;
4859
4860 return addr_cost->register_offset;
4861 }
4862 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4863 return addr_cost->imm_offset;
4864
4865 return 0;
4866}
4867
4868static int
4869aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4870 reg_class_t from, reg_class_t to)
4871{
4872 const struct cpu_regmove_cost *regmove_cost
4873 = aarch64_tune_params->regmove_cost;
4874
6ee70f81
AP
 4875 /* Moving between a GPR and the stack register costs the same as GP2GP. */
4876 if ((from == GENERAL_REGS && to == STACK_REG)
4877 || (to == GENERAL_REGS && from == STACK_REG))
4878 return regmove_cost->GP2GP;
4879
4880 /* To/From the stack register, we move via the gprs. */
4881 if (to == STACK_REG || from == STACK_REG)
4882 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
4883 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
4884
43e9d192
IB
4885 if (from == GENERAL_REGS && to == GENERAL_REGS)
4886 return regmove_cost->GP2GP;
4887 else if (from == GENERAL_REGS)
4888 return regmove_cost->GP2FP;
4889 else if (to == GENERAL_REGS)
4890 return regmove_cost->FP2GP;
4891
4892 /* When AdvSIMD instructions are disabled it is not possible to move
4893 a 128-bit value directly between Q registers. This is handled in
4894 secondary reload. A general register is used as a scratch to move
4895 the upper DI value and the lower DI value is moved directly,
4896 hence the cost is the sum of three moves. */
4897
 4898 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
4899 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4900
4901 return regmove_cost->FP2FP;
4902}
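/* For example, with the cost table above an FP_REGS -> GENERAL_REGS move
   costs FP2GP, a move involving STACK_REG composes two hops through the
   GPRs (FP_REGS -> STACK_REG costs FP2GP + GP2GP), and without TARGET_SIMD
   a 16-byte FP-to-FP move costs GP2FP + FP2GP + FP2FP.  */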
4903
4904static int
4905aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4906 reg_class_t rclass ATTRIBUTE_UNUSED,
4907 bool in ATTRIBUTE_UNUSED)
4908{
4909 return aarch64_tune_params->memmov_cost;
4910}
4911
d126a4ae
AP
4912/* Return the number of instructions that can be issued per cycle. */
4913static int
4914aarch64_sched_issue_rate (void)
4915{
4916 return aarch64_tune_params->issue_rate;
4917}
4918
8990e73a
TB
4919/* Vectorizer cost model target hooks. */
4920
4921/* Implement targetm.vectorize.builtin_vectorization_cost. */
4922static int
4923aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4924 tree vectype,
4925 int misalign ATTRIBUTE_UNUSED)
4926{
4927 unsigned elements;
4928
4929 switch (type_of_cost)
4930 {
4931 case scalar_stmt:
4932 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
4933
4934 case scalar_load:
4935 return aarch64_tune_params->vec_costs->scalar_load_cost;
4936
4937 case scalar_store:
4938 return aarch64_tune_params->vec_costs->scalar_store_cost;
4939
4940 case vector_stmt:
4941 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4942
4943 case vector_load:
4944 return aarch64_tune_params->vec_costs->vec_align_load_cost;
4945
4946 case vector_store:
4947 return aarch64_tune_params->vec_costs->vec_store_cost;
4948
4949 case vec_to_scalar:
4950 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
4951
4952 case scalar_to_vec:
4953 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
4954
4955 case unaligned_load:
4956 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
4957
4958 case unaligned_store:
4959 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
4960
4961 case cond_branch_taken:
4962 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
4963
4964 case cond_branch_not_taken:
4965 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
4966
4967 case vec_perm:
4968 case vec_promote_demote:
4969 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4970
4971 case vec_construct:
4972 elements = TYPE_VECTOR_SUBPARTS (vectype);
4973 return elements / 2 + 1;
4974
4975 default:
4976 gcc_unreachable ();
4977 }
4978}
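/* For instance, vec_construct of a four-element vector costs
   4 / 2 + 1 = 3 units here, while every other entry is read straight from
   the selected tuning's vec_costs table.  */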
4979
4980/* Implement targetm.vectorize.add_stmt_cost. */
4981static unsigned
4982aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4983 struct _stmt_vec_info *stmt_info, int misalign,
4984 enum vect_cost_model_location where)
4985{
4986 unsigned *cost = (unsigned *) data;
4987 unsigned retval = 0;
4988
4989 if (flag_vect_cost_model)
4990 {
4991 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4992 int stmt_cost =
4993 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
4994
4995 /* Statements in an inner loop relative to the loop being
4996 vectorized are weighted more heavily. The value here is
4997 a function (linear for now) of the loop nest level. */
4998 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4999 {
5000 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5001 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
5002 unsigned nest_level = loop_depth (loop);
5003
5004 count *= nest_level;
5005 }
5006
5007 retval = (unsigned) (count * stmt_cost);
5008 cost[where] += retval;
5009 }
5010
5011 return retval;
5012}
5013
43e9d192
IB
5014static void initialize_aarch64_code_model (void);
5015
5016/* Parse the architecture extension string. */
5017
5018static void
5019aarch64_parse_extension (char *str)
5020{
5021 /* The extension string is parsed left to right. */
5022 const struct aarch64_option_extension *opt = NULL;
5023
5024 /* Flag to say whether we are adding or removing an extension. */
5025 int adding_ext = -1;
5026
5027 while (str != NULL && *str != 0)
5028 {
5029 char *ext;
5030 size_t len;
5031
5032 str++;
5033 ext = strchr (str, '+');
5034
5035 if (ext != NULL)
5036 len = ext - str;
5037 else
5038 len = strlen (str);
5039
5040 if (len >= 2 && strncmp (str, "no", 2) == 0)
5041 {
5042 adding_ext = 0;
5043 len -= 2;
5044 str += 2;
5045 }
5046 else if (len > 0)
5047 adding_ext = 1;
5048
5049 if (len == 0)
5050 {
5051 error ("missing feature modifier after %qs", "+no");
5052 return;
5053 }
5054
5055 /* Scan over the extensions table trying to find an exact match. */
5056 for (opt = all_extensions; opt->name != NULL; opt++)
5057 {
5058 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
5059 {
5060 /* Add or remove the extension. */
5061 if (adding_ext)
5062 aarch64_isa_flags |= opt->flags_on;
5063 else
5064 aarch64_isa_flags &= ~(opt->flags_off);
5065 break;
5066 }
5067 }
5068
5069 if (opt->name == NULL)
5070 {
5071 /* Extension not found in list. */
5072 error ("unknown feature modifier %qs", str);
5073 return;
5074 }
5075
5076 str = ext;
5077 };
5078
5079 return;
5080}
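/* Walk-through of aarch64_parse_extension above, assuming "fp" and "simd"
   are entries in all_extensions: for the suffix "+fp+nosimd" the first
   iteration matches "fp" and ORs its flags_on into aarch64_isa_flags; the
   second sees the "no" prefix, matches "simd" and clears that entry's
   flags_off.  An unrecognised name such as "+foo" is reported as an
   unknown feature modifier.  */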
5081
5082/* Parse the ARCH string. */
5083
5084static void
5085aarch64_parse_arch (void)
5086{
5087 char *ext;
5088 const struct processor *arch;
5089 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
5090 size_t len;
5091
5092 strcpy (str, aarch64_arch_string);
5093
5094 ext = strchr (str, '+');
5095
5096 if (ext != NULL)
5097 len = ext - str;
5098 else
5099 len = strlen (str);
5100
5101 if (len == 0)
5102 {
5103 error ("missing arch name in -march=%qs", str);
5104 return;
5105 }
5106
5107 /* Loop through the list of supported ARCHs to find a match. */
5108 for (arch = all_architectures; arch->name != NULL; arch++)
5109 {
5110 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5111 {
5112 selected_arch = arch;
5113 aarch64_isa_flags = selected_arch->flags;
ffee7aa9
JG
5114
5115 if (!selected_cpu)
5116 selected_cpu = &all_cores[selected_arch->core];
43e9d192
IB
5117
5118 if (ext != NULL)
5119 {
5120 /* ARCH string contains at least one extension. */
5121 aarch64_parse_extension (ext);
5122 }
5123
ffee7aa9
JG
5124 if (strcmp (selected_arch->arch, selected_cpu->arch))
5125 {
5126 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
5127 selected_cpu->name, selected_arch->name);
5128 }
5129
43e9d192
IB
5130 return;
5131 }
5132 }
5133
5134 /* ARCH name not found in list. */
5135 error ("unknown value %qs for -march", str);
5136 return;
5137}
5138
5139/* Parse the CPU string. */
5140
5141static void
5142aarch64_parse_cpu (void)
5143{
5144 char *ext;
5145 const struct processor *cpu;
5146 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5147 size_t len;
5148
5149 strcpy (str, aarch64_cpu_string);
5150
5151 ext = strchr (str, '+');
5152
5153 if (ext != NULL)
5154 len = ext - str;
5155 else
5156 len = strlen (str);
5157
5158 if (len == 0)
5159 {
5160 error ("missing cpu name in -mcpu=%qs", str);
5161 return;
5162 }
5163
5164 /* Loop through the list of supported CPUs to find a match. */
5165 for (cpu = all_cores; cpu->name != NULL; cpu++)
5166 {
5167 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5168 {
5169 selected_cpu = cpu;
192ed1dd 5170 selected_tune = cpu;
43e9d192
IB
5171 aarch64_isa_flags = selected_cpu->flags;
5172
5173 if (ext != NULL)
5174 {
5175 /* CPU string contains at least one extension. */
5176 aarch64_parse_extension (ext);
5177 }
5178
5179 return;
5180 }
5181 }
5182
5183 /* CPU name not found in list. */
5184 error ("unknown value %qs for -mcpu", str);
5185 return;
5186}
5187
5188/* Parse the TUNE string. */
5189
5190static void
5191aarch64_parse_tune (void)
5192{
5193 const struct processor *cpu;
5194 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5195 strcpy (str, aarch64_tune_string);
5196
5197 /* Loop through the list of supported CPUs to find a match. */
5198 for (cpu = all_cores; cpu->name != NULL; cpu++)
5199 {
5200 if (strcmp (cpu->name, str) == 0)
5201 {
5202 selected_tune = cpu;
5203 return;
5204 }
5205 }
5206
5207 /* CPU name not found in list. */
5208 error ("unknown value %qs for -mtune", str);
5209 return;
5210}
5211
5212
5213/* Implement TARGET_OPTION_OVERRIDE. */
5214
5215static void
5216aarch64_override_options (void)
5217{
ffee7aa9
JG
5218 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
5219 If either of -march or -mtune is given, they override their
5220 respective component of -mcpu.
43e9d192 5221
ffee7aa9
JG
 5222 So, first parse AARCH64_CPU_STRING, then the others; be careful
 5223 with -march because, if -mcpu is not present on the command line,
 5224 -march must set a sensible default CPU. */
5225 if (aarch64_cpu_string)
43e9d192 5226 {
ffee7aa9 5227 aarch64_parse_cpu ();
43e9d192
IB
5228 }
5229
ffee7aa9 5230 if (aarch64_arch_string)
43e9d192 5231 {
ffee7aa9 5232 aarch64_parse_arch ();
43e9d192
IB
5233 }
5234
5235 if (aarch64_tune_string)
5236 {
5237 aarch64_parse_tune ();
5238 }
5239
63892fa2
KV
5240#ifndef HAVE_AS_MABI_OPTION
5241 /* The compiler may have been configured with 2.23.* binutils, which does
5242 not have support for ILP32. */
5243 if (TARGET_ILP32)
5244 error ("Assembler does not support -mabi=ilp32");
5245#endif
5246
43e9d192
IB
5247 initialize_aarch64_code_model ();
5248
5249 aarch64_build_bitmask_table ();
5250
5251 /* This target defaults to strict volatile bitfields. */
5252 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5253 flag_strict_volatile_bitfields = 1;
5254
5255 /* If the user did not specify a processor, choose the default
5256 one for them. This will be the CPU set during configuration using
e7af1c22 5257 --with-cpu, otherwise it is "cortex-a53". */
43e9d192
IB
5258 if (!selected_cpu)
5259 {
5260 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5261 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5262 }
5263
5264 gcc_assert (selected_cpu);
5265
5266 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
5267 if (!selected_tune)
5268 selected_tune = &all_cores[selected_cpu->core];
5269
5270 aarch64_tune_flags = selected_tune->flags;
5271 aarch64_tune = selected_tune->core;
5272 aarch64_tune_params = selected_tune->tune;
5273
5274 aarch64_override_options_after_change ();
5275}
5276
5277/* Implement targetm.override_options_after_change. */
5278
5279static void
5280aarch64_override_options_after_change (void)
5281{
5282 faked_omit_frame_pointer = false;
5283
5284 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
5285 that aarch64_frame_pointer_required will be called. We need to remember
5286 whether flag_omit_frame_pointer was turned on normally or just faked. */
5287
5288 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
5289 {
5290 flag_omit_frame_pointer = true;
5291 faked_omit_frame_pointer = true;
5292 }
5293}
5294
5295static struct machine_function *
5296aarch64_init_machine_status (void)
5297{
5298 struct machine_function *machine;
5299 machine = ggc_alloc_cleared_machine_function ();
5300 return machine;
5301}
5302
5303void
5304aarch64_init_expanders (void)
5305{
5306 init_machine_status = aarch64_init_machine_status;
5307}
5308
 5309/* Derive the code model in use from aarch64_cmodel_var and flag_pic,
   diagnosing unsupported combinations. */
5310static void
5311initialize_aarch64_code_model (void)
5312{
5313 if (flag_pic)
5314 {
5315 switch (aarch64_cmodel_var)
5316 {
5317 case AARCH64_CMODEL_TINY:
5318 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5319 break;
5320 case AARCH64_CMODEL_SMALL:
5321 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5322 break;
5323 case AARCH64_CMODEL_LARGE:
5324 sorry ("code model %qs with -f%s", "large",
5325 flag_pic > 1 ? "PIC" : "pic");
5326 default:
5327 gcc_unreachable ();
5328 }
5329 }
5330 else
5331 aarch64_cmodel = aarch64_cmodel_var;
5332}
5333
5334/* Return true if SYMBOL_REF X binds locally. */
5335
5336static bool
5337aarch64_symbol_binds_local_p (const_rtx x)
5338{
5339 return (SYMBOL_REF_DECL (x)
5340 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5341 : SYMBOL_REF_LOCAL_P (x));
5342}
5343
 5344/* Return true if SYMBOL_REF X is thread-local. */
5345static bool
5346aarch64_tls_symbol_p (rtx x)
5347{
5348 if (! TARGET_HAVE_TLS)
5349 return false;
5350
5351 if (GET_CODE (x) != SYMBOL_REF)
5352 return false;
5353
5354 return SYMBOL_REF_TLS_MODEL (x) != 0;
5355}
5356
5357/* Classify a TLS symbol into one of the TLS kinds. */
5358enum aarch64_symbol_type
5359aarch64_classify_tls_symbol (rtx x)
5360{
5361 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5362
5363 switch (tls_kind)
5364 {
5365 case TLS_MODEL_GLOBAL_DYNAMIC:
5366 case TLS_MODEL_LOCAL_DYNAMIC:
5367 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5368
5369 case TLS_MODEL_INITIAL_EXEC:
5370 return SYMBOL_SMALL_GOTTPREL;
5371
5372 case TLS_MODEL_LOCAL_EXEC:
5373 return SYMBOL_SMALL_TPREL;
5374
5375 case TLS_MODEL_EMULATED:
5376 case TLS_MODEL_NONE:
5377 return SYMBOL_FORCE_TO_MEM;
5378
5379 default:
5380 gcc_unreachable ();
5381 }
5382}
5383
5384/* Return the method that should be used to access SYMBOL_REF or
5385 LABEL_REF X in context CONTEXT. */
17f4d4bf 5386
43e9d192
IB
5387enum aarch64_symbol_type
5388aarch64_classify_symbol (rtx x,
5389 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5390{
5391 if (GET_CODE (x) == LABEL_REF)
5392 {
5393 switch (aarch64_cmodel)
5394 {
5395 case AARCH64_CMODEL_LARGE:
5396 return SYMBOL_FORCE_TO_MEM;
5397
5398 case AARCH64_CMODEL_TINY_PIC:
5399 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
5400 return SYMBOL_TINY_ABSOLUTE;
5401
43e9d192
IB
5402 case AARCH64_CMODEL_SMALL_PIC:
5403 case AARCH64_CMODEL_SMALL:
5404 return SYMBOL_SMALL_ABSOLUTE;
5405
5406 default:
5407 gcc_unreachable ();
5408 }
5409 }
5410
17f4d4bf 5411 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 5412 {
4a985a37
MS
5413 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5414 return SYMBOL_FORCE_TO_MEM;
43e9d192
IB
5415
5416 if (aarch64_tls_symbol_p (x))
5417 return aarch64_classify_tls_symbol (x);
5418
17f4d4bf
CSS
5419 switch (aarch64_cmodel)
5420 {
5421 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
5422 if (SYMBOL_REF_WEAK (x))
5423 return SYMBOL_FORCE_TO_MEM;
5424 return SYMBOL_TINY_ABSOLUTE;
5425
17f4d4bf
CSS
5426 case AARCH64_CMODEL_SMALL:
5427 if (SYMBOL_REF_WEAK (x))
5428 return SYMBOL_FORCE_TO_MEM;
5429 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 5430
17f4d4bf 5431 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 5432 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 5433 return SYMBOL_TINY_GOT;
38e6c9a6
MS
5434 return SYMBOL_TINY_ABSOLUTE;
5435
17f4d4bf
CSS
5436 case AARCH64_CMODEL_SMALL_PIC:
5437 if (!aarch64_symbol_binds_local_p (x))
5438 return SYMBOL_SMALL_GOT;
5439 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 5440
17f4d4bf
CSS
5441 default:
5442 gcc_unreachable ();
5443 }
43e9d192 5444 }
17f4d4bf 5445
43e9d192
IB
5446 /* By default push everything into the constant pool. */
5447 return SYMBOL_FORCE_TO_MEM;
5448}
5449
43e9d192
IB
5450bool
5451aarch64_constant_address_p (rtx x)
5452{
5453 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5454}
5455
5456bool
5457aarch64_legitimate_pic_operand_p (rtx x)
5458{
5459 if (GET_CODE (x) == SYMBOL_REF
5460 || (GET_CODE (x) == CONST
5461 && GET_CODE (XEXP (x, 0)) == PLUS
5462 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5463 return false;
5464
5465 return true;
5466}
5467
3520f7cc
JG
 5468/* Return true if X holds either a quarter-precision constant or
 5469 a floating-point +0.0 constant. */
5470static bool
5471aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5472{
5473 if (!CONST_DOUBLE_P (x))
5474 return false;
5475
5476 /* TODO: We could handle moving 0.0 to a TFmode register,
5477 but first we would like to refactor the movtf_aarch64
 5478 to be more amenable to splitting moves properly and to
 5479 gate correctly on TARGET_SIMD. For now, reject all
 5480 constants that are not destined for SFmode or DFmode registers. */
5481 if (!(mode == SFmode || mode == DFmode))
5482 return false;
5483
5484 if (aarch64_float_const_zero_rtx_p (x))
5485 return true;
5486 return aarch64_float_const_representable_p (x);
5487}
5488
43e9d192
IB
5489static bool
5490aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5491{
5492 /* Do not allow vector struct mode constants. We could support
5493 0 and -1 easily, but they need support in aarch64-simd.md. */
5494 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5495 return false;
5496
5497 /* This could probably go away because
5498 we now decompose CONST_INTs according to expand_mov_immediate. */
5499 if ((GET_CODE (x) == CONST_VECTOR
48063b9d 5500 && aarch64_simd_valid_immediate (x, mode, false, NULL))
3520f7cc
JG
5501 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5502 return !targetm.cannot_force_const_mem (mode, x);
43e9d192
IB
5503
5504 if (GET_CODE (x) == HIGH
5505 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5506 return true;
5507
5508 return aarch64_constant_address_p (x);
5509}
5510
a5bc806c 5511rtx
43e9d192
IB
5512aarch64_load_tp (rtx target)
5513{
5514 if (!target
5515 || GET_MODE (target) != Pmode
5516 || !register_operand (target, Pmode))
5517 target = gen_reg_rtx (Pmode);
5518
5519 /* Can return in any reg. */
5520 emit_insn (gen_aarch64_load_tp_hard (target));
5521 return target;
5522}
5523
43e9d192
IB
5524/* On AAPCS systems, this is the "struct __va_list". */
5525static GTY(()) tree va_list_type;
5526
5527/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5528 Return the type to use as __builtin_va_list.
5529
5530 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5531
5532 struct __va_list
5533 {
5534 void *__stack;
5535 void *__gr_top;
5536 void *__vr_top;
5537 int __gr_offs;
5538 int __vr_offs;
5539 }; */
5540
5541static tree
5542aarch64_build_builtin_va_list (void)
5543{
5544 tree va_list_name;
5545 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5546
5547 /* Create the type. */
5548 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5549 /* Give it the required name. */
5550 va_list_name = build_decl (BUILTINS_LOCATION,
5551 TYPE_DECL,
5552 get_identifier ("__va_list"),
5553 va_list_type);
5554 DECL_ARTIFICIAL (va_list_name) = 1;
5555 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 5556 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
5557
5558 /* Create the fields. */
5559 f_stack = build_decl (BUILTINS_LOCATION,
5560 FIELD_DECL, get_identifier ("__stack"),
5561 ptr_type_node);
5562 f_grtop = build_decl (BUILTINS_LOCATION,
5563 FIELD_DECL, get_identifier ("__gr_top"),
5564 ptr_type_node);
5565 f_vrtop = build_decl (BUILTINS_LOCATION,
5566 FIELD_DECL, get_identifier ("__vr_top"),
5567 ptr_type_node);
5568 f_groff = build_decl (BUILTINS_LOCATION,
5569 FIELD_DECL, get_identifier ("__gr_offs"),
5570 integer_type_node);
5571 f_vroff = build_decl (BUILTINS_LOCATION,
5572 FIELD_DECL, get_identifier ("__vr_offs"),
5573 integer_type_node);
5574
5575 DECL_ARTIFICIAL (f_stack) = 1;
5576 DECL_ARTIFICIAL (f_grtop) = 1;
5577 DECL_ARTIFICIAL (f_vrtop) = 1;
5578 DECL_ARTIFICIAL (f_groff) = 1;
5579 DECL_ARTIFICIAL (f_vroff) = 1;
5580
5581 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5582 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5583 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5584 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5585 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5586
5587 TYPE_FIELDS (va_list_type) = f_stack;
5588 DECL_CHAIN (f_stack) = f_grtop;
5589 DECL_CHAIN (f_grtop) = f_vrtop;
5590 DECL_CHAIN (f_vrtop) = f_groff;
5591 DECL_CHAIN (f_groff) = f_vroff;
5592
5593 /* Compute its layout. */
5594 layout_type (va_list_type);
5595
5596 return va_list_type;
5597}
5598
5599/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5600static void
5601aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5602{
5603 const CUMULATIVE_ARGS *cum;
5604 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5605 tree stack, grtop, vrtop, groff, vroff;
5606 tree t;
5607 int gr_save_area_size;
5608 int vr_save_area_size;
5609 int vr_offset;
5610
5611 cum = &crtl->args.info;
5612 gr_save_area_size
5613 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5614 vr_save_area_size
5615 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5616
5617 if (TARGET_GENERAL_REGS_ONLY)
5618 {
5619 if (cum->aapcs_nvrn > 0)
5620 sorry ("%qs and floating point or vector arguments",
5621 "-mgeneral-regs-only");
5622 vr_save_area_size = 0;
5623 }
5624
5625 f_stack = TYPE_FIELDS (va_list_type_node);
5626 f_grtop = DECL_CHAIN (f_stack);
5627 f_vrtop = DECL_CHAIN (f_grtop);
5628 f_groff = DECL_CHAIN (f_vrtop);
5629 f_vroff = DECL_CHAIN (f_groff);
5630
5631 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5632 NULL_TREE);
5633 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5634 NULL_TREE);
5635 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5636 NULL_TREE);
5637 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5638 NULL_TREE);
5639 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5640 NULL_TREE);
5641
5642 /* Emit code to initialize STACK, which points to the next varargs stack
5643 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5644 by named arguments. STACK is 8-byte aligned. */
5645 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5646 if (cum->aapcs_stack_size > 0)
5647 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5648 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5649 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5650
5651 /* Emit code to initialize GRTOP, the top of the GR save area.
5652 virtual_incoming_args_rtx should have been 16 byte aligned. */
5653 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5654 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5655 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5656
5657 /* Emit code to initialize VRTOP, the top of the VR save area.
 5658 This address is gr_save_area_size bytes below GRTOP, rounded
5659 down to the next 16-byte boundary. */
5660 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5661 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5662 STACK_BOUNDARY / BITS_PER_UNIT);
5663
5664 if (vr_offset)
5665 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5666 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5667 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5668
5669 /* Emit code to initialize GROFF, the offset from GRTOP of the
5670 next GPR argument. */
5671 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5672 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5673 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5674
 5675 /* Likewise emit code to initialize VROFF, the offset from VRTOP
5676 of the next VR argument. */
5677 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5678 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5679 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5680}
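/* Worked example for aarch64_expand_builtin_va_start above, assuming the
   AAPCS64 register file (8 GPR and 8 VR argument registers, 8-byte words,
   16-byte vector registers): if the named arguments consumed 2 GPRs and
   1 VR, then gr_save_area_size = 48 and vr_save_area_size = 112, so the
   expansion sets __gr_offs = -48, __vr_offs = -112, __gr_top to the
   incoming-arguments pointer and __vr_top to __gr_top - 48.  */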
5681
5682/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5683
5684static tree
5685aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5686 gimple_seq *post_p ATTRIBUTE_UNUSED)
5687{
5688 tree addr;
5689 bool indirect_p;
5690 bool is_ha; /* is HFA or HVA. */
5691 bool dw_align; /* double-word align. */
5692 enum machine_mode ag_mode = VOIDmode;
5693 int nregs;
5694 enum machine_mode mode;
5695
5696 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5697 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5698 HOST_WIDE_INT size, rsize, adjust, align;
5699 tree t, u, cond1, cond2;
5700
5701 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5702 if (indirect_p)
5703 type = build_pointer_type (type);
5704
5705 mode = TYPE_MODE (type);
5706
5707 f_stack = TYPE_FIELDS (va_list_type_node);
5708 f_grtop = DECL_CHAIN (f_stack);
5709 f_vrtop = DECL_CHAIN (f_grtop);
5710 f_groff = DECL_CHAIN (f_vrtop);
5711 f_vroff = DECL_CHAIN (f_groff);
5712
5713 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5714 f_stack, NULL_TREE);
5715 size = int_size_in_bytes (type);
5716 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5717
5718 dw_align = false;
5719 adjust = 0;
5720 if (aarch64_vfp_is_call_or_return_candidate (mode,
5721 type,
5722 &ag_mode,
5723 &nregs,
5724 &is_ha))
5725 {
5726 /* TYPE passed in fp/simd registers. */
5727 if (TARGET_GENERAL_REGS_ONLY)
5728 sorry ("%qs and floating point or vector arguments",
5729 "-mgeneral-regs-only");
5730
5731 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5732 unshare_expr (valist), f_vrtop, NULL_TREE);
5733 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5734 unshare_expr (valist), f_vroff, NULL_TREE);
5735
5736 rsize = nregs * UNITS_PER_VREG;
5737
5738 if (is_ha)
5739 {
5740 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5741 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5742 }
5743 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5744 && size < UNITS_PER_VREG)
5745 {
5746 adjust = UNITS_PER_VREG - size;
5747 }
5748 }
5749 else
5750 {
5751 /* TYPE passed in general registers. */
5752 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5753 unshare_expr (valist), f_grtop, NULL_TREE);
5754 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5755 unshare_expr (valist), f_groff, NULL_TREE);
5756 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5757 nregs = rsize / UNITS_PER_WORD;
5758
5759 if (align > 8)
5760 dw_align = true;
5761
5762 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5763 && size < UNITS_PER_WORD)
5764 {
5765 adjust = UNITS_PER_WORD - size;
5766 }
5767 }
5768
5769 /* Get a local temporary for the field value. */
5770 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5771
5772 /* Emit code to branch if off >= 0. */
5773 t = build2 (GE_EXPR, boolean_type_node, off,
5774 build_int_cst (TREE_TYPE (off), 0));
5775 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5776
5777 if (dw_align)
5778 {
5779 /* Emit: offs = (offs + 15) & -16. */
5780 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5781 build_int_cst (TREE_TYPE (off), 15));
5782 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5783 build_int_cst (TREE_TYPE (off), -16));
5784 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5785 }
5786 else
5787 roundup = NULL;
5788
5789 /* Update ap.__[g|v]r_offs */
5790 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5791 build_int_cst (TREE_TYPE (off), rsize));
5792 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5793
5794 /* String up. */
5795 if (roundup)
5796 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5797
5798 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5799 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5800 build_int_cst (TREE_TYPE (f_off), 0));
5801 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5802
5803 /* String up: make sure the assignment happens before the use. */
5804 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5805 COND_EXPR_ELSE (cond1) = t;
5806
5807 /* Prepare the trees handling the argument that is passed on the stack;
5808 the top level node will store in ON_STACK. */
5809 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5810 if (align > 8)
5811 {
5812 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5813 t = fold_convert (intDI_type_node, arg);
5814 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5815 build_int_cst (TREE_TYPE (t), 15));
5816 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5817 build_int_cst (TREE_TYPE (t), -16));
5818 t = fold_convert (TREE_TYPE (arg), t);
5819 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5820 }
5821 else
5822 roundup = NULL;
5823 /* Advance ap.__stack */
5824 t = fold_convert (intDI_type_node, arg);
5825 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5826 build_int_cst (TREE_TYPE (t), size + 7));
5827 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5828 build_int_cst (TREE_TYPE (t), -8));
5829 t = fold_convert (TREE_TYPE (arg), t);
5830 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5831 /* String up roundup and advance. */
5832 if (roundup)
5833 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5834 /* String up with arg */
5835 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5836 /* Big-endianness related address adjustment. */
5837 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5838 && size < UNITS_PER_WORD)
5839 {
5840 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5841 size_int (UNITS_PER_WORD - size));
5842 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5843 }
5844
5845 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5846 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5847
5848 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5849 t = off;
5850 if (adjust)
5851 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5852 build_int_cst (TREE_TYPE (off), adjust));
5853
5854 t = fold_convert (sizetype, t);
5855 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5856
5857 if (is_ha)
5858 {
5859 /* type ha; // treat as "struct {ftype field[n];}"
5860 ... [computing offs]
 5861 for (i = 0; i < nregs; ++i, offs += 16)
5862 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5863 return ha; */
5864 int i;
5865 tree tmp_ha, field_t, field_ptr_t;
5866
5867 /* Declare a local variable. */
5868 tmp_ha = create_tmp_var_raw (type, "ha");
5869 gimple_add_tmp_var (tmp_ha);
5870
5871 /* Establish the base type. */
5872 switch (ag_mode)
5873 {
5874 case SFmode:
5875 field_t = float_type_node;
5876 field_ptr_t = float_ptr_type_node;
5877 break;
5878 case DFmode:
5879 field_t = double_type_node;
5880 field_ptr_t = double_ptr_type_node;
5881 break;
5882 case TFmode:
5883 field_t = long_double_type_node;
5884 field_ptr_t = long_double_ptr_type_node;
5885 break;
5886/* The half precision and quad precision are not fully supported yet. Enable
5887 the following code after the support is complete. Need to find the correct
5888 type node for __fp16 *. */
5889#if 0
5890 case HFmode:
5891 field_t = float_type_node;
5892 field_ptr_t = float_ptr_type_node;
5893 break;
5894#endif
5895 case V2SImode:
5896 case V4SImode:
5897 {
5898 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5899 field_t = build_vector_type_for_mode (innertype, ag_mode);
5900 field_ptr_t = build_pointer_type (field_t);
5901 }
5902 break;
5903 default:
5904 gcc_assert (0);
5905 }
5906
 5907 /* *(field_ptr_t) &ha = *((field_ptr_t) vr_saved_area)  */
5908 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5909 addr = t;
5910 t = fold_convert (field_ptr_t, addr);
5911 t = build2 (MODIFY_EXPR, field_t,
5912 build1 (INDIRECT_REF, field_t, tmp_ha),
5913 build1 (INDIRECT_REF, field_t, t));
5914
5915 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5916 for (i = 1; i < nregs; ++i)
5917 {
5918 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5919 u = fold_convert (field_ptr_t, addr);
5920 u = build2 (MODIFY_EXPR, field_t,
5921 build2 (MEM_REF, field_t, tmp_ha,
5922 build_int_cst (field_ptr_t,
5923 (i *
5924 int_size_in_bytes (field_t)))),
5925 build1 (INDIRECT_REF, field_t, u));
5926 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5927 }
5928
5929 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5930 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5931 }
5932
5933 COND_EXPR_ELSE (cond2) = t;
5934 addr = fold_convert (build_pointer_type (type), cond1);
5935 addr = build_va_arg_indirect_ref (addr);
5936
5937 if (indirect_p)
5938 addr = build_va_arg_indirect_ref (addr);
5939
5940 return addr;
5941}
5942
5943/* Implement TARGET_SETUP_INCOMING_VARARGS. */
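/* A rough picture of what the code below sets up (a sketch): the unnamed
   general registers are saved in the gr_saved * UNITS_PER_WORD bytes
   immediately below virtual_incoming_args_rtx, and the unnamed vector
   registers are saved, as TImode values, in the vr_saved * UNITS_PER_VREG
   bytes below the 16-byte-aligned base of that GR save area.  va_start
   (above) then points __gr_top and __vr_top at the tops of these two
   areas.  */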
5944
5945static void
5946aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5947 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5948 int no_rtl)
5949{
5950 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5951 CUMULATIVE_ARGS local_cum;
5952 int gr_saved, vr_saved;
5953
5954 /* The caller has advanced CUM up to, but not beyond, the last named
5955 argument. Advance a local copy of CUM past the last "real" named
5956 argument, to find out how many registers are left over. */
5957 local_cum = *cum;
5958 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
5959
 5960 /* Find out how many registers we need to save. */
5961 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5962 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5963
5964 if (TARGET_GENERAL_REGS_ONLY)
5965 {
5966 if (local_cum.aapcs_nvrn > 0)
5967 sorry ("%qs and floating point or vector arguments",
5968 "-mgeneral-regs-only");
5969 vr_saved = 0;
5970 }
5971
5972 if (!no_rtl)
5973 {
5974 if (gr_saved > 0)
5975 {
5976 rtx ptr, mem;
5977
5978 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5979 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5980 - gr_saved * UNITS_PER_WORD);
5981 mem = gen_frame_mem (BLKmode, ptr);
5982 set_mem_alias_set (mem, get_varargs_alias_set ());
5983
5984 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5985 mem, gr_saved);
5986 }
5987 if (vr_saved > 0)
5988 {
5989 /* We can't use move_block_from_reg, because it will use
5990 the wrong mode, storing D regs only. */
5991 enum machine_mode mode = TImode;
5992 int off, i;
5993
5994 /* Set OFF to the offset from virtual_incoming_args_rtx of
5995 the first vector register. The VR save area lies below
5996 the GR one, and is aligned to 16 bytes. */
5997 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5998 STACK_BOUNDARY / BITS_PER_UNIT);
5999 off -= vr_saved * UNITS_PER_VREG;
6000
6001 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
6002 {
6003 rtx ptr, mem;
6004
6005 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
6006 mem = gen_frame_mem (mode, ptr);
6007 set_mem_alias_set (mem, get_varargs_alias_set ());
6008 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
6009 off += UNITS_PER_VREG;
6010 }
6011 }
6012 }
6013
6014 /* We don't save the size into *PRETEND_SIZE because we want to avoid
6015 any complication of having crtl->args.pretend_args_size changed. */
6016 cfun->machine->saved_varargs_size
6017 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6018 STACK_BOUNDARY / BITS_PER_UNIT)
6019 + vr_saved * UNITS_PER_VREG);
6020}
6021
6022static void
6023aarch64_conditional_register_usage (void)
6024{
6025 int i;
6026 if (!TARGET_FLOAT)
6027 {
6028 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
6029 {
6030 fixed_regs[i] = 1;
6031 call_used_regs[i] = 1;
6032 }
6033 }
6034}
6035
6036/* Walk down the type tree of TYPE counting consecutive base elements.
6037 If *MODEP is VOIDmode, then set it to the first valid floating point
6038 type. If a non-floating point type is found, or if a floating point
6039 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6040 otherwise return the count in the sub-tree. */
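/* For example, given

     struct hfa { double x; double y; double z; };

   this walk returns 3 with *MODEP set to DFmode, i.e. a homogeneous
   floating-point aggregate of three double fields (an illustrative case;
   the struct name is arbitrary).  */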
6041static int
6042aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
6043{
6044 enum machine_mode mode;
6045 HOST_WIDE_INT size;
6046
6047 switch (TREE_CODE (type))
6048 {
6049 case REAL_TYPE:
6050 mode = TYPE_MODE (type);
6051 if (mode != DFmode && mode != SFmode && mode != TFmode)
6052 return -1;
6053
6054 if (*modep == VOIDmode)
6055 *modep = mode;
6056
6057 if (*modep == mode)
6058 return 1;
6059
6060 break;
6061
6062 case COMPLEX_TYPE:
6063 mode = TYPE_MODE (TREE_TYPE (type));
6064 if (mode != DFmode && mode != SFmode && mode != TFmode)
6065 return -1;
6066
6067 if (*modep == VOIDmode)
6068 *modep = mode;
6069
6070 if (*modep == mode)
6071 return 2;
6072
6073 break;
6074
6075 case VECTOR_TYPE:
6076 /* Use V2SImode and V4SImode as representatives of all 64-bit
6077 and 128-bit vector types. */
6078 size = int_size_in_bytes (type);
6079 switch (size)
6080 {
6081 case 8:
6082 mode = V2SImode;
6083 break;
6084 case 16:
6085 mode = V4SImode;
6086 break;
6087 default:
6088 return -1;
6089 }
6090
6091 if (*modep == VOIDmode)
6092 *modep = mode;
6093
6094 /* Vector modes are considered to be opaque: two vectors are
6095 equivalent for the purposes of being homogeneous aggregates
6096 if they are the same size. */
6097 if (*modep == mode)
6098 return 1;
6099
6100 break;
6101
6102 case ARRAY_TYPE:
6103 {
6104 int count;
6105 tree index = TYPE_DOMAIN (type);
6106
6107 /* Can't handle incomplete types. */
6108 if (!COMPLETE_TYPE_P (type))
6109 return -1;
6110
6111 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6112 if (count == -1
6113 || !index
6114 || !TYPE_MAX_VALUE (index)
cc269bb6 6115 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 6116 || !TYPE_MIN_VALUE (index)
cc269bb6 6117 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6118 || count < 0)
6119 return -1;
6120
ae7e9ddd
RS
6121 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6122 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
6123
6124 /* There must be no padding. */
cc269bb6 6125 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
7d362f6c 6126 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6127 != count * GET_MODE_BITSIZE (*modep)))
6128 return -1;
6129
6130 return count;
6131 }
6132
6133 case RECORD_TYPE:
6134 {
6135 int count = 0;
6136 int sub_count;
6137 tree field;
6138
6139 /* Can't handle incomplete types. */
6140 if (!COMPLETE_TYPE_P (type))
6141 return -1;
6142
6143 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6144 {
6145 if (TREE_CODE (field) != FIELD_DECL)
6146 continue;
6147
6148 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6149 if (sub_count < 0)
6150 return -1;
6151 count += sub_count;
6152 }
6153
6154 /* There must be no padding. */
cc269bb6 6155 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
7d362f6c 6156 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6157 != count * GET_MODE_BITSIZE (*modep)))
6158 return -1;
6159
6160 return count;
6161 }
6162
6163 case UNION_TYPE:
6164 case QUAL_UNION_TYPE:
6165 {
6166 /* These aren't very interesting except in a degenerate case. */
6167 int count = 0;
6168 int sub_count;
6169 tree field;
6170
6171 /* Can't handle incomplete types. */
6172 if (!COMPLETE_TYPE_P (type))
6173 return -1;
6174
6175 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6176 {
6177 if (TREE_CODE (field) != FIELD_DECL)
6178 continue;
6179
6180 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6181 if (sub_count < 0)
6182 return -1;
6183 count = count > sub_count ? count : sub_count;
6184 }
6185
6186 /* There must be no padding. */
cc269bb6 6187 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
7d362f6c 6188 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6189 != count * GET_MODE_BITSIZE (*modep)))
6190 return -1;
6191
6192 return count;
6193 }
6194
6195 default:
6196 break;
6197 }
6198
6199 return -1;
6200}
6201
38e8f663
YR
6202/* Return true if we use LRA instead of reload pass. */
6203static bool
6204aarch64_lra_p (void)
6205{
6206 return aarch64_lra_flag;
6207}
6208
43e9d192
IB
6209/* Return TRUE if the type, as described by TYPE and MODE, is a composite
6210 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6211 array types. The C99 floating-point complex types are also considered
6212 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6213 types, which are GCC extensions and out of the scope of AAPCS64, are
6214 treated as composite types here as well.
6215
6216 Note that MODE itself is not sufficient in determining whether a type
6217 is such a composite type or not. This is because
6218 stor-layout.c:compute_record_mode may have already changed the MODE
6219 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6220 structure with only one field may have its MODE set to the mode of the
6221 field. Also an integer mode whose size matches the size of the
6222 RECORD_TYPE type may be used to substitute the original mode
6223 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6224 solely relied on. */
6225
6226static bool
6227aarch64_composite_type_p (const_tree type,
6228 enum machine_mode mode)
6229{
6230 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6231 return true;
6232
6233 if (mode == BLKmode
6234 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6235 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6236 return true;
6237
6238 return false;
6239}
6240
6241/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6242 type as described in AAPCS64 \S 4.1.2.
6243
6244 See the comment above aarch64_composite_type_p for the notes on MODE. */
6245
6246static bool
6247aarch64_short_vector_p (const_tree type,
6248 enum machine_mode mode)
6249{
6250 HOST_WIDE_INT size = -1;
6251
6252 if (type && TREE_CODE (type) == VECTOR_TYPE)
6253 size = int_size_in_bytes (type);
6254 else if (!aarch64_composite_type_p (type, mode)
6255 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6256 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6257 size = GET_MODE_SIZE (mode);
6258
6259 return (size == 8 || size == 16) ? true : false;
6260}
6261
6262/* Return TRUE if an argument, whose type is described by TYPE and MODE,
6263 shall be passed or returned in simd/fp register(s) (providing these
6264 parameter passing registers are available).
6265
6266 Upon successful return, *COUNT returns the number of needed registers,
 6267 *BASE_MODE returns the mode of the individual register and when IS_HA
6268 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6269 floating-point aggregate or a homogeneous short-vector aggregate. */
6270
6271static bool
6272aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6273 const_tree type,
6274 enum machine_mode *base_mode,
6275 int *count,
6276 bool *is_ha)
6277{
6278 enum machine_mode new_mode = VOIDmode;
6279 bool composite_p = aarch64_composite_type_p (type, mode);
6280
6281 if (is_ha != NULL) *is_ha = false;
6282
6283 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6284 || aarch64_short_vector_p (type, mode))
6285 {
6286 *count = 1;
6287 new_mode = mode;
6288 }
6289 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6290 {
6291 if (is_ha != NULL) *is_ha = true;
6292 *count = 2;
6293 new_mode = GET_MODE_INNER (mode);
6294 }
6295 else if (type && composite_p)
6296 {
6297 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6298
6299 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6300 {
6301 if (is_ha != NULL) *is_ha = true;
6302 *count = ag_count;
6303 }
6304 else
6305 return false;
6306 }
6307 else
6308 return false;
6309
6310 *base_mode = new_mode;
6311 return true;
6312}
6313
6314/* Implement TARGET_STRUCT_VALUE_RTX. */
6315
6316static rtx
6317aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6318 int incoming ATTRIBUTE_UNUSED)
6319{
6320 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6321}
6322
6323/* Implements target hook vector_mode_supported_p. */
6324static bool
6325aarch64_vector_mode_supported_p (enum machine_mode mode)
6326{
6327 if (TARGET_SIMD
6328 && (mode == V4SImode || mode == V8HImode
6329 || mode == V16QImode || mode == V2DImode
6330 || mode == V2SImode || mode == V4HImode
6331 || mode == V8QImode || mode == V2SFmode
6332 || mode == V4SFmode || mode == V2DFmode))
6333 return true;
6334
6335 return false;
6336}
6337
b7342d25
IB
6338/* Return appropriate SIMD container
6339 for MODE within a vector of WIDTH bits. */
43e9d192 6340static enum machine_mode
b7342d25 6341aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
43e9d192 6342{
b7342d25 6343 gcc_assert (width == 64 || width == 128);
43e9d192 6344 if (TARGET_SIMD)
b7342d25
IB
6345 {
6346 if (width == 128)
6347 switch (mode)
6348 {
6349 case DFmode:
6350 return V2DFmode;
6351 case SFmode:
6352 return V4SFmode;
6353 case SImode:
6354 return V4SImode;
6355 case HImode:
6356 return V8HImode;
6357 case QImode:
6358 return V16QImode;
6359 case DImode:
6360 return V2DImode;
6361 default:
6362 break;
6363 }
6364 else
6365 switch (mode)
6366 {
6367 case SFmode:
6368 return V2SFmode;
6369 case SImode:
6370 return V2SImode;
6371 case HImode:
6372 return V4HImode;
6373 case QImode:
6374 return V8QImode;
6375 default:
6376 break;
6377 }
6378 }
43e9d192
IB
6379 return word_mode;
6380}
6381
b7342d25
IB
6382/* Return 128-bit container as the preferred SIMD mode for MODE. */
6383static enum machine_mode
6384aarch64_preferred_simd_mode (enum machine_mode mode)
6385{
6386 return aarch64_simd_container_mode (mode, 128);
6387}
6388
3b357264
JG
6389/* Return the bitmask of possible vector sizes for the vectorizer
6390 to iterate over. */
6391static unsigned int
6392aarch64_autovectorize_vector_sizes (void)
6393{
6394 return (16 | 8);
6395}
6396
c6fc9e43
YZ
6397/* A table to help perform AArch64-specific name mangling for AdvSIMD
6398 vector types in order to conform to the AAPCS64 (see "Procedure
6399 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6400 qualify for emission with the mangled names defined in that document,
6401 a vector type must not only be of the correct mode but also be
6402 composed of AdvSIMD vector element types (e.g.
 6403 __builtin_aarch64_simd_qi); these types are registered by
6404 aarch64_init_simd_builtins (). In other words, vector types defined
6405 in other ways e.g. via vector_size attribute will get default
6406 mangled names. */
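/* As an illustrative example, int32x4_t from arm_neon.h (mode V4SImode,
   element type __builtin_aarch64_simd_si) maps to "11__Int32x4_t", so a
   C++ function f (int32x4_t) mangles as _Z1f11__Int32x4_t.  */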
6407typedef struct
6408{
6409 enum machine_mode mode;
6410 const char *element_type_name;
6411 const char *mangled_name;
6412} aarch64_simd_mangle_map_entry;
6413
6414static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6415 /* 64-bit containerized types. */
6416 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6417 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6418 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6419 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6420 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6421 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6422 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6423 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6424 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6425 /* 128-bit containerized types. */
6426 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6427 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6428 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6429 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6430 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6431 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6432 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6433 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6434 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6435 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6436 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6437 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
7baa225d 6438 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
c6fc9e43
YZ
6439 { VOIDmode, NULL, NULL }
6440};
6441
ac2b960f
YZ
6442/* Implement TARGET_MANGLE_TYPE. */
6443
6f549691 6444static const char *
6445aarch64_mangle_type (const_tree type)
6446{
6447 /* The AArch64 ABI documents say that "__va_list" has to be
 6448 mangled as if it is in the "std" namespace. */
6449 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6450 return "St9__va_list";
6451
c6fc9e43
YZ
6452 /* Check the mode of the vector type, and the name of the vector
6453 element type, against the table. */
6454 if (TREE_CODE (type) == VECTOR_TYPE)
6455 {
6456 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6457
6458 while (pos->mode != VOIDmode)
6459 {
6460 tree elt_type = TREE_TYPE (type);
6461
6462 if (pos->mode == TYPE_MODE (type)
6463 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6464 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6465 pos->element_type_name))
6466 return pos->mangled_name;
6467
6468 pos++;
6469 }
6470 }
6471
ac2b960f
YZ
6472 /* Use the default mangling. */
6473 return NULL;
6474}
6475
43e9d192 6476/* Return the equivalent letter for size. */
81c2dfb9 6477static char
6478sizetochar (int size)
6479{
6480 switch (size)
6481 {
6482 case 64: return 'd';
6483 case 32: return 's';
6484 case 16: return 'h';
6485 case 8 : return 'b';
6486 default: gcc_unreachable ();
6487 }
6488}
6489
3520f7cc
JG
6490/* Return true iff x is a uniform vector of floating-point
6491 constants, and the constant can be represented in
 6492 quarter-precision form. Note, as aarch64_float_const_representable_p
 6493 rejects both +0.0 and -0.0, we reject those values here as well. */
6494static bool
6495aarch64_vect_float_const_representable_p (rtx x)
6496{
6497 int i = 0;
6498 REAL_VALUE_TYPE r0, ri;
6499 rtx x0, xi;
6500
6501 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6502 return false;
6503
6504 x0 = CONST_VECTOR_ELT (x, 0);
6505 if (!CONST_DOUBLE_P (x0))
6506 return false;
6507
6508 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6509
6510 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6511 {
6512 xi = CONST_VECTOR_ELT (x, i);
6513 if (!CONST_DOUBLE_P (xi))
6514 return false;
6515
6516 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6517 if (!REAL_VALUES_EQUAL (r0, ri))
6518 return false;
6519 }
6520
6521 return aarch64_float_const_representable_p (x0);
6522}
6523
d8edd899 6524/* Return true for valid and false for invalid. */
3ea63f60 6525bool
6526aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
6527 struct simd_immediate_info *info)
43e9d192
IB
6528{
6529#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6530 matches = 1; \
6531 for (i = 0; i < idx; i += (STRIDE)) \
6532 if (!(TEST)) \
6533 matches = 0; \
6534 if (matches) \
6535 { \
6536 immtype = (CLASS); \
6537 elsize = (ELSIZE); \
6538 eshift = (SHIFT); \
6539 emvn = (NEG); \
6540 break; \
6541 }
6542
6543 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6544 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6545 unsigned char bytes[16];
43e9d192
IB
6546 int immtype = -1, matches;
6547 unsigned int invmask = inverse ? 0xff : 0;
6548 int eshift, emvn;
6549
43e9d192 6550 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3520f7cc 6551 {
81c2dfb9
IB
6552 if (! (aarch64_simd_imm_zero_p (op, mode)
6553 || aarch64_vect_float_const_representable_p (op)))
d8edd899 6554 return false;
3520f7cc 6555
48063b9d
IB
6556 if (info)
6557 {
6558 info->value = CONST_VECTOR_ELT (op, 0);
81c2dfb9 6559 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
48063b9d
IB
6560 info->mvn = false;
6561 info->shift = 0;
6562 }
3520f7cc 6563
d8edd899 6564 return true;
3520f7cc 6565 }
43e9d192
IB
6566
6567 /* Splat vector constant out into a byte vector. */
6568 for (i = 0; i < n_elts; i++)
6569 {
6570 rtx el = CONST_VECTOR_ELT (op, i);
6571 unsigned HOST_WIDE_INT elpart;
6572 unsigned int part, parts;
6573
6574 if (GET_CODE (el) == CONST_INT)
6575 {
6576 elpart = INTVAL (el);
6577 parts = 1;
6578 }
6579 else if (GET_CODE (el) == CONST_DOUBLE)
6580 {
6581 elpart = CONST_DOUBLE_LOW (el);
6582 parts = 2;
6583 }
6584 else
6585 gcc_unreachable ();
6586
6587 for (part = 0; part < parts; part++)
6588 {
6589 unsigned int byte;
6590 for (byte = 0; byte < innersize; byte++)
6591 {
6592 bytes[idx++] = (elpart & 0xff) ^ invmask;
6593 elpart >>= BITS_PER_UNIT;
6594 }
6595 if (GET_CODE (el) == CONST_DOUBLE)
6596 elpart = CONST_DOUBLE_HIGH (el);
6597 }
6598 }
6599
6600 /* Sanity check. */
6601 gcc_assert (idx == GET_MODE_SIZE (mode));
6602
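  /* As a worked example of the checks below: the V4SImode constant whose
     elements are all 0x00ab0000 splats to the byte pattern
     { 0, 0, 0xab, 0 } repeated four times, which satisfies the CLASS 2
     test (only bytes[2] non-zero, shift 16), and is later printed by
     aarch64_output_simd_mov_immediate as something like
     "movi v0.4s, 0xab, lsl 16".  */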
6603 do
6604 {
6605 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6606 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6607
6608 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6609 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6610
6611 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6612 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6613
6614 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6615 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6616
6617 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6618
6619 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6620
6621 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6622 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6623
6624 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6625 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6626
6627 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6628 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6629
6630 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6631 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6632
6633 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6634
6635 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6636
6637 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
e4f0f84d 6638 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
43e9d192
IB
6639
6640 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
e4f0f84d 6641 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
43e9d192
IB
6642
6643 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
e4f0f84d 6644 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
43e9d192
IB
6645
6646 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
e4f0f84d 6647 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
43e9d192
IB
6648
6649 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6650
6651 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6652 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6653 }
6654 while (0);
6655
e4f0f84d 6656 if (immtype == -1)
d8edd899 6657 return false;
43e9d192 6658
48063b9d 6659 if (info)
43e9d192 6660 {
48063b9d 6661 info->element_width = elsize;
48063b9d
IB
6662 info->mvn = emvn != 0;
6663 info->shift = eshift;
6664
43e9d192
IB
6665 unsigned HOST_WIDE_INT imm = 0;
6666
e4f0f84d
TB
6667 if (immtype >= 12 && immtype <= 15)
6668 info->msl = true;
6669
43e9d192
IB
6670 /* Un-invert bytes of recognized vector, if necessary. */
6671 if (invmask != 0)
6672 for (i = 0; i < idx; i++)
6673 bytes[i] ^= invmask;
6674
6675 if (immtype == 17)
6676 {
6677 /* FIXME: Broken on 32-bit H_W_I hosts. */
6678 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6679
6680 for (i = 0; i < 8; i++)
6681 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6682 << (i * BITS_PER_UNIT);
6683
43e9d192 6684
48063b9d
IB
6685 info->value = GEN_INT (imm);
6686 }
6687 else
6688 {
6689 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6690 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
43e9d192
IB
6691
6692 /* Construct 'abcdefgh' because the assembler cannot handle
6693 generic constants. */
6694 if (info->mvn)
43e9d192 6695 imm = ~imm;
48063b9d
IB
6696 imm = (imm >> info->shift) & 0xff;
6697 info->value = GEN_INT (imm);
6698 }
43e9d192
IB
6699 }
6700
48063b9d 6701 return true;
43e9d192
IB
6702#undef CHECK
6703}
6704
43e9d192
IB
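/* Return true if X is a CONST_VECTOR of integer mode whose elements are
   all the same value and that value lies in [MINVAL, MAXVAL].  */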
6705static bool
6706aarch64_const_vec_all_same_int_p (rtx x,
6707 HOST_WIDE_INT minval,
6708 HOST_WIDE_INT maxval)
6709{
6710 HOST_WIDE_INT firstval;
6711 int count, i;
6712
6713 if (GET_CODE (x) != CONST_VECTOR
6714 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6715 return false;
6716
6717 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6718 if (firstval < minval || firstval > maxval)
6719 return false;
6720
6721 count = CONST_VECTOR_NUNITS (x);
6722 for (i = 1; i < count; i++)
6723 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6724 return false;
6725
6726 return true;
6727}
6728
6729/* Check whether immediate shift constants are within range. */
6730bool
6731aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6732{
6733 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6734 if (left)
6735 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6736 else
6737 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6738}
6739
3520f7cc
JG
6740/* Return true if X is a uniform vector where all elements
6741 are either the floating-point constant 0.0 or the
6742 integer constant 0. */
43e9d192
IB
6743bool
6744aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6745{
3520f7cc 6746 return x == CONST0_RTX (mode);
43e9d192
IB
6747}
6748
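/* Return true if X is a 64-bit immediate every byte of which is either
   0x00 or 0xff, i.e. one that MOVI can materialize into a D register
   through its per-byte (abcdefgh) mask encoding.  */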
6749bool
6750aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6751{
6752 HOST_WIDE_INT imm = INTVAL (x);
6753 int i;
6754
6755 for (i = 0; i < 8; i++)
6756 {
6757 unsigned int byte = imm & 0xff;
6758 if (byte != 0xff && byte != 0)
6759 return false;
6760 imm >>= 8;
6761 }
6762
6763 return true;
6764}
6765
83f8c414
CSS
6766bool
6767aarch64_mov_operand_p (rtx x,
a5350ddc 6768 enum aarch64_symbol_context context,
6769 enum machine_mode mode)
6770{
83f8c414
CSS
6771 if (GET_CODE (x) == HIGH
6772 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6773 return true;
6774
6775 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
6776 return true;
6777
6778 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
6779 return true;
6780
a5350ddc
CSS
6781 return aarch64_classify_symbolic_expression (x, context)
6782 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
6783}
6784
43e9d192
IB
6785/* Return a const_int vector of VAL. */
6786rtx
6787aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6788{
6789 int nunits = GET_MODE_NUNITS (mode);
6790 rtvec v = rtvec_alloc (nunits);
6791 int i;
6792
6793 for (i=0; i < nunits; i++)
6794 RTVEC_ELT (v, i) = GEN_INT (val);
6795
6796 return gen_rtx_CONST_VECTOR (mode, v);
6797}
6798
051d0e2f
SN
6799/* Check OP is a legal scalar immediate for the MOVI instruction. */
6800
6801bool
6802aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
6803{
6804 enum machine_mode vmode;
6805
6806 gcc_assert (!VECTOR_MODE_P (mode));
6807 vmode = aarch64_preferred_simd_mode (mode);
6808 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
48063b9d 6809 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
051d0e2f
SN
6810}
6811
43e9d192
IB
6812/* Construct and return a PARALLEL RTX vector listing the lanes of either the low (HIGH false) or high (HIGH true) half of MODE. */
6813rtx
6814aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6815{
6816 int nunits = GET_MODE_NUNITS (mode);
6817 rtvec v = rtvec_alloc (nunits / 2);
6818 int base = high ? nunits / 2 : 0;
6819 rtx t1;
6820 int i;
6821
6822 for (i=0; i < nunits / 2; i++)
6823 RTVEC_ELT (v, i) = GEN_INT (base + i);
6824
6825 t1 = gen_rtx_PARALLEL (mode, v);
6826 return t1;
6827}
6828
6829/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6830 HIGH (exclusive). */
6831void
6832aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6833{
6834 HOST_WIDE_INT lane;
6835 gcc_assert (GET_CODE (operand) == CONST_INT);
6836 lane = INTVAL (operand);
6837
6838 if (lane < low || lane >= high)
6839 error ("lane out of range");
6840}
6841
6842void
6843aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6844{
6845 gcc_assert (GET_CODE (operand) == CONST_INT);
6846 HOST_WIDE_INT lane = INTVAL (operand);
6847
6848 if (lane < low || lane >= high)
6849 error ("constant out of range");
6850}
6851
6852/* Emit code to reinterpret one AdvSIMD type as another,
6853 without altering bits. */
6854void
6855aarch64_simd_reinterpret (rtx dest, rtx src)
6856{
6857 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6858}
6859
6860/* Emit code to place an AdvSIMD pair result in memory locations (with equal
6861 registers). */
6862void
6863aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6864 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6865 rtx op1)
6866{
6867 rtx mem = gen_rtx_MEM (mode, destaddr);
6868 rtx tmp1 = gen_reg_rtx (mode);
6869 rtx tmp2 = gen_reg_rtx (mode);
6870
6871 emit_insn (intfn (tmp1, op1, tmp2));
6872
6873 emit_move_insn (mem, tmp1);
6874 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6875 emit_move_insn (mem, tmp2);
6876}
6877
6878/* Return TRUE if OP is a valid vector addressing mode. */
6879bool
6880aarch64_simd_mem_operand_p (rtx op)
6881{
6882 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6883 || GET_CODE (XEXP (op, 0)) == REG);
6884}
6885
6886/* Set up OPERANDS for a register copy from SRC to DEST, taking care
6887 not to early-clobber SRC registers in the process.
6888
6889 We assume that the operands described by SRC and DEST represent a
6890 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6891 number of components into which the copy has been decomposed. */
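/* For example, copying an OImode value from {v1, v2} to {v2, v3} overlaps
   and the destination starts at the higher register number, so the
   component moves are emitted in reverse order (v3 <- v2 before v2 <- v1)
   so that v2 is read before it is overwritten.  The register numbers here
   are purely illustrative.  */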
6892void
6893aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6894 rtx *src, unsigned int count)
6895{
6896 unsigned int i;
6897
6898 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6899 || REGNO (operands[0]) < REGNO (operands[1]))
6900 {
6901 for (i = 0; i < count; i++)
6902 {
6903 operands[2 * i] = dest[i];
6904 operands[2 * i + 1] = src[i];
6905 }
6906 }
6907 else
6908 {
6909 for (i = 0; i < count; i++)
6910 {
6911 operands[2 * i] = dest[count - i - 1];
6912 operands[2 * i + 1] = src[count - i - 1];
6913 }
6914 }
6915}
6916
6917/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6918 one of VSTRUCT modes: OI, CI or XI. */
6919int
6920aarch64_simd_attr_length_move (rtx insn)
6921{
43e9d192
IB
6922 enum machine_mode mode;
6923
6924 extract_insn_cached (insn);
6925
6926 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6927 {
6928 mode = GET_MODE (recog_data.operand[0]);
6929 switch (mode)
6930 {
6931 case OImode:
6932 return 8;
6933 case CImode:
6934 return 12;
6935 case XImode:
6936 return 16;
6937 default:
6938 gcc_unreachable ();
6939 }
6940 }
6941 return 4;
6942}
6943
db0253a4
TB
6944/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6945 alignment of a vector to 128 bits. */
6946static HOST_WIDE_INT
6947aarch64_simd_vector_alignment (const_tree type)
6948{
9439e9a1 6949 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
db0253a4
TB
6950 return MIN (align, 128);
6951}
6952
6953/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6954static bool
6955aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6956{
6957 if (is_packed)
6958 return false;
6959
6960 /* We guarantee alignment for vectors up to 128-bits. */
6961 if (tree_int_cst_compare (TYPE_SIZE (type),
6962 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6963 return false;
6964
6965 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6966 return true;
6967}
6968
4369c11e
TB
6969/* If VALS is a vector constant that can be loaded into a register
6970 using DUP, generate instructions to do so and return an RTX to
6971 assign to the register. Otherwise return NULL_RTX. */
6972static rtx
6973aarch64_simd_dup_constant (rtx vals)
6974{
6975 enum machine_mode mode = GET_MODE (vals);
6976 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6977 int n_elts = GET_MODE_NUNITS (mode);
6978 bool all_same = true;
6979 rtx x;
6980 int i;
6981
6982 if (GET_CODE (vals) != CONST_VECTOR)
6983 return NULL_RTX;
6984
6985 for (i = 1; i < n_elts; ++i)
6986 {
6987 x = CONST_VECTOR_ELT (vals, i);
6988 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6989 all_same = false;
6990 }
6991
6992 if (!all_same)
6993 return NULL_RTX;
6994
6995 /* We can load this constant by using DUP and a constant in a
6996 single ARM register. This will be cheaper than a vector
6997 load. */
6998 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6999 return gen_rtx_VEC_DUPLICATE (mode, x);
7000}
7001
7002
7003/* Generate code to load VALS, which is a PARALLEL containing only
7004 constants (for vec_init) or CONST_VECTOR, efficiently into a
7005 register. Returns an RTX to copy into the register, or NULL_RTX
7006 for a PARALLEL that can not be converted into a CONST_VECTOR. */
1df3f464 7007static rtx
7008aarch64_simd_make_constant (rtx vals)
7009{
7010 enum machine_mode mode = GET_MODE (vals);
7011 rtx const_dup;
7012 rtx const_vec = NULL_RTX;
7013 int n_elts = GET_MODE_NUNITS (mode);
7014 int n_const = 0;
7015 int i;
7016
7017 if (GET_CODE (vals) == CONST_VECTOR)
7018 const_vec = vals;
7019 else if (GET_CODE (vals) == PARALLEL)
7020 {
7021 /* A CONST_VECTOR must contain only CONST_INTs and
7022 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
7023 Only store valid constants in a CONST_VECTOR. */
7024 for (i = 0; i < n_elts; ++i)
7025 {
7026 rtx x = XVECEXP (vals, 0, i);
7027 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
7028 n_const++;
7029 }
7030 if (n_const == n_elts)
7031 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7032 }
7033 else
7034 gcc_unreachable ();
7035
7036 if (const_vec != NULL_RTX
48063b9d 7037 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
4369c11e
TB
7038 /* Load using MOVI/MVNI. */
7039 return const_vec;
7040 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
7041 /* Loaded using DUP. */
7042 return const_dup;
7043 else if (const_vec != NULL_RTX)
7044 /* Load from constant pool. We can not take advantage of single-cycle
7045 LD1 because we need a PC-relative addressing mode. */
7046 return const_vec;
7047 else
7048 /* A PARALLEL containing something not valid inside CONST_VECTOR.
7049 We can not construct an initializer. */
7050 return NULL_RTX;
7051}
7052
7053void
7054aarch64_expand_vector_init (rtx target, rtx vals)
7055{
7056 enum machine_mode mode = GET_MODE (target);
7057 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7058 int n_elts = GET_MODE_NUNITS (mode);
7059 int n_var = 0, one_var = -1;
7060 bool all_same = true;
7061 rtx x, mem;
7062 int i;
7063
7064 x = XVECEXP (vals, 0, 0);
7065 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7066 n_var = 1, one_var = 0;
7067
7068 for (i = 1; i < n_elts; ++i)
7069 {
7070 x = XVECEXP (vals, 0, i);
7071 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7072 ++n_var, one_var = i;
7073
7074 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7075 all_same = false;
7076 }
7077
7078 if (n_var == 0)
7079 {
7080 rtx constant = aarch64_simd_make_constant (vals);
7081 if (constant != NULL_RTX)
7082 {
7083 emit_move_insn (target, constant);
7084 return;
7085 }
7086 }
7087
7088 /* Splat a single non-constant element if we can. */
7089 if (all_same)
7090 {
7091 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
7092 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
7093 return;
7094 }
7095
7096 /* One field is non-constant. Load constant then overwrite varying
7097 field. This is more efficient than using the stack. */
7098 if (n_var == 1)
7099 {
7100 rtx copy = copy_rtx (vals);
7101 rtx index = GEN_INT (one_var);
7102 enum insn_code icode;
7103
7104 /* Load constant part of vector, substitute neighboring value for
7105 varying element. */
7106 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
7107 aarch64_expand_vector_init (target, copy);
7108
7109 /* Insert variable. */
7110 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
7111 icode = optab_handler (vec_set_optab, mode);
7112 gcc_assert (icode != CODE_FOR_nothing);
7113 emit_insn (GEN_FCN (icode) (target, x, index));
7114 return;
7115 }
7116
7117 /* Construct the vector in memory one field at a time
7118 and load the whole vector. */
7119 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7120 for (i = 0; i < n_elts; i++)
7121 emit_move_insn (adjust_address_nv (mem, inner_mode,
7122 i * GET_MODE_SIZE (inner_mode)),
7123 XVECEXP (vals, 0, i));
7124 emit_move_insn (target, mem);
7125
7126}
7127
43e9d192
IB
7128static unsigned HOST_WIDE_INT
7129aarch64_shift_truncation_mask (enum machine_mode mode)
7130{
7131 return
7132 (aarch64_vector_mode_supported_p (mode)
7133 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
7134}
7135
7136#ifndef TLS_SECTION_ASM_FLAG
7137#define TLS_SECTION_ASM_FLAG 'T'
7138#endif
7139
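/* Emit the assembly directive switching to section NAME with attribute
   FLAGS, e.g. (an illustrative sketch of the output for a TLS BSS
   section):

     .section .tbss,"awT",@nobits  */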
7140void
7141aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7142 tree decl ATTRIBUTE_UNUSED)
7143{
7144 char flagchars[10], *f = flagchars;
7145
7146 /* If we have already declared this section, we can use an
7147 abbreviated form to switch back to it -- unless this section is
 7148 part of a COMDAT group, in which case GAS requires the full
7149 declaration every time. */
7150 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7151 && (flags & SECTION_DECLARED))
7152 {
7153 fprintf (asm_out_file, "\t.section\t%s\n", name);
7154 return;
7155 }
7156
7157 if (!(flags & SECTION_DEBUG))
7158 *f++ = 'a';
7159 if (flags & SECTION_WRITE)
7160 *f++ = 'w';
7161 if (flags & SECTION_CODE)
7162 *f++ = 'x';
7163 if (flags & SECTION_SMALL)
7164 *f++ = 's';
7165 if (flags & SECTION_MERGE)
7166 *f++ = 'M';
7167 if (flags & SECTION_STRINGS)
7168 *f++ = 'S';
7169 if (flags & SECTION_TLS)
7170 *f++ = TLS_SECTION_ASM_FLAG;
7171 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7172 *f++ = 'G';
7173 *f = '\0';
7174
7175 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7176
7177 if (!(flags & SECTION_NOTYPE))
7178 {
7179 const char *type;
7180 const char *format;
7181
7182 if (flags & SECTION_BSS)
7183 type = "nobits";
7184 else
7185 type = "progbits";
7186
7187#ifdef TYPE_OPERAND_FMT
7188 format = "," TYPE_OPERAND_FMT;
7189#else
7190 format = ",@%s";
7191#endif
7192
7193 fprintf (asm_out_file, format, type);
7194
7195 if (flags & SECTION_ENTSIZE)
7196 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7197 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7198 {
7199 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7200 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7201 else
7202 fprintf (asm_out_file, ",%s,comdat",
7203 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7204 }
7205 }
7206
7207 putc ('\n', asm_out_file);
7208}
7209
7210/* Select a format to encode pointers in exception handling data. */
7211int
7212aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7213{
7214 int type;
7215 switch (aarch64_cmodel)
7216 {
7217 case AARCH64_CMODEL_TINY:
7218 case AARCH64_CMODEL_TINY_PIC:
7219 case AARCH64_CMODEL_SMALL:
7220 case AARCH64_CMODEL_SMALL_PIC:
7221 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
7222 for everything. */
7223 type = DW_EH_PE_sdata4;
7224 break;
7225 default:
7226 /* No assumptions here. 8-byte relocs required. */
7227 type = DW_EH_PE_sdata8;
7228 break;
7229 }
7230 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7231}
7232
0462169c
SN
7233/* Emit load exclusive. */
7234
7235static void
7236aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7237 rtx mem, rtx model_rtx)
7238{
7239 rtx (*gen) (rtx, rtx, rtx);
7240
7241 switch (mode)
7242 {
7243 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7244 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7245 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7246 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7247 default:
7248 gcc_unreachable ();
7249 }
7250
7251 emit_insn (gen (rval, mem, model_rtx));
7252}
7253
7254/* Emit store exclusive. */
7255
7256static void
7257aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7258 rtx rval, rtx mem, rtx model_rtx)
7259{
7260 rtx (*gen) (rtx, rtx, rtx, rtx);
7261
7262 switch (mode)
7263 {
7264 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7265 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7266 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7267 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7268 default:
7269 gcc_unreachable ();
7270 }
7271
7272 emit_insn (gen (bval, rval, mem, model_rtx));
7273}
7274
7275/* Mark the previous jump instruction as unlikely. */
7276
7277static void
7278aarch64_emit_unlikely_jump (rtx insn)
7279{
e5af9ddd 7280 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
0462169c
SN
7281
7282 insn = emit_jump_insn (insn);
e5af9ddd 7283 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
0462169c
SN
7284}
7285
7286/* Expand a compare and swap pattern. */
7287
7288void
7289aarch64_expand_compare_and_swap (rtx operands[])
7290{
7291 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7292 enum machine_mode mode, cmp_mode;
7293 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7294
7295 bval = operands[0];
7296 rval = operands[1];
7297 mem = operands[2];
7298 oldval = operands[3];
7299 newval = operands[4];
7300 is_weak = operands[5];
7301 mod_s = operands[6];
7302 mod_f = operands[7];
7303 mode = GET_MODE (mem);
7304 cmp_mode = mode;
7305
7306 /* Normally the succ memory model must be stronger than fail, but in the
7307 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7308 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7309
7310 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7311 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7312 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7313
7314 switch (mode)
7315 {
7316 case QImode:
7317 case HImode:
7318 /* For short modes, we're going to perform the comparison in SImode,
7319 so do the zero-extension now. */
7320 cmp_mode = SImode;
7321 rval = gen_reg_rtx (SImode);
7322 oldval = convert_modes (SImode, mode, oldval, true);
7323 /* Fall through. */
7324
7325 case SImode:
7326 case DImode:
7327 /* Force the value into a register if needed. */
7328 if (!aarch64_plus_operand (oldval, mode))
7329 oldval = force_reg (cmp_mode, oldval);
7330 break;
7331
7332 default:
7333 gcc_unreachable ();
7334 }
7335
7336 switch (mode)
7337 {
7338 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7339 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7340 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7341 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7342 default:
7343 gcc_unreachable ();
7344 }
7345
7346 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7347
7348 if (mode == QImode || mode == HImode)
7349 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7350
7351 x = gen_rtx_REG (CCmode, CC_REGNUM);
7352 x = gen_rtx_EQ (SImode, x, const0_rtx);
7353 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7354}
7355
7356/* Split a compare and swap pattern. */
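/* For a strong compare-and-swap the split below produces, roughly (a
   sketch; the exact load/store-exclusive variants depend on the memory
   model operand):

     retry:
       ldxr	rval, [mem]
       cmp	rval, oldval
       b.ne	done
       stxr	scratch, newval, [mem]
       cbnz	scratch, retry
     done:  */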
7357
7358void
7359aarch64_split_compare_and_swap (rtx operands[])
7360{
7361 rtx rval, mem, oldval, newval, scratch;
7362 enum machine_mode mode;
0462169c
SN
7363 bool is_weak;
7364 rtx label1, label2, x, cond;
7365
7366 rval = operands[0];
7367 mem = operands[1];
7368 oldval = operands[2];
7369 newval = operands[3];
7370 is_weak = (operands[4] != const0_rtx);
0462169c
SN
7371 scratch = operands[7];
7372 mode = GET_MODE (mem);
7373
7374 label1 = NULL_RTX;
7375 if (!is_weak)
7376 {
7377 label1 = gen_label_rtx ();
7378 emit_label (label1);
7379 }
7380 label2 = gen_label_rtx ();
7381
7382 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7383
7384 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7385 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7386 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7387 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7388 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7389
7390 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7391
7392 if (!is_weak)
7393 {
7394 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7395 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7396 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7397 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7398 }
7399 else
7400 {
7401 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7402 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7403 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7404 }
7405
7406 emit_label (label2);
7407}
7408
7409/* Split an atomic operation. */
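/* For example, an atomic fetch-and-add splits into a loop of roughly this
   shape (a sketch; the load/store-exclusive variants again depend on the
   memory model):

     retry:
       ldxr	old, [mem]
       add	new, old, value
       stxr	cond, new, [mem]
       cbnz	cond, retry  */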
7410
7411void
7412aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7413 rtx value, rtx model_rtx, rtx cond)
7414{
7415 enum machine_mode mode = GET_MODE (mem);
7416 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7417 rtx label, x;
7418
7419 label = gen_label_rtx ();
7420 emit_label (label);
7421
7422 if (new_out)
7423 new_out = gen_lowpart (wmode, new_out);
7424 if (old_out)
7425 old_out = gen_lowpart (wmode, old_out);
7426 else
7427 old_out = new_out;
7428 value = simplify_gen_subreg (wmode, value, mode, 0);
7429
7430 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7431
7432 switch (code)
7433 {
7434 case SET:
7435 new_out = value;
7436 break;
7437
7438 case NOT:
7439 x = gen_rtx_AND (wmode, old_out, value);
7440 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7441 x = gen_rtx_NOT (wmode, new_out);
7442 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7443 break;
7444
7445 case MINUS:
7446 if (CONST_INT_P (value))
7447 {
7448 value = GEN_INT (-INTVAL (value));
7449 code = PLUS;
7450 }
7451 /* Fall through. */
7452
7453 default:
7454 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7455 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7456 break;
7457 }
7458
7459 aarch64_emit_store_exclusive (mode, cond, mem,
7460 gen_lowpart (mode, new_out), model_rtx);
7461
7462 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7463 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7464 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7465 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7466}
7467
95ca411e
YZ
7468static void
7469aarch64_print_extension (void)
7470{
7471 const struct aarch64_option_extension *opt = NULL;
7472
7473 for (opt = all_extensions; opt->name != NULL; opt++)
7474 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7475 asm_fprintf (asm_out_file, "+%s", opt->name);
7476
7477 asm_fprintf (asm_out_file, "\n");
7478}
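/* Together with aarch64_start_file below, this produces header directives
   such as "\t.arch armv8-a+fp+simd+crypto" or "\t.cpu cortex-a53+fp+simd"
   (illustrative examples; the exact extension list depends on
   aarch64_isa_flags).  */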
7479
43e9d192
IB
7480static void
7481aarch64_start_file (void)
7482{
7483 if (selected_arch)
95ca411e
YZ
7484 {
7485 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7486 aarch64_print_extension ();
7487 }
43e9d192 7488 else if (selected_cpu)
95ca411e 7489 {
682287fb
JG
7490 const char *truncated_name
7491 = aarch64_rewrite_selected_cpu (selected_cpu->name);
7492 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
95ca411e
YZ
7493 aarch64_print_extension ();
7494 }
43e9d192
IB
7495 default_file_start();
7496}
7497
7498/* Target hook for c_mode_for_suffix. */
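/* This lets floating-point constants written with a 'q' suffix, e.g. 1.0q,
   be parsed as TFmode (128-bit IEEE quad) literals.  */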
7499static enum machine_mode
7500aarch64_c_mode_for_suffix (char suffix)
7501{
7502 if (suffix == 'q')
7503 return TFmode;
7504
7505 return VOIDmode;
7506}
7507
3520f7cc
JG
7508/* We can only represent floating point constants which will fit in
7509 "quarter-precision" values. These values are characterised by
 7510 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
7511 by:
7512
7513 (-1)^s * (n/16) * 2^r
7514
7515 Where:
7516 's' is the sign bit.
7517 'n' is an integer in the range 16 <= n <= 31.
7518 'r' is an integer in the range -3 <= r <= 4. */
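/* So, for example, 1.0 = (16/16) * 2^0, 0.125 = (16/16) * 2^-3 and
   31.0 = (31/16) * 2^4 are all representable; the representable
   magnitudes therefore run from 0.125 up to 31.0.  */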
7519
7520/* Return true iff X can be represented by a quarter-precision
 7521 floating point immediate operand. Note, we cannot represent 0.0. */
7522bool
7523aarch64_float_const_representable_p (rtx x)
7524{
7525 /* This represents our current view of how many bits
7526 make up the mantissa. */
7527 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 7528 int exponent;
3520f7cc
JG
7529 unsigned HOST_WIDE_INT mantissa, mask;
7530 HOST_WIDE_INT m1, m2;
7531 REAL_VALUE_TYPE r, m;
7532
7533 if (!CONST_DOUBLE_P (x))
7534 return false;
7535
7536 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7537
7538 /* We cannot represent infinities, NaNs or +/-zero. We won't
7539 know if we have +zero until we analyse the mantissa, but we
7540 can reject the other invalid values. */
7541 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7542 || REAL_VALUE_MINUS_ZERO (r))
7543 return false;
7544
ba96cdfb 7545 /* Extract exponent. */
3520f7cc
JG
7546 r = real_value_abs (&r);
7547 exponent = REAL_EXP (&r);
7548
7549 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7550 highest (sign) bit, with a fixed binary point at bit point_pos.
7551 m1 holds the low part of the mantissa, m2 the high part.
7552 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7553 bits for the mantissa, this can fail (low bits will be lost). */
7554 real_ldexp (&m, &r, point_pos - exponent);
7555 REAL_VALUE_TO_INT (&m1, &m2, m);
7556
7557 /* If the low part of the mantissa has bits set we cannot represent
7558 the value. */
7559 if (m1 != 0)
7560 return false;
7561 /* We have rejected the lower HOST_WIDE_INT, so update our
7562 understanding of how many bits lie in the mantissa and
7563 look only at the high HOST_WIDE_INT. */
7564 mantissa = m2;
7565 point_pos -= HOST_BITS_PER_WIDE_INT;
7566
7567 /* We can only represent values with a mantissa of the form 1.xxxx. */
7568 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7569 if ((mantissa & mask) != 0)
7570 return false;
7571
7572 /* Having filtered unrepresentable values, we may now remove all
7573 but the highest 5 bits. */
7574 mantissa >>= point_pos - 5;
7575
7576 /* We cannot represent the value 0.0, so reject it. This is handled
7577 elsewhere. */
7578 if (mantissa == 0)
7579 return false;
7580
7581 /* Then, as bit 4 is always set, we can mask it off, leaving
7582 the mantissa in the range [0, 15]. */
7583 mantissa &= ~(1 << 4);
7584 gcc_assert (mantissa <= 15);
7585
7586 /* GCC internally does not use IEEE754-like encoding (where normalized
7587 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
7588 Our mantissa values are shifted 4 places to the left relative to
7589 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7590 by 5 places to correct for GCC's representation. */
7591 exponent = 5 - exponent;
7592
7593 return (exponent >= 0 && exponent <= 7);
7594}
7595
7596char*
81c2dfb9 7597aarch64_output_simd_mov_immediate (rtx const_vector,
7598 enum machine_mode mode,
7599 unsigned width)
7600{
3ea63f60 7601 bool is_valid;
3520f7cc 7602 static char templ[40];
3520f7cc 7603 const char *mnemonic;
e4f0f84d 7604 const char *shift_op;
3520f7cc 7605 unsigned int lane_count = 0;
81c2dfb9 7606 char element_char;
3520f7cc 7607
e4f0f84d 7608 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
7609
7610 /* This will return true to show const_vector is legal for use as
7611 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
7612 also update INFO to show how the immediate should be generated. */
81c2dfb9 7613 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
7614 gcc_assert (is_valid);
7615
81c2dfb9 7616 element_char = sizetochar (info.element_width);
7617 lane_count = width / info.element_width;
7618
7619 mode = GET_MODE_INNER (mode);
7620 if (mode == SFmode || mode == DFmode)
7621 {
7622 gcc_assert (info.shift == 0 && ! info.mvn);
7623 if (aarch64_float_const_zero_rtx_p (info.value))
7624 info.value = GEN_INT (0);
7625 else
7626 {
7627#define buf_size 20
7628 REAL_VALUE_TYPE r;
7629 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
7630 char float_buf[buf_size] = {'\0'};
7631 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
7632#undef buf_size
7633
7634 if (lane_count == 1)
7635 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
7636 else
7637 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 7638 lane_count, element_char, float_buf);
7639 return templ;
7640 }
3520f7cc 7641 }
3520f7cc 7642
48063b9d 7643 mnemonic = info.mvn ? "mvni" : "movi";
e4f0f84d 7644 shift_op = info.msl ? "msl" : "lsl";
7645
7646 if (lane_count == 1)
7647 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
7648 mnemonic, UINTVAL (info.value));
7649 else if (info.shift)
7650 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
7651 ", %s %d", mnemonic, lane_count, element_char,
7652 UINTVAL (info.value), shift_op, info.shift);
3520f7cc 7653 else
48063b9d 7654 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
81c2dfb9 7655 mnemonic, lane_count, element_char, UINTVAL (info.value));
7656 return templ;
7657}
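/* Illustrative example (assuming a 128-bit WIDTH): for a V4SImode constant
   with every lane equal to 256, the immediate decomposes as value 0x1 with
   a left shift of 8, so the template returned is
   "movi\t%0.4s, 0x1, lsl 8".  */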
7658
7659char*
7660aarch64_output_scalar_simd_mov_immediate (rtx immediate,
7661 enum machine_mode mode)
7662{
7663 enum machine_mode vmode;
7664
7665 gcc_assert (!VECTOR_MODE_P (mode));
7666 vmode = aarch64_simd_container_mode (mode, 64);
7667 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
7668 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
7669}
7670
7671/* Split operands into moves from op[1] + op[2] into op[0]. */
7672
7673void
7674aarch64_split_combinev16qi (rtx operands[3])
7675{
7676 unsigned int dest = REGNO (operands[0]);
7677 unsigned int src1 = REGNO (operands[1]);
7678 unsigned int src2 = REGNO (operands[2]);
7679 enum machine_mode halfmode = GET_MODE (operands[1]);
7680 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7681 rtx destlo, desthi;
7682
7683 gcc_assert (halfmode == V16QImode);
7684
7685 if (src1 == dest && src2 == dest + halfregs)
7686 {
7687 /* No-op move. Can't split to nothing; emit something. */
7688 emit_note (NOTE_INSN_DELETED);
7689 return;
7690 }
7691
7692 /* Preserve register attributes for variable tracking. */
7693 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7694 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7695 GET_MODE_SIZE (halfmode));
7696
7697 /* Special case of reversed high/low parts. */
7698 if (reg_overlap_mentioned_p (operands[2], destlo)
7699 && reg_overlap_mentioned_p (operands[1], desthi))
7700 {
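      /* Swap the two halves in place with three XORs (a ^= b; b ^= a;
         a ^= b); a plain pair of moves would clobber a value that the
         other move still needs.  */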
7701 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7702 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7703 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7704 }
7705 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7706 {
7707 /* Try to avoid unnecessary moves if part of the result
7708 is in the right place already. */
7709 if (src1 != dest)
7710 emit_move_insn (destlo, operands[1]);
7711 if (src2 != dest + halfregs)
7712 emit_move_insn (desthi, operands[2]);
7713 }
7714 else
7715 {
7716 if (src2 != dest + halfregs)
7717 emit_move_insn (desthi, operands[2]);
7718 if (src1 != dest)
7719 emit_move_insn (destlo, operands[1]);
7720 }
7721}
7722
7723/* vec_perm support. */
7724
7725#define MAX_VECT_LEN 16
7726
7727struct expand_vec_perm_d
7728{
7729 rtx target, op0, op1;
7730 unsigned char perm[MAX_VECT_LEN];
7731 enum machine_mode vmode;
7732 unsigned char nelt;
7733 bool one_vector_p;
7734 bool testing_p;
7735};
7736
7737/* Generate a variable permutation. */
7738
7739static void
7740aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7741{
7742 enum machine_mode vmode = GET_MODE (target);
7743 bool one_vector_p = rtx_equal_p (op0, op1);
7744
7745 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7746 gcc_checking_assert (GET_MODE (op0) == vmode);
7747 gcc_checking_assert (GET_MODE (op1) == vmode);
7748 gcc_checking_assert (GET_MODE (sel) == vmode);
7749 gcc_checking_assert (TARGET_SIMD);
7750
7751 if (one_vector_p)
7752 {
7753 if (vmode == V8QImode)
7754 {
7755 /* Expand the argument to a V16QI mode by duplicating it. */
7756 rtx pair = gen_reg_rtx (V16QImode);
7757 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7758 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7759 }
7760 else
7761 {
7762 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7763 }
7764 }
7765 else
7766 {
7767 rtx pair;
7768
7769 if (vmode == V8QImode)
7770 {
7771 pair = gen_reg_rtx (V16QImode);
7772 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7773 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7774 }
7775 else
7776 {
7777 pair = gen_reg_rtx (OImode);
7778 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7779 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7780 }
7781 }
7782}
7783
7784void
7785aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7786{
7787 enum machine_mode vmode = GET_MODE (target);
7788 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7789 bool one_vector_p = rtx_equal_p (op0, op1);
7790 rtx rmask[MAX_VECT_LEN], mask;
7791
7792 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7793
7794 /* The TBL instruction does not use a modulo index, so we must take care
7795 of that ourselves. */
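  /* For example, for a two-operand V16QImode permutation nelt is 16, so
     every selector byte is ANDed with 31 (0x1f) before expanding the TBL.  */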
7796 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7797 for (i = 0; i < nelt; ++i)
7798 rmask[i] = mask;
7799 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7800 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7801
7802 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7803}
7804
7805/* Recognize patterns suitable for the TRN instructions. */
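/* For example (V4SImode, inputs {a0 a1 a2 a3} and {b0 b1 b2 b3}): the
   selector {0, 4, 2, 6} matches TRN1, giving {a0 b0 a2 b2}, and
   {1, 5, 3, 7} matches TRN2, giving {a1 b1 a3 b3}.  */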
7806static bool
7807aarch64_evpc_trn (struct expand_vec_perm_d *d)
7808{
7809 unsigned int i, odd, mask, nelt = d->nelt;
7810 rtx out, in0, in1, x;
7811 rtx (*gen) (rtx, rtx, rtx);
7812 enum machine_mode vmode = d->vmode;
7813
7814 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7815 return false;
7816
7817 /* Note that these are little-endian tests.
7818 We correct for big-endian later. */
7819 if (d->perm[0] == 0)
7820 odd = 0;
7821 else if (d->perm[0] == 1)
7822 odd = 1;
7823 else
7824 return false;
7825 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7826
7827 for (i = 0; i < nelt; i += 2)
7828 {
7829 if (d->perm[i] != i + odd)
7830 return false;
7831 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7832 return false;
7833 }
7834
7835 /* Success! */
7836 if (d->testing_p)
7837 return true;
7838
7839 in0 = d->op0;
7840 in1 = d->op1;
7841 if (BYTES_BIG_ENDIAN)
7842 {
7843 x = in0, in0 = in1, in1 = x;
7844 odd = !odd;
7845 }
7846 out = d->target;
7847
7848 if (odd)
7849 {
7850 switch (vmode)
7851 {
7852 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7853 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7854 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7855 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7856 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7857 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7858 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7859 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7860 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7861 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7862 default:
7863 return false;
7864 }
7865 }
7866 else
7867 {
7868 switch (vmode)
7869 {
7870 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7871 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7872 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7873 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7874 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7875 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7876 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7877 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7878 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7879 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7880 default:
7881 return false;
7882 }
7883 }
7884
7885 emit_insn (gen (out, in0, in1));
7886 return true;
7887}
7888
7889/* Recognize patterns suitable for the UZP instructions. */
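/* For example (V4SImode, inputs {a0 a1 a2 a3} and {b0 b1 b2 b3}): the
   selector {0, 2, 4, 6} matches UZP1, giving {a0 a2 b0 b2}, and
   {1, 3, 5, 7} matches UZP2, giving {a1 a3 b1 b3}.  */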
7890static bool
7891aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7892{
7893 unsigned int i, odd, mask, nelt = d->nelt;
7894 rtx out, in0, in1, x;
7895 rtx (*gen) (rtx, rtx, rtx);
7896 enum machine_mode vmode = d->vmode;
7897
7898 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7899 return false;
7900
7901 /* Note that these are little-endian tests.
7902 We correct for big-endian later. */
7903 if (d->perm[0] == 0)
7904 odd = 0;
7905 else if (d->perm[0] == 1)
7906 odd = 1;
7907 else
7908 return false;
7909 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7910
7911 for (i = 0; i < nelt; i++)
7912 {
7913 unsigned elt = (i * 2 + odd) & mask;
7914 if (d->perm[i] != elt)
7915 return false;
7916 }
7917
7918 /* Success! */
7919 if (d->testing_p)
7920 return true;
7921
7922 in0 = d->op0;
7923 in1 = d->op1;
7924 if (BYTES_BIG_ENDIAN)
7925 {
7926 x = in0, in0 = in1, in1 = x;
7927 odd = !odd;
7928 }
7929 out = d->target;
7930
7931 if (odd)
7932 {
7933 switch (vmode)
7934 {
7935 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7936 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7937 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7938 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7939 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7940 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7941 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7942 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7943 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7944 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7945 default:
7946 return false;
7947 }
7948 }
7949 else
7950 {
7951 switch (vmode)
7952 {
7953 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7954 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7955 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7956 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7957 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7958 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7959 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7960 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7961 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7962 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7963 default:
7964 return false;
7965 }
7966 }
7967
7968 emit_insn (gen (out, in0, in1));
7969 return true;
7970}
7971
7972/* Recognize patterns suitable for the ZIP instructions. */
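/* For example (V4SImode, inputs {a0 a1 a2 a3} and {b0 b1 b2 b3}): the
   selector {0, 4, 1, 5} matches ZIP1, giving {a0 b0 a1 b1}, and
   {2, 6, 3, 7} matches ZIP2, giving {a2 b2 a3 b3}.  */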
7973static bool
7974aarch64_evpc_zip (struct expand_vec_perm_d *d)
7975{
7976 unsigned int i, high, mask, nelt = d->nelt;
7977 rtx out, in0, in1, x;
7978 rtx (*gen) (rtx, rtx, rtx);
7979 enum machine_mode vmode = d->vmode;
7980
7981 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7982 return false;
7983
7984 /* Note that these are little-endian tests.
7985 We correct for big-endian later. */
7986 high = nelt / 2;
7987 if (d->perm[0] == high)
7988 /* Do Nothing. */
7989 ;
7990 else if (d->perm[0] == 0)
7991 high = 0;
7992 else
7993 return false;
7994 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7995
7996 for (i = 0; i < nelt / 2; i++)
7997 {
7998 unsigned elt = (i + high) & mask;
7999 if (d->perm[i * 2] != elt)
8000 return false;
8001 elt = (elt + nelt) & mask;
8002 if (d->perm[i * 2 + 1] != elt)
8003 return false;
8004 }
8005
8006 /* Success! */
8007 if (d->testing_p)
8008 return true;
8009
8010 in0 = d->op0;
8011 in1 = d->op1;
8012 if (BYTES_BIG_ENDIAN)
8013 {
8014 x = in0, in0 = in1, in1 = x;
8015 high = !high;
8016 }
8017 out = d->target;
8018
8019 if (high)
8020 {
8021 switch (vmode)
8022 {
8023 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
8024 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
8025 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
8026 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
8027 case V4SImode: gen = gen_aarch64_zip2v4si; break;
8028 case V2SImode: gen = gen_aarch64_zip2v2si; break;
8029 case V2DImode: gen = gen_aarch64_zip2v2di; break;
8030 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
8031 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
8032 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
8033 default:
8034 return false;
8035 }
8036 }
8037 else
8038 {
8039 switch (vmode)
8040 {
8041 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
8042 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
8043 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
8044 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
8045 case V4SImode: gen = gen_aarch64_zip1v4si; break;
8046 case V2SImode: gen = gen_aarch64_zip1v2si; break;
8047 case V2DImode: gen = gen_aarch64_zip1v2di; break;
8048 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
8049 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
8050 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
8051 default:
8052 return false;
8053 }
8054 }
8055
8056 emit_insn (gen (out, in0, in1));
8057 return true;
8058}
8059
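/* Recognize permutations in which every selector element is identical, so
   the result is a broadcast of one lane and a DUP instruction can be used.  */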
8060static bool
8061aarch64_evpc_dup (struct expand_vec_perm_d *d)
8062{
8063 rtx (*gen) (rtx, rtx, rtx);
8064 rtx out = d->target;
8065 rtx in0;
8066 enum machine_mode vmode = d->vmode;
8067 unsigned int i, elt, nelt = d->nelt;
8068 rtx lane;
8069
8070 /* TODO: This may not be big-endian safe. */
8071 if (BYTES_BIG_ENDIAN)
8072 return false;
8073
8074 elt = d->perm[0];
8075 for (i = 1; i < nelt; i++)
8076 {
8077 if (elt != d->perm[i])
8078 return false;
8079 }
8080
8081 /* The generic preparation in aarch64_expand_vec_perm_const_1
8082 swaps the operand order and the permute indices if it finds
8083 d->perm[0] to be in the second operand. Thus, we can always
8084 use d->op0 and need not do any extra arithmetic to get the
8085 correct lane number. */
8086 in0 = d->op0;
8087 lane = GEN_INT (elt);
8088
8089 switch (vmode)
8090 {
8091 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
8092 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
8093 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
8094 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
8095 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
8096 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
8097 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
8098 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
8099 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
8100 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
8101 default:
8102 return false;
8103 }
8104
8105 emit_insn (gen (out, in0, lane));
8106 return true;
8107}
8108
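/* Fall back to a TBL-based permutation: force the selector into a register
   as a constant vector and reuse the variable-permute expansion above.  */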
8109static bool
8110aarch64_evpc_tbl (struct expand_vec_perm_d *d)
8111{
8112 rtx rperm[MAX_VECT_LEN], sel;
8113 enum machine_mode vmode = d->vmode;
8114 unsigned int i, nelt = d->nelt;
8115
8116 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
8117 numbering of elements for big-endian, we must reverse the order. */
8118 if (BYTES_BIG_ENDIAN)
8119 return false;
8120
8121 if (d->testing_p)
8122 return true;
8123
8124 /* Generic code will try constant permutation twice: once with the
8125 original mode and again with the elements lowered to QImode.
8126 So wait and don't do the selector expansion ourselves. */
8127 if (vmode != V8QImode && vmode != V16QImode)
8128 return false;
8129
8130 for (i = 0; i < nelt; ++i)
8131 rperm[i] = GEN_INT (d->perm[i]);
8132 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
8133 sel = force_reg (vmode, sel);
8134
8135 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
8136 return true;
8137}
8138
8139static bool
8140aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
8141{
8142 /* The pattern matching functions above are written to look for a small
8143 number to begin the sequence (0, 1, N/2). If we begin with an index
8144 from the second operand, we can swap the operands. */
8145 if (d->perm[0] >= d->nelt)
8146 {
8147 unsigned i, nelt = d->nelt;
8148 rtx x;
8149
8150 for (i = 0; i < nelt; ++i)
8151 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
8152
8153 x = d->op0;
8154 d->op0 = d->op1;
8155 d->op1 = x;
8156 }
8157
8158 if (TARGET_SIMD)
8159 {
8160 if (aarch64_evpc_zip (d))
8161 return true;
8162 else if (aarch64_evpc_uzp (d))
8163 return true;
8164 else if (aarch64_evpc_trn (d))
8165 return true;
8166 else if (aarch64_evpc_dup (d))
8167 return true;
8168 return aarch64_evpc_tbl (d);
8169 }
8170 return false;
8171}
8172
8173/* Expand a vec_perm_const pattern. */
8174
8175bool
8176aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
8177{
8178 struct expand_vec_perm_d d;
8179 int i, nelt, which;
8180
8181 d.target = target;
8182 d.op0 = op0;
8183 d.op1 = op1;
8184
8185 d.vmode = GET_MODE (target);
8186 gcc_assert (VECTOR_MODE_P (d.vmode));
8187 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8188 d.testing_p = false;
8189
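  /* WHICH accumulates a bitmask of the operands the selector refers to:
     bit 0 for indices into the first operand (index < nelt), bit 1 for
     indices into the second (index >= nelt).  */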
8190 for (i = which = 0; i < nelt; ++i)
8191 {
8192 rtx e = XVECEXP (sel, 0, i);
8193 int ei = INTVAL (e) & (2 * nelt - 1);
8194 which |= (ei < nelt ? 1 : 2);
8195 d.perm[i] = ei;
8196 }
8197
8198 switch (which)
8199 {
8200 default:
8201 gcc_unreachable ();
8202
8203 case 3:
8204 d.one_vector_p = false;
8205 if (!rtx_equal_p (op0, op1))
8206 break;
8207
8208 /* The elements of PERM do not suggest that only the first operand
8209 is used, but both operands are identical. Allow easier matching
8210 of the permutation by folding the permutation into the single
8211 input vector. */
8212 /* Fall Through. */
8213 case 2:
8214 for (i = 0; i < nelt; ++i)
8215 d.perm[i] &= nelt - 1;
8216 d.op0 = op1;
8217 d.one_vector_p = true;
8218 break;
8219
8220 case 1:
8221 d.op1 = op0;
8222 d.one_vector_p = true;
8223 break;
8224 }
8225
8226 return aarch64_expand_vec_perm_const_1 (&d);
8227}
8228
8229static bool
8230aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8231 const unsigned char *sel)
8232{
8233 struct expand_vec_perm_d d;
8234 unsigned int i, nelt, which;
8235 bool ret;
8236
8237 d.vmode = vmode;
8238 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8239 d.testing_p = true;
8240 memcpy (d.perm, sel, nelt);
8241
8242 /* Calculate whether all elements are in one vector. */
8243 for (i = which = 0; i < nelt; ++i)
8244 {
8245 unsigned char e = d.perm[i];
8246 gcc_assert (e < 2 * nelt);
8247 which |= (e < nelt ? 1 : 2);
8248 }
8249
8250 /* If all elements are from the second vector, reindex as if from the
8251 first vector. */
8252 if (which == 2)
8253 for (i = 0; i < nelt; ++i)
8254 d.perm[i] -= nelt;
8255
8256 /* Check whether the mask can be applied to a single vector. */
8257 d.one_vector_p = (which != 3);
8258
8259 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8260 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8261 if (!d.one_vector_p)
8262 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8263
8264 start_sequence ();
8265 ret = aarch64_expand_vec_perm_const_1 (&d);
8266 end_sequence ();
8267
8268 return ret;
8269}
8270
8271/* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
8272bool
8273aarch64_cannot_change_mode_class (enum machine_mode from,
8274 enum machine_mode to,
8275 enum reg_class rclass)
8276{
8277 /* Full-reg subregs are allowed on general regs or any class if they are
8278 the same size. */
8279 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
8280 || !reg_classes_intersect_p (FP_REGS, rclass))
8281 return false;
8282
8283 /* Limited combinations of subregs are safe on FPREGs. Particularly,
8284 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
8285 2. Scalar to Scalar for integer modes or same size float modes.
8286 3. Vector to Vector modes. */
8287 if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
8288 {
8289 if (aarch64_vector_mode_supported_p (from)
8290 && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
8291 return false;
8292
8293 if (GET_MODE_NUNITS (from) == 1
8294 && GET_MODE_NUNITS (to) == 1
8295 && (GET_MODE_CLASS (from) == MODE_INT
8296 || from == to))
8297 return false;
8298
8299 if (aarch64_vector_mode_supported_p (from)
8300 && aarch64_vector_mode_supported_p (to))
8301 return false;
8302 }
8303
8304 return true;
8305}
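/* For example (illustrative): an SImode subreg of a V4SImode value held in
   an FP register accesses exactly one element and is allowed, whereas an
   SFmode subreg of a V2DFmode value matches none of the cases above and is
   rejected.  */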
8306
8307#undef TARGET_ADDRESS_COST
8308#define TARGET_ADDRESS_COST aarch64_address_cost
8309
8310/* This hook determines whether unnamed bitfields affect the alignment
8311 of the containing structure. The hook returns true if the structure
8312 should inherit the alignment requirements of an unnamed bitfield's
8313 type. */
8314#undef TARGET_ALIGN_ANON_BITFIELD
8315#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
8316
8317#undef TARGET_ASM_ALIGNED_DI_OP
8318#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8319
8320#undef TARGET_ASM_ALIGNED_HI_OP
8321#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8322
8323#undef TARGET_ASM_ALIGNED_SI_OP
8324#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8325
8326#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8327#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8328 hook_bool_const_tree_hwi_hwi_const_tree_true
8329
8330#undef TARGET_ASM_FILE_START
8331#define TARGET_ASM_FILE_START aarch64_start_file
8332
8333#undef TARGET_ASM_OUTPUT_MI_THUNK
8334#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8335
8336#undef TARGET_ASM_SELECT_RTX_SECTION
8337#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8338
8339#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8340#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8341
8342#undef TARGET_BUILD_BUILTIN_VA_LIST
8343#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8344
8345#undef TARGET_CALLEE_COPIES
8346#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8347
8348#undef TARGET_CAN_ELIMINATE
8349#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8350
8351#undef TARGET_CANNOT_FORCE_CONST_MEM
8352#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
8353
8354#undef TARGET_CONDITIONAL_REGISTER_USAGE
8355#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
8356
8357/* Only the least significant bit is used for initialization guard
8358 variables. */
8359#undef TARGET_CXX_GUARD_MASK_BIT
8360#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
8361
8362#undef TARGET_C_MODE_FOR_SUFFIX
8363#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
8364
8365#ifdef TARGET_BIG_ENDIAN_DEFAULT
8366#undef TARGET_DEFAULT_TARGET_FLAGS
8367#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
8368#endif
8369
8370#undef TARGET_CLASS_MAX_NREGS
8371#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
8372
8373#undef TARGET_BUILTIN_DECL
8374#define TARGET_BUILTIN_DECL aarch64_builtin_decl
8375
8376#undef TARGET_EXPAND_BUILTIN
8377#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
8378
8379#undef TARGET_EXPAND_BUILTIN_VA_START
8380#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
8381
8382#undef TARGET_FOLD_BUILTIN
8383#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
8384
8385#undef TARGET_FUNCTION_ARG
8386#define TARGET_FUNCTION_ARG aarch64_function_arg
8387
8388#undef TARGET_FUNCTION_ARG_ADVANCE
8389#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
8390
8391#undef TARGET_FUNCTION_ARG_BOUNDARY
8392#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
8393
8394#undef TARGET_FUNCTION_OK_FOR_SIBCALL
8395#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
8396
8397#undef TARGET_FUNCTION_VALUE
8398#define TARGET_FUNCTION_VALUE aarch64_function_value
8399
8400#undef TARGET_FUNCTION_VALUE_REGNO_P
8401#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
8402
8403#undef TARGET_FRAME_POINTER_REQUIRED
8404#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
8405
8406#undef TARGET_GIMPLE_FOLD_BUILTIN
8407#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
8408
8409#undef TARGET_GIMPLIFY_VA_ARG_EXPR
8410#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
8411
8412#undef TARGET_INIT_BUILTINS
8413#define TARGET_INIT_BUILTINS aarch64_init_builtins
8414
8415#undef TARGET_LEGITIMATE_ADDRESS_P
8416#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
8417
8418#undef TARGET_LEGITIMATE_CONSTANT_P
8419#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
8420
8421#undef TARGET_LIBGCC_CMP_RETURN_MODE
8422#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
8423
8424#undef TARGET_LRA_P
8425#define TARGET_LRA_P aarch64_lra_p
8426
8427#undef TARGET_MANGLE_TYPE
8428#define TARGET_MANGLE_TYPE aarch64_mangle_type
8429
8430#undef TARGET_MEMORY_MOVE_COST
8431#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
8432
8433#undef TARGET_MUST_PASS_IN_STACK
8434#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8435
8436/* This target hook should return true if accesses to volatile bitfields
8437 should use the narrowest mode possible. It should return false if these
8438 accesses should use the bitfield container type. */
8439#undef TARGET_NARROW_VOLATILE_BITFIELD
8440#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
8441
8442#undef TARGET_OPTION_OVERRIDE
8443#define TARGET_OPTION_OVERRIDE aarch64_override_options
8444
8445#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
8446#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
8447 aarch64_override_options_after_change
8448
8449#undef TARGET_PASS_BY_REFERENCE
8450#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
8451
8452#undef TARGET_PREFERRED_RELOAD_CLASS
8453#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
8454
8455#undef TARGET_SECONDARY_RELOAD
8456#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
8457
8458#undef TARGET_SHIFT_TRUNCATION_MASK
8459#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
8460
8461#undef TARGET_SETUP_INCOMING_VARARGS
8462#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
8463
8464#undef TARGET_STRUCT_VALUE_RTX
8465#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
8466
8467#undef TARGET_REGISTER_MOVE_COST
8468#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
8469
8470#undef TARGET_RETURN_IN_MEMORY
8471#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
8472
8473#undef TARGET_RETURN_IN_MSB
8474#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
8475
8476#undef TARGET_RTX_COSTS
8477#define TARGET_RTX_COSTS aarch64_rtx_costs
8478
8479#undef TARGET_SCHED_ISSUE_RATE
8480#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
8481
8482#undef TARGET_TRAMPOLINE_INIT
8483#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
8484
8485#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
8486#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
8487
8488#undef TARGET_VECTOR_MODE_SUPPORTED_P
8489#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
8490
8491#undef TARGET_ARRAY_MODE_SUPPORTED_P
8492#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
8493
8494#undef TARGET_VECTORIZE_ADD_STMT_COST
8495#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
8496
8497#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
8498#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
8499 aarch64_builtin_vectorization_cost
8500
8501#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
8502#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
8503
8504#undef TARGET_VECTORIZE_BUILTINS
8505#define TARGET_VECTORIZE_BUILTINS
8506
8507#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
8508#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
8509 aarch64_builtin_vectorized_function
8510
8511#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
8512#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
8513 aarch64_autovectorize_vector_sizes
8514
8515/* Section anchor support. */
8516
8517#undef TARGET_MIN_ANCHOR_OFFSET
8518#define TARGET_MIN_ANCHOR_OFFSET -256
8519
8520/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
8521 byte offset; we can do much more for larger data types, but have no way
8522 to determine the size of the access. We assume accesses are aligned. */
8523#undef TARGET_MAX_ANCHOR_OFFSET
8524#define TARGET_MAX_ANCHOR_OFFSET 4095
8525
8526#undef TARGET_VECTOR_ALIGNMENT
8527#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
8528
8529#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
8530#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
8531 aarch64_simd_vector_alignment_reachable
8532
8533/* vec_perm support. */
8534
8535#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
8536#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
8537 aarch64_vectorize_vec_perm_const_ok
8538
70f09188 8539
706b2314 8540#undef TARGET_FIXED_CONDITION_CODE_REGS
8541#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
8542
8543struct gcc_target targetm = TARGET_INITIALIZER;
8544
8545#include "gt-aarch64.h"