 1/* Machine description for AArch64 architecture.
 2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
25#include "insn-codes.h"
26#include "rtl.h"
27#include "insn-attr.h"
28#include "tree.h"
29#include "stringpool.h"
30#include "stor-layout.h"
31#include "calls.h"
32#include "varasm.h"
33#include "regs.h"
34#include "df.h"
35#include "hard-reg-set.h"
36#include "output.h"
37#include "expr.h"
38#include "reload.h"
39#include "toplev.h"
40#include "target.h"
41#include "target-def.h"
42#include "targhooks.h"
43#include "ggc.h"
44#include "function.h"
45#include "tm_p.h"
46#include "recog.h"
47#include "langhooks.h"
48#include "diagnostic-core.h"
49#include "pointer-set.h"
50#include "hash-table.h"
51#include "vec.h"
52#include "basic-block.h"
53#include "tree-ssa-alias.h"
54#include "internal-fn.h"
55#include "gimple-fold.h"
56#include "tree-eh.h"
57#include "gimple-expr.h"
58#include "is-a.h"
 59#include "gimple.h"
 60#include "gimplify.h"
61#include "optabs.h"
62#include "dwarf2.h"
63#include "cfgloop.h"
64#include "tree-vectorizer.h"
 65#include "config/arm/aarch-cost-tables.h"
 66
67/* Defined for convenience. */
68#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
69
70/* Classifies an address.
71
72 ADDRESS_REG_IMM
73 A simple base register plus immediate offset.
74
75 ADDRESS_REG_WB
76 A base register indexed by immediate offset with writeback.
77
78 ADDRESS_REG_REG
79 A base register indexed by (optionally scaled) register.
80
81 ADDRESS_REG_UXTW
82 A base register indexed by (optionally scaled) zero-extended register.
83
84 ADDRESS_REG_SXTW
85 A base register indexed by (optionally scaled) sign-extended register.
86
87 ADDRESS_LO_SUM
88 A LO_SUM rtx with a base register and "LO12" symbol relocation.
89
 90 ADDRESS_SYMBOLIC
 91 A constant symbolic address, in a pc-relative literal pool. */
92
93enum aarch64_address_type {
94 ADDRESS_REG_IMM,
95 ADDRESS_REG_WB,
96 ADDRESS_REG_REG,
97 ADDRESS_REG_UXTW,
98 ADDRESS_REG_SXTW,
99 ADDRESS_LO_SUM,
100 ADDRESS_SYMBOLIC
101};
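
/* A sketch of typical assembly forms for these classifications (illustrative
   only; the exact scaling and extension depend on the access mode):
     ADDRESS_REG_IMM    ldr x0, [x1, #16]
     ADDRESS_REG_WB     ldr x0, [x1, #16]!  or  ldr x0, [x1], #16
     ADDRESS_REG_REG    ldr x0, [x1, x2, lsl #3]
     ADDRESS_REG_UXTW   ldr x0, [x1, w2, uxtw #3]
     ADDRESS_REG_SXTW   ldr x0, [x1, w2, sxtw #3]
     ADDRESS_LO_SUM     ldr x0, [x1, #:lo12:foo]
     ADDRESS_SYMBOLIC   ldr x0, .Lliteral_pool_entry  (PC-relative literal)  */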
102
103struct aarch64_address_info {
104 enum aarch64_address_type type;
105 rtx base;
106 rtx offset;
107 int shift;
108 enum aarch64_symbol_type symbol_type;
109};
110
111struct simd_immediate_info
112{
113 rtx value;
114 int shift;
115 int element_width;
 116 bool mvn;
 117 bool msl;
118};
119
120/* The current code model. */
121enum aarch64_code_model aarch64_cmodel;
122
123#ifdef HAVE_AS_TLS
124#undef TARGET_HAVE_TLS
125#define TARGET_HAVE_TLS 1
126#endif
127
 128static bool aarch64_lra_p (void);
129static bool aarch64_composite_type_p (const_tree, enum machine_mode);
130static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
131 const_tree,
132 enum machine_mode *, int *,
133 bool *);
134static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
135static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
 136static void aarch64_override_options_after_change (void);
137static bool aarch64_vector_mode_supported_p (enum machine_mode);
138static unsigned bit_count (unsigned HOST_WIDE_INT);
139static bool aarch64_const_vec_all_same_int_p (rtx,
140 HOST_WIDE_INT, HOST_WIDE_INT);
141
142static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
143 const unsigned char *sel);
144
 145/* The processor for which instructions should be scheduled. */
 146enum aarch64_processor aarch64_tune = cortexa53;
147
148/* The current tuning set. */
149const struct tune_params *aarch64_tune_params;
150
151/* Mask to specify which instructions we are allowed to generate. */
152unsigned long aarch64_isa_flags = 0;
153
154/* Mask to specify which instruction scheduling options should be used. */
155unsigned long aarch64_tune_flags = 0;
156
157/* Tuning parameters. */
158
159#if HAVE_DESIGNATED_INITIALIZERS
160#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
161#else
162#define NAMED_PARAM(NAME, VAL) (VAL)
163#endif
164
165#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
166__extension__
167#endif
168
169#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
170__extension__
171#endif
172static const struct cpu_addrcost_table generic_addrcost_table =
173{
174 NAMED_PARAM (pre_modify, 0),
175 NAMED_PARAM (post_modify, 0),
176 NAMED_PARAM (register_offset, 0),
177 NAMED_PARAM (register_extend, 0),
178 NAMED_PARAM (imm_offset, 0)
179};
180
181#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
182__extension__
183#endif
184static const struct cpu_regmove_cost generic_regmove_cost =
185{
186 NAMED_PARAM (GP2GP, 1),
187 NAMED_PARAM (GP2FP, 2),
188 NAMED_PARAM (FP2GP, 2),
189 /* We currently do not provide direct support for TFmode Q->Q move.
190 Therefore we need to raise the cost above 2 in order to have
191 reload handle the situation. */
192 NAMED_PARAM (FP2FP, 4)
193};
194
195/* Generic costs for vector insn classes. */
196#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
197__extension__
198#endif
199static const struct cpu_vector_cost generic_vector_cost =
200{
201 NAMED_PARAM (scalar_stmt_cost, 1),
202 NAMED_PARAM (scalar_load_cost, 1),
203 NAMED_PARAM (scalar_store_cost, 1),
204 NAMED_PARAM (vec_stmt_cost, 1),
205 NAMED_PARAM (vec_to_scalar_cost, 1),
206 NAMED_PARAM (scalar_to_vec_cost, 1),
207 NAMED_PARAM (vec_align_load_cost, 1),
208 NAMED_PARAM (vec_unalign_load_cost, 1),
209 NAMED_PARAM (vec_unalign_store_cost, 1),
210 NAMED_PARAM (vec_store_cost, 1),
211 NAMED_PARAM (cond_taken_branch_cost, 3),
212 NAMED_PARAM (cond_not_taken_branch_cost, 1)
213};
214
215#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
216__extension__
217#endif
218static const struct tune_params generic_tunings =
219{
 220 &generic_extra_costs,
221 &generic_addrcost_table,
222 &generic_regmove_cost,
 223 &generic_vector_cost,
224 NAMED_PARAM (memmov_cost, 4),
225 NAMED_PARAM (issue_rate, 2)
226};
227
228static const struct tune_params cortexa53_tunings =
229{
230 &cortexa53_extra_costs,
231 &generic_addrcost_table,
232 &generic_regmove_cost,
233 &generic_vector_cost,
234 NAMED_PARAM (memmov_cost, 4),
235 NAMED_PARAM (issue_rate, 2)
236};
237
238/* A processor implementing AArch64. */
239struct processor
240{
241 const char *const name;
242 enum aarch64_processor core;
243 const char *arch;
244 const unsigned long flags;
245 const struct tune_params *const tune;
246};
247
248/* Processor cores implementing AArch64. */
249static const struct processor all_cores[] =
250{
 251#define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
252 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
253#include "aarch64-cores.def"
254#undef AARCH64_CORE
 255 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
256 {NULL, aarch64_none, NULL, 0, NULL}
257};
258
259/* Architectures implementing AArch64. */
260static const struct processor all_architectures[] =
261{
262#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
263 {NAME, CORE, #ARCH, FLAGS, NULL},
264#include "aarch64-arches.def"
265#undef AARCH64_ARCH
266 {NULL, aarch64_none, NULL, 0, NULL}
267};
268
 269/* Target specification. These are populated as command-line arguments
270 are processed, or NULL if not specified. */
271static const struct processor *selected_arch;
272static const struct processor *selected_cpu;
273static const struct processor *selected_tune;
274
275#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
276
277/* An ISA extension in the co-processor and main instruction set space. */
278struct aarch64_option_extension
279{
280 const char *const name;
281 const unsigned long flags_on;
282 const unsigned long flags_off;
283};
284
285/* ISA extensions in AArch64. */
286static const struct aarch64_option_extension all_extensions[] =
287{
288#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
289 {NAME, FLAGS_ON, FLAGS_OFF},
290#include "aarch64-option-extensions.def"
291#undef AARCH64_OPT_EXTENSION
292 {NULL, 0, 0}
293};
294
295/* Used to track the size of an address when generating a pre/post
296 increment address. */
297static enum machine_mode aarch64_memory_reference_mode;
298
299/* Used to force GTY into this file. */
300static GTY(()) int gty_dummy;
301
302/* A table of valid AArch64 "bitmask immediate" values for
303 logical instructions. */
304
305#define AARCH64_NUM_BITMASKS 5334
306static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
307
308/* Did we set flag_omit_frame_pointer just so
309 aarch64_frame_pointer_required would be called? */
310static bool faked_omit_frame_pointer;
311
312typedef enum aarch64_cond_code
313{
314 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
315 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
316 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
317}
318aarch64_cc;
319
320#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
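
/* This works because the enumeration above lists each condition next to its
   inverse, so flipping the low bit maps between them, e.g.
   AARCH64_EQ (0) ^ 1 == AARCH64_NE (1) and AARCH64_GE (10) ^ 1 == AARCH64_LT (11). */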
321
322/* The condition codes of the processor, and the inverse function. */
323static const char * const aarch64_condition_codes[] =
324{
325 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
326 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
327};
328
329/* Provide a mapping from gcc register numbers to dwarf register numbers. */
330unsigned
331aarch64_dbx_register_number (unsigned regno)
332{
333 if (GP_REGNUM_P (regno))
334 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
335 else if (regno == SP_REGNUM)
336 return AARCH64_DWARF_SP;
337 else if (FP_REGNUM_P (regno))
338 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
339
340 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
341 equivalent DWARF register. */
342 return DWARF_FRAME_REGISTERS;
343}
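
/* Illustrative mapping, assuming the usual AArch64 DWARF numbering behind
   AARCH64_DWARF_R0 (0), AARCH64_DWARF_SP (31) and AARCH64_DWARF_V0 (64):
   x0-x30 -> 0-30, sp -> 31, v0-v31 -> 64-95; anything else maps to
   DWARF_FRAME_REGISTERS, i.e. "no DWARF equivalent".  */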
344
345/* Return TRUE if MODE is any of the large INT modes. */
346static bool
347aarch64_vect_struct_mode_p (enum machine_mode mode)
348{
349 return mode == OImode || mode == CImode || mode == XImode;
350}
351
352/* Return TRUE if MODE is any of the vector modes. */
353static bool
354aarch64_vector_mode_p (enum machine_mode mode)
355{
356 return aarch64_vector_mode_supported_p (mode)
357 || aarch64_vect_struct_mode_p (mode);
358}
359
360/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
361static bool
362aarch64_array_mode_supported_p (enum machine_mode mode,
363 unsigned HOST_WIDE_INT nelems)
364{
365 if (TARGET_SIMD
366 && AARCH64_VALID_SIMD_QREG_MODE (mode)
367 && (nelems >= 2 && nelems <= 4))
368 return true;
369
370 return false;
371}
372
373/* Implement HARD_REGNO_NREGS. */
374
375int
376aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
377{
378 switch (aarch64_regno_regclass (regno))
379 {
380 case FP_REGS:
381 case FP_LO_REGS:
382 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
383 default:
384 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
385 }
386 gcc_unreachable ();
387}
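
/* For example (a sketch, with UNITS_PER_WORD == 8 and UNITS_PER_VREG == 16):
   a 16-byte TFmode value occupies two general registers but only one FP/SIMD
   register, while a 32-byte OImode structure occupies two FP/SIMD registers. */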
388
389/* Implement HARD_REGNO_MODE_OK. */
390
391int
392aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
393{
394 if (GET_MODE_CLASS (mode) == MODE_CC)
395 return regno == CC_REGNUM;
396
397 if (regno == SP_REGNUM)
398 /* The purpose of comparing with ptr_mode is to support the
399 global register variable associated with the stack pointer
400 register via the syntax of asm ("wsp") in ILP32. */
401 return mode == Pmode || mode == ptr_mode;
402
403 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
404 return mode == Pmode;
405
406 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
407 return 1;
408
409 if (FP_REGNUM_P (regno))
410 {
411 if (aarch64_vect_struct_mode_p (mode))
412 return
413 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
414 else
415 return 1;
416 }
417
418 return 0;
419}
420
421/* Return true if calls to DECL should be treated as
 422 long-calls (i.e. called via a register). */
423static bool
424aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
425{
426 return false;
427}
428
429/* Return true if calls to symbol-ref SYM should be treated as
 430 long-calls (i.e. called via a register). */
431bool
432aarch64_is_long_call_p (rtx sym)
433{
434 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
435}
436
437/* Return true if the offsets to a zero/sign-extract operation
438 represent an expression that matches an extend operation. The
 439 operands represent the parameters from
440
441 (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
442bool
443aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
444 rtx extract_imm)
445{
446 HOST_WIDE_INT mult_val, extract_val;
447
448 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
449 return false;
450
451 mult_val = INTVAL (mult_imm);
452 extract_val = INTVAL (extract_imm);
453
454 if (extract_val > 8
455 && extract_val < GET_MODE_BITSIZE (mode)
456 && exact_log2 (extract_val & ~7) > 0
457 && (extract_val & 7) <= 4
458 && mult_val == (1 << (extract_val & 7)))
459 return true;
460
461 return false;
462}
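
/* A worked example of the check above (illustrative interpretation): with
   mode == DImode, extract_imm == 34 and mult_imm == 4, we have
   (34 & ~7) == 32 (a power of two), (34 & 7) == 2 and mult_val == 1 << 2,
   so the extract describes a 32-bit value extended and then shifted left by
   two, the shape combine produces for an extended-register operand.  */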
463
464/* Emit an insn that's a simple single-set. Both the operands must be
465 known to be valid. */
466inline static rtx
467emit_set_insn (rtx x, rtx y)
468{
469 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
470}
471
472/* X and Y are two things to compare using CODE. Emit the compare insn and
473 return the rtx for register 0 in the proper mode. */
474rtx
475aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
476{
477 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
478 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
479
480 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
481 return cc_reg;
482}
483
484/* Build the SYMBOL_REF for __tls_get_addr. */
485
486static GTY(()) rtx tls_get_addr_libfunc;
487
488rtx
489aarch64_tls_get_addr (void)
490{
491 if (!tls_get_addr_libfunc)
492 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
493 return tls_get_addr_libfunc;
494}
495
496/* Return the TLS model to use for ADDR. */
497
498static enum tls_model
499tls_symbolic_operand_type (rtx addr)
500{
501 enum tls_model tls_kind = TLS_MODEL_NONE;
502 rtx sym, addend;
503
504 if (GET_CODE (addr) == CONST)
505 {
506 split_const (addr, &sym, &addend);
507 if (GET_CODE (sym) == SYMBOL_REF)
508 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
509 }
510 else if (GET_CODE (addr) == SYMBOL_REF)
511 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
512
513 return tls_kind;
514}
515
 516/* We accept LO_SUMs in our legitimate addresses so that combine
 517 can take care of combining addresses where necessary, but for
 518 code generation purposes we generate the address
 519 as:
520 RTL Absolute
521 tmp = hi (symbol_ref); adrp x1, foo
 522 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo12:foo
523 nop
524
525 PIC TLS
526 adrp x1, :got:foo adrp tmp, :tlsgd:foo
527 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
528 bl __tls_get_addr
529 nop
530
531 Load TLS symbol, depending on TLS mechanism and TLS access model.
532
533 Global Dynamic - Traditional TLS:
534 adrp tmp, :tlsgd:imm
535 add dest, tmp, #:tlsgd_lo12:imm
536 bl __tls_get_addr
537
538 Global Dynamic - TLS Descriptors:
539 adrp dest, :tlsdesc:imm
540 ldr tmp, [dest, #:tlsdesc_lo12:imm]
541 add dest, dest, #:tlsdesc_lo12:imm
542 blr tmp
543 mrs tp, tpidr_el0
544 add dest, dest, tp
545
546 Initial Exec:
547 mrs tp, tpidr_el0
548 adrp tmp, :gottprel:imm
549 ldr dest, [tmp, #:gottprel_lo12:imm]
550 add dest, dest, tp
551
552 Local Exec:
553 mrs tp, tpidr_el0
554 add t0, tp, #:tprel_hi12:imm
555 add t0, #:tprel_lo12_nc:imm
556*/
557
558static void
559aarch64_load_symref_appropriately (rtx dest, rtx imm,
560 enum aarch64_symbol_type type)
561{
562 switch (type)
563 {
564 case SYMBOL_SMALL_ABSOLUTE:
565 {
 566 /* In ILP32, the mode of dest can be either SImode or DImode. */
 567 rtx tmp_reg = dest;
 568 enum machine_mode mode = GET_MODE (dest);
 569
 570 gcc_assert (mode == Pmode || mode == ptr_mode);
 571
 572 if (can_create_pseudo_p ())
 573 tmp_reg = gen_reg_rtx (mode);
 574
 575 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
576 emit_insn (gen_add_losym (dest, tmp_reg, imm));
577 return;
578 }
579
580 case SYMBOL_TINY_ABSOLUTE:
581 emit_insn (gen_rtx_SET (Pmode, dest, imm));
582 return;
583
584 case SYMBOL_SMALL_GOT:
585 {
 586 /* In ILP32, the mode of dest can be either SImode or DImode,
 587 while the GOT entry is always of SImode size. The mode of
 588 dest depends on how dest is used: if dest is assigned to a
 589 pointer (e.g. in memory), it has SImode; it may have
 590 DImode if dest is dereferenced to access memory.
 591 This is why we have to handle three different ldr_got_small
 592 patterns here (two patterns for ILP32). */
 593 rtx tmp_reg = dest;
 594 enum machine_mode mode = GET_MODE (dest);
 595
 596 if (can_create_pseudo_p ())
597 tmp_reg = gen_reg_rtx (mode);
598
599 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
600 if (mode == ptr_mode)
601 {
602 if (mode == DImode)
603 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
604 else
605 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
606 }
607 else
608 {
609 gcc_assert (mode == Pmode);
610 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
611 }
612
613 return;
614 }
615
616 case SYMBOL_SMALL_TLSGD:
617 {
618 rtx insns;
619 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
620
621 start_sequence ();
622 emit_call_insn (gen_tlsgd_small (result, imm));
623 insns = get_insns ();
624 end_sequence ();
625
626 RTL_CONST_CALL_P (insns) = 1;
627 emit_libcall_block (insns, dest, result, imm);
628 return;
629 }
630
631 case SYMBOL_SMALL_TLSDESC:
632 {
633 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
634 rtx tp;
635
636 emit_insn (gen_tlsdesc_small (imm));
637 tp = aarch64_load_tp (NULL);
638 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
639 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
640 return;
641 }
642
643 case SYMBOL_SMALL_GOTTPREL:
644 {
645 rtx tmp_reg = gen_reg_rtx (Pmode);
646 rtx tp = aarch64_load_tp (NULL);
647 emit_insn (gen_tlsie_small (tmp_reg, imm));
648 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
649 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
650 return;
651 }
652
653 case SYMBOL_SMALL_TPREL:
654 {
655 rtx tp = aarch64_load_tp (NULL);
656 emit_insn (gen_tlsle_small (dest, tp, imm));
657 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
658 return;
659 }
660
661 case SYMBOL_TINY_GOT:
662 emit_insn (gen_ldr_got_tiny (dest, imm));
663 return;
664
665 default:
666 gcc_unreachable ();
667 }
668}
669
670/* Emit a move from SRC to DEST. Assume that the move expanders can
671 handle all moves if !can_create_pseudo_p (). The distinction is
672 important because, unlike emit_move_insn, the move expanders know
673 how to force Pmode objects into the constant pool even when the
674 constant pool address is not itself legitimate. */
675static rtx
676aarch64_emit_move (rtx dest, rtx src)
677{
678 return (can_create_pseudo_p ()
679 ? emit_move_insn (dest, src)
680 : emit_move_insn_1 (dest, src));
681}
682
683/* Split a 128-bit move operation into two 64-bit move operations,
684 taking care to handle partial overlap of register to register
685 copies. Special cases are needed when moving between GP regs and
686 FP regs. SRC can be a register, constant or memory; DST a register
687 or memory. If either operand is memory it must not have any side
688 effects. */
689void
690aarch64_split_128bit_move (rtx dst, rtx src)
691{
692 rtx dst_lo, dst_hi;
693 rtx src_lo, src_hi;
 694
 695 enum machine_mode mode = GET_MODE (dst);
 696
697 gcc_assert (mode == TImode || mode == TFmode);
698 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
699 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
700
701 if (REG_P (dst) && REG_P (src))
702 {
703 int src_regno = REGNO (src);
704 int dst_regno = REGNO (dst);
 705
 706 /* Handle FP <-> GP regs. */
707 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
708 {
709 src_lo = gen_lowpart (word_mode, src);
710 src_hi = gen_highpart (word_mode, src);
711
712 if (mode == TImode)
713 {
714 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
715 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
716 }
717 else
718 {
719 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
720 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
721 }
722 return;
723 }
724 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
725 {
726 dst_lo = gen_lowpart (word_mode, dst);
727 dst_hi = gen_highpart (word_mode, dst);
728
729 if (mode == TImode)
730 {
731 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
732 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
733 }
734 else
735 {
736 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
737 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
738 }
739 return;
 740 }
741 }
742
743 dst_lo = gen_lowpart (word_mode, dst);
744 dst_hi = gen_highpart (word_mode, dst);
745 src_lo = gen_lowpart (word_mode, src);
746 src_hi = gen_highpart_mode (word_mode, mode, src);
747
748 /* At most one pairing may overlap. */
749 if (reg_overlap_mentioned_p (dst_lo, src_hi))
750 {
751 aarch64_emit_move (dst_hi, src_hi);
752 aarch64_emit_move (dst_lo, src_lo);
753 }
754 else
755 {
756 aarch64_emit_move (dst_lo, src_lo);
757 aarch64_emit_move (dst_hi, src_hi);
758 }
759}
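
/* As an illustration of the overlap handling above: on a little-endian
   target, splitting a TImode copy whose source halves are x0 (low) and
   x1 (high) and whose destination halves are x1 (low) and x2 (high) must
   emit the high move (x2 <- x1) before the low move (x1 <- x0), otherwise
   the shared register x1 would be clobbered first.  */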
760
761bool
762aarch64_split_128bit_move_p (rtx dst, rtx src)
763{
764 return (! REG_P (src)
765 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
766}
767
768/* Split a complex SIMD combine. */
769
770void
771aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
772{
773 enum machine_mode src_mode = GET_MODE (src1);
774 enum machine_mode dst_mode = GET_MODE (dst);
775
776 gcc_assert (VECTOR_MODE_P (dst_mode));
777
778 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
779 {
780 rtx (*gen) (rtx, rtx, rtx);
781
782 switch (src_mode)
783 {
784 case V8QImode:
785 gen = gen_aarch64_simd_combinev8qi;
786 break;
787 case V4HImode:
788 gen = gen_aarch64_simd_combinev4hi;
789 break;
790 case V2SImode:
791 gen = gen_aarch64_simd_combinev2si;
792 break;
793 case V2SFmode:
794 gen = gen_aarch64_simd_combinev2sf;
795 break;
796 case DImode:
797 gen = gen_aarch64_simd_combinedi;
798 break;
799 case DFmode:
800 gen = gen_aarch64_simd_combinedf;
801 break;
802 default:
803 gcc_unreachable ();
804 }
805
806 emit_insn (gen (dst, src1, src2));
807 return;
808 }
809}
810
811/* Split a complex SIMD move. */
812
813void
814aarch64_split_simd_move (rtx dst, rtx src)
815{
816 enum machine_mode src_mode = GET_MODE (src);
817 enum machine_mode dst_mode = GET_MODE (dst);
818
819 gcc_assert (VECTOR_MODE_P (dst_mode));
820
821 if (REG_P (dst) && REG_P (src))
822 {
823 rtx (*gen) (rtx, rtx);
824
825 gcc_assert (VECTOR_MODE_P (src_mode));
826
827 switch (src_mode)
828 {
829 case V16QImode:
 830 gen = gen_aarch64_split_simd_movv16qi;
 831 break;
 832 case V8HImode:
 833 gen = gen_aarch64_split_simd_movv8hi;
 834 break;
 835 case V4SImode:
 836 gen = gen_aarch64_split_simd_movv4si;
 837 break;
 838 case V2DImode:
 839 gen = gen_aarch64_split_simd_movv2di;
 840 break;
 841 case V4SFmode:
 842 gen = gen_aarch64_split_simd_movv4sf;
 843 break;
 844 case V2DFmode:
 845 gen = gen_aarch64_split_simd_movv2df;
 846 break;
 847 default:
 848 gcc_unreachable ();
 849 }
 850
 851 emit_insn (gen (dst, src));
 852 return;
853 }
854}
855
 856static rtx
 857aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
858{
859 if (can_create_pseudo_p ())
 860 return force_reg (mode, value);
861 else
862 {
863 x = aarch64_emit_move (x, value);
864 return x;
865 }
866}
867
868
869static rtx
870aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
871{
 872 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
873 {
874 rtx high;
875 /* Load the full offset into a register. This
876 might be improvable in the future. */
877 high = GEN_INT (offset);
878 offset = 0;
879 high = aarch64_force_temporary (mode, temp, high);
880 reg = aarch64_force_temporary (mode, temp,
881 gen_rtx_PLUS (mode, high, reg));
882 }
883 return plus_constant (mode, reg, offset);
884}
885
886void
887aarch64_expand_mov_immediate (rtx dest, rtx imm)
888{
889 enum machine_mode mode = GET_MODE (dest);
890 unsigned HOST_WIDE_INT mask;
891 int i;
892 bool first;
893 unsigned HOST_WIDE_INT val;
894 bool subtargets;
895 rtx subtarget;
896 int one_match, zero_match;
897
898 gcc_assert (mode == SImode || mode == DImode);
899
900 /* Check on what type of symbol it is. */
901 if (GET_CODE (imm) == SYMBOL_REF
902 || GET_CODE (imm) == LABEL_REF
903 || GET_CODE (imm) == CONST)
904 {
905 rtx mem, base, offset;
906 enum aarch64_symbol_type sty;
907
908 /* If we have (const (plus symbol offset)), separate out the offset
909 before we start classifying the symbol. */
910 split_const (imm, &base, &offset);
911
912 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
913 switch (sty)
914 {
915 case SYMBOL_FORCE_TO_MEM:
916 if (offset != const0_rtx
917 && targetm.cannot_force_const_mem (mode, imm))
918 {
919 gcc_assert(can_create_pseudo_p ());
 920 base = aarch64_force_temporary (mode, dest, base);
921 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
922 aarch64_emit_move (dest, base);
923 return;
924 }
 925 mem = force_const_mem (ptr_mode, imm);
 926 gcc_assert (mem);
927 if (mode != ptr_mode)
928 mem = gen_rtx_ZERO_EXTEND (mode, mem);
929 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
930 return;
931
932 case SYMBOL_SMALL_TLSGD:
933 case SYMBOL_SMALL_TLSDESC:
934 case SYMBOL_SMALL_GOTTPREL:
935 case SYMBOL_SMALL_GOT:
 936 case SYMBOL_TINY_GOT:
937 if (offset != const0_rtx)
938 {
939 gcc_assert(can_create_pseudo_p ());
 940 base = aarch64_force_temporary (mode, dest, base);
941 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
942 aarch64_emit_move (dest, base);
943 return;
944 }
945 /* FALLTHRU */
946
947 case SYMBOL_SMALL_TPREL:
948 case SYMBOL_SMALL_ABSOLUTE:
 949 case SYMBOL_TINY_ABSOLUTE:
950 aarch64_load_symref_appropriately (dest, imm, sty);
951 return;
952
953 default:
954 gcc_unreachable ();
955 }
956 }
957
958 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
959 {
960 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
961 return;
962 }
963
964 if (!CONST_INT_P (imm))
965 {
966 if (GET_CODE (imm) == HIGH)
967 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
968 else
969 {
970 rtx mem = force_const_mem (mode, imm);
971 gcc_assert (mem);
972 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
973 }
974
975 return;
976 }
977
978 if (mode == SImode)
979 {
980 /* We know we can't do this in 1 insn, and we must be able to do it
981 in two; so don't mess around looking for sequences that don't buy
982 us anything. */
983 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
984 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
985 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
986 return;
987 }
988
989 /* Remaining cases are all for DImode. */
990
991 val = INTVAL (imm);
992 subtargets = optimize && can_create_pseudo_p ();
993
994 one_match = 0;
995 zero_match = 0;
996 mask = 0xffff;
997
998 for (i = 0; i < 64; i += 16, mask <<= 16)
999 {
1000 if ((val & mask) == 0)
1001 zero_match++;
1002 else if ((val & mask) == mask)
1003 one_match++;
1004 }
1005
1006 if (one_match == 2)
1007 {
1008 mask = 0xffff;
1009 for (i = 0; i < 64; i += 16, mask <<= 16)
1010 {
1011 if ((val & mask) != mask)
1012 {
1013 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1014 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1015 GEN_INT ((val >> i) & 0xffff)));
1016 return;
1017 }
1018 }
1019 gcc_unreachable ();
1020 }
1021
1022 if (zero_match == 2)
1023 goto simple_sequence;
1024
1025 mask = 0x0ffff0000UL;
1026 for (i = 16; i < 64; i += 16, mask <<= 16)
1027 {
1028 HOST_WIDE_INT comp = mask & ~(mask - 1);
1029
1030 if (aarch64_uimm12_shift (val - (val & mask)))
1031 {
1032 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1033
1034 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1035 emit_insn (gen_adddi3 (dest, subtarget,
1036 GEN_INT (val - (val & mask))));
1037 return;
1038 }
1039 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1040 {
1041 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1042
1043 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1044 GEN_INT ((val + comp) & mask)));
1045 emit_insn (gen_adddi3 (dest, subtarget,
1046 GEN_INT (val - ((val + comp) & mask))));
1047 return;
1048 }
1049 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1050 {
1051 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1052
1053 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1054 GEN_INT ((val - comp) | ~mask)));
1055 emit_insn (gen_adddi3 (dest, subtarget,
1056 GEN_INT (val - ((val - comp) | ~mask))));
1057 return;
1058 }
1059 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1060 {
1061 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1062
1063 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1064 GEN_INT (val | ~mask)));
1065 emit_insn (gen_adddi3 (dest, subtarget,
1066 GEN_INT (val - (val | ~mask))));
1067 return;
1068 }
1069 }
1070
1071 /* See if we can do it by arithmetically combining two
1072 immediates. */
1073 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1074 {
1075 int j;
1076 mask = 0xffff;
1077
1078 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1079 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1080 {
1081 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1082 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1083 GEN_INT (aarch64_bitmasks[i])));
1084 emit_insn (gen_adddi3 (dest, subtarget,
1085 GEN_INT (val - aarch64_bitmasks[i])));
1086 return;
1087 }
1088
1089 for (j = 0; j < 64; j += 16, mask <<= 16)
1090 {
1091 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1092 {
1093 emit_insn (gen_rtx_SET (VOIDmode, dest,
1094 GEN_INT (aarch64_bitmasks[i])));
1095 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1096 GEN_INT ((val >> j) & 0xffff)));
1097 return;
1098 }
1099 }
1100 }
1101
1102 /* See if we can do it by logically combining two immediates. */
1103 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1104 {
1105 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1106 {
1107 int j;
1108
1109 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1110 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1111 {
1112 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1113 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1114 GEN_INT (aarch64_bitmasks[i])));
1115 emit_insn (gen_iordi3 (dest, subtarget,
1116 GEN_INT (aarch64_bitmasks[j])));
1117 return;
1118 }
1119 }
1120 else if ((val & aarch64_bitmasks[i]) == val)
1121 {
1122 int j;
1123
1124 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1125 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1126 {
1127
1128 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1129 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1130 GEN_INT (aarch64_bitmasks[j])));
1131 emit_insn (gen_anddi3 (dest, subtarget,
1132 GEN_INT (aarch64_bitmasks[i])));
1133 return;
1134 }
1135 }
1136 }
1137
1138 simple_sequence:
1139 first = true;
1140 mask = 0xffff;
1141 for (i = 0; i < 64; i += 16, mask <<= 16)
1142 {
1143 if ((val & mask) != 0)
1144 {
1145 if (first)
1146 {
1147 emit_insn (gen_rtx_SET (VOIDmode, dest,
1148 GEN_INT (val & mask)));
1149 first = false;
1150 }
1151 else
1152 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1153 GEN_INT ((val >> i) & 0xffff)));
1154 }
1155 }
1156}
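
/* A sketch of the kind of sequence chosen above: the DImode constant
   0x1234000000005678 contains two all-zero 16-bit chunks, so it reaches
   simple_sequence and is built roughly as
       mov  x0, #0x5678
       movk x0, #0x1234, lsl #48
   i.e. one move of the first non-zero chunk followed by one insv_immdi
   (movk) per remaining non-zero chunk.  */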
1157
1158static bool
1159aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1160{
1161 /* Indirect calls are not currently supported. */
1162 if (decl == NULL)
1163 return false;
1164
1165 /* Cannot tail-call to long-calls, since these are outside of the
1166 range of a branch instruction (we could handle this if we added
 1167 support for indirect tail-calls). */
1168 if (aarch64_decl_is_long_call_p (decl))
1169 return false;
1170
1171 return true;
1172}
1173
1174/* Implement TARGET_PASS_BY_REFERENCE. */
1175
1176static bool
1177aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1178 enum machine_mode mode,
1179 const_tree type,
1180 bool named ATTRIBUTE_UNUSED)
1181{
1182 HOST_WIDE_INT size;
1183 enum machine_mode dummymode;
1184 int nregs;
1185
1186 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1187 size = (mode == BLKmode && type)
1188 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1189
1190 if (type)
1191 {
1192 /* Arrays always passed by reference. */
1193 if (TREE_CODE (type) == ARRAY_TYPE)
1194 return true;
1195 /* Other aggregates based on their size. */
1196 if (AGGREGATE_TYPE_P (type))
1197 size = int_size_in_bytes (type);
1198 }
1199
 1200 /* Variable sized arguments are always passed by reference. */
1201 if (size < 0)
1202 return true;
1203
1204 /* Can this be a candidate to be passed in fp/simd register(s)? */
1205 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1206 &dummymode, &nregs,
1207 NULL))
1208 return false;
1209
1210 /* Arguments which are variable sized or larger than 2 registers are
 1211 passed by reference unless they are a homogeneous floating-point
1212 aggregate. */
1213 return size > 2 * UNITS_PER_WORD;
1214}
1215
1216/* Return TRUE if VALTYPE is padded to its least significant bits. */
1217static bool
1218aarch64_return_in_msb (const_tree valtype)
1219{
1220 enum machine_mode dummy_mode;
1221 int dummy_int;
1222
1223 /* Never happens in little-endian mode. */
1224 if (!BYTES_BIG_ENDIAN)
1225 return false;
1226
1227 /* Only composite types smaller than or equal to 16 bytes can
1228 be potentially returned in registers. */
1229 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1230 || int_size_in_bytes (valtype) <= 0
1231 || int_size_in_bytes (valtype) > 16)
1232 return false;
1233
1234 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1235 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1236 is always passed/returned in the least significant bits of fp/simd
1237 register(s). */
1238 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1239 &dummy_mode, &dummy_int, NULL))
1240 return false;
1241
1242 return true;
1243}
1244
1245/* Implement TARGET_FUNCTION_VALUE.
1246 Define how to find the value returned by a function. */
1247
1248static rtx
1249aarch64_function_value (const_tree type, const_tree func,
1250 bool outgoing ATTRIBUTE_UNUSED)
1251{
1252 enum machine_mode mode;
1253 int unsignedp;
1254 int count;
1255 enum machine_mode ag_mode;
1256
1257 mode = TYPE_MODE (type);
1258 if (INTEGRAL_TYPE_P (type))
1259 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1260
1261 if (aarch64_return_in_msb (type))
1262 {
1263 HOST_WIDE_INT size = int_size_in_bytes (type);
1264
1265 if (size % UNITS_PER_WORD != 0)
1266 {
1267 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1268 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1269 }
1270 }
1271
1272 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1273 &ag_mode, &count, NULL))
1274 {
1275 if (!aarch64_composite_type_p (type, mode))
1276 {
1277 gcc_assert (count == 1 && mode == ag_mode);
1278 return gen_rtx_REG (mode, V0_REGNUM);
1279 }
1280 else
1281 {
1282 int i;
1283 rtx par;
1284
1285 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1286 for (i = 0; i < count; i++)
1287 {
1288 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1289 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1290 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1291 XVECEXP (par, 0, i) = tmp;
1292 }
1293 return par;
1294 }
1295 }
1296 else
1297 return gen_rtx_REG (mode, R0_REGNUM);
1298}
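
/* Two illustrative cases (a sketch of the rules above, not new ones): a
   structure of four floats is an HFA, so it is returned as a PARALLEL of
   four SFmode values in v0-v3; a 16-byte structure of two pointers is not
   an FP/SIMD candidate and comes back in the general registers x0/x1.  */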
1299
1300/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1301 Return true if REGNO is the number of a hard register in which the values
1302 of called function may come back. */
1303
1304static bool
1305aarch64_function_value_regno_p (const unsigned int regno)
1306{
1307 /* Maximum of 16 bytes can be returned in the general registers. Examples
1308 of 16-byte return values are: 128-bit integers and 16-byte small
1309 structures (excluding homogeneous floating-point aggregates). */
1310 if (regno == R0_REGNUM || regno == R1_REGNUM)
1311 return true;
1312
1313 /* Up to four fp/simd registers can return a function value, e.g. a
1314 homogeneous floating-point aggregate having four members. */
1315 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1316 return !TARGET_GENERAL_REGS_ONLY;
1317
1318 return false;
1319}
1320
1321/* Implement TARGET_RETURN_IN_MEMORY.
1322
1323 If the type T of the result of a function is such that
1324 void func (T arg)
1325 would require that arg be passed as a value in a register (or set of
1326 registers) according to the parameter passing rules, then the result
1327 is returned in the same registers as would be used for such an
1328 argument. */
1329
1330static bool
1331aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1332{
1333 HOST_WIDE_INT size;
1334 enum machine_mode ag_mode;
1335 int count;
1336
1337 if (!AGGREGATE_TYPE_P (type)
1338 && TREE_CODE (type) != COMPLEX_TYPE
1339 && TREE_CODE (type) != VECTOR_TYPE)
1340 /* Simple scalar types always returned in registers. */
1341 return false;
1342
1343 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1344 type,
1345 &ag_mode,
1346 &count,
1347 NULL))
1348 return false;
1349
1350 /* Types larger than 2 registers returned in memory. */
1351 size = int_size_in_bytes (type);
1352 return (size < 0 || size > 2 * UNITS_PER_WORD);
1353}
1354
1355static bool
1356aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1357 const_tree type, int *nregs)
1358{
1359 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1360 return aarch64_vfp_is_call_or_return_candidate (mode,
1361 type,
1362 &pcum->aapcs_vfp_rmode,
1363 nregs,
1364 NULL);
1365}
1366
1367/* Given MODE and TYPE of a function argument, return the alignment in
1368 bits. The idea is to suppress any stronger alignment requested by
1369 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1370 This is a helper function for local use only. */
1371
1372static unsigned int
1373aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1374{
1375 unsigned int alignment;
1376
1377 if (type)
1378 {
1379 if (!integer_zerop (TYPE_SIZE (type)))
1380 {
1381 if (TYPE_MODE (type) == mode)
1382 alignment = TYPE_ALIGN (type);
1383 else
1384 alignment = GET_MODE_ALIGNMENT (mode);
1385 }
1386 else
1387 alignment = 0;
1388 }
1389 else
1390 alignment = GET_MODE_ALIGNMENT (mode);
1391
1392 return alignment;
1393}
1394
1395/* Layout a function argument according to the AAPCS64 rules. The rule
1396 numbers refer to the rule numbers in the AAPCS64. */
1397
1398static void
1399aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1400 const_tree type,
1401 bool named ATTRIBUTE_UNUSED)
1402{
1403 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1404 int ncrn, nvrn, nregs;
1405 bool allocate_ncrn, allocate_nvrn;
1406
1407 /* We need to do this once per argument. */
1408 if (pcum->aapcs_arg_processed)
1409 return;
1410
1411 pcum->aapcs_arg_processed = true;
1412
1413 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1414 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1415 mode,
1416 type,
1417 &nregs);
1418
 1419 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1420 The following code thus handles passing by SIMD/FP registers first. */
1421
1422 nvrn = pcum->aapcs_nvrn;
1423
 1424 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
 1425 and homogeneous short-vector aggregates (HVA). */
1426 if (allocate_nvrn)
1427 {
1428 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1429 {
1430 pcum->aapcs_nextnvrn = nvrn + nregs;
1431 if (!aarch64_composite_type_p (type, mode))
1432 {
1433 gcc_assert (nregs == 1);
1434 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1435 }
1436 else
1437 {
1438 rtx par;
1439 int i;
1440 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1441 for (i = 0; i < nregs; i++)
1442 {
1443 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1444 V0_REGNUM + nvrn + i);
1445 tmp = gen_rtx_EXPR_LIST
1446 (VOIDmode, tmp,
1447 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1448 XVECEXP (par, 0, i) = tmp;
1449 }
1450 pcum->aapcs_reg = par;
1451 }
1452 return;
1453 }
1454 else
1455 {
1456 /* C.3 NSRN is set to 8. */
1457 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1458 goto on_stack;
1459 }
1460 }
1461
1462 ncrn = pcum->aapcs_ncrn;
1463 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1464 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1465
1466
 1467 /* C6 - C9, though the sign and zero extension semantics are
 1468 handled elsewhere. This is the case where the argument fits
 1469 entirely in general registers. */
1470 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1471 {
1472 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1473
1474 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1475
1476 /* C.8 if the argument has an alignment of 16 then the NGRN is
1477 rounded up to the next even number. */
1478 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1479 {
1480 ++ncrn;
1481 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1482 }
1483 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1484 A reg is still generated for it, but the caller should be smart
1485 enough not to use it. */
1486 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1487 {
1488 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1489 }
1490 else
1491 {
1492 rtx par;
1493 int i;
1494
1495 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1496 for (i = 0; i < nregs; i++)
1497 {
1498 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1499 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1500 GEN_INT (i * UNITS_PER_WORD));
1501 XVECEXP (par, 0, i) = tmp;
1502 }
1503 pcum->aapcs_reg = par;
1504 }
1505
1506 pcum->aapcs_nextncrn = ncrn + nregs;
1507 return;
1508 }
1509
1510 /* C.11 */
1511 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1512
1513 /* The argument is passed on stack; record the needed number of words for
1514 this argument (we can re-use NREGS) and align the total size if
1515 necessary. */
1516on_stack:
1517 pcum->aapcs_stack_words = nregs;
1518 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1519 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1520 16 / UNITS_PER_WORD) + 1;
1521 return;
1522}
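
/* An example of rule C.8 above (illustrative): if one core register is
   already in use (NGRN == 1) and the next argument is a 16-byte composite
   with 16-byte alignment needing two registers, NGRN is bumped to 2 so the
   argument occupies the aligned pair x2/x3 instead of straddling x1/x2.  */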
1523
1524/* Implement TARGET_FUNCTION_ARG. */
1525
1526static rtx
1527aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1528 const_tree type, bool named)
1529{
1530 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1531 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1532
1533 if (mode == VOIDmode)
1534 return NULL_RTX;
1535
1536 aarch64_layout_arg (pcum_v, mode, type, named);
1537 return pcum->aapcs_reg;
1538}
1539
1540void
1541aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1542 const_tree fntype ATTRIBUTE_UNUSED,
1543 rtx libname ATTRIBUTE_UNUSED,
1544 const_tree fndecl ATTRIBUTE_UNUSED,
1545 unsigned n_named ATTRIBUTE_UNUSED)
1546{
1547 pcum->aapcs_ncrn = 0;
1548 pcum->aapcs_nvrn = 0;
1549 pcum->aapcs_nextncrn = 0;
1550 pcum->aapcs_nextnvrn = 0;
1551 pcum->pcs_variant = ARM_PCS_AAPCS64;
1552 pcum->aapcs_reg = NULL_RTX;
1553 pcum->aapcs_arg_processed = false;
1554 pcum->aapcs_stack_words = 0;
1555 pcum->aapcs_stack_size = 0;
1556
1557 return;
1558}
1559
1560static void
1561aarch64_function_arg_advance (cumulative_args_t pcum_v,
1562 enum machine_mode mode,
1563 const_tree type,
1564 bool named)
1565{
1566 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1567 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1568 {
1569 aarch64_layout_arg (pcum_v, mode, type, named);
1570 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1571 != (pcum->aapcs_stack_words != 0));
1572 pcum->aapcs_arg_processed = false;
1573 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1574 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1575 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1576 pcum->aapcs_stack_words = 0;
1577 pcum->aapcs_reg = NULL_RTX;
1578 }
1579}
1580
1581bool
1582aarch64_function_arg_regno_p (unsigned regno)
1583{
1584 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1585 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1586}
1587
1588/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1589 PARM_BOUNDARY bits of alignment, but will be given anything up
1590 to STACK_BOUNDARY bits if the type requires it. This makes sure
1591 that both before and after the layout of each argument, the Next
1592 Stacked Argument Address (NSAA) will have a minimum alignment of
1593 8 bytes. */
1594
1595static unsigned int
1596aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1597{
1598 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1599
1600 if (alignment < PARM_BOUNDARY)
1601 alignment = PARM_BOUNDARY;
1602 if (alignment > STACK_BOUNDARY)
1603 alignment = STACK_BOUNDARY;
1604 return alignment;
1605}
1606
1607/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1608
1609 Return true if an argument passed on the stack should be padded upwards,
1610 i.e. if the least-significant byte of the stack slot has useful data.
1611
1612 Small aggregate types are placed in the lowest memory address.
1613
1614 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1615
1616bool
1617aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1618{
1619 /* On little-endian targets, the least significant byte of every stack
1620 argument is passed at the lowest byte address of the stack slot. */
1621 if (!BYTES_BIG_ENDIAN)
1622 return true;
1623
 1624 /* Otherwise, integral, floating-point and pointer types are padded downward:
1625 the least significant byte of a stack argument is passed at the highest
1626 byte address of the stack slot. */
1627 if (type
1628 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1629 || POINTER_TYPE_P (type))
1630 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1631 return false;
1632
1633 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1634 return true;
1635}
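
/* For example (illustrative): on a big-endian target a plain "char" argument
   that ends up on the stack is padded downward, i.e. its single byte sits at
   the highest address of the 8-byte slot, whereas a small aggregate is padded
   upward and starts at the lowest address.  On little-endian targets
   everything is padded upward.  */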
1636
1637/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1638
1639 It specifies padding for the last (may also be the only)
 1640 element of a block move between registers and memory.
 1641 Assuming the block is in memory, padding upward means that
 1642 the last element is padded after its most significant byte,
 1643 while in downward padding, the last element is padded at its
 1644 least significant byte side.
1645
1646 Small aggregates and small complex types are always padded
1647 upwards.
1648
1649 We don't need to worry about homogeneous floating-point or
1650 short-vector aggregates; their move is not affected by the
1651 padding direction determined here. Regardless of endianness,
1652 each element of such an aggregate is put in the least
1653 significant bits of a fp/simd register.
1654
1655 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1656 register has useful data, and return the opposite if the most
1657 significant byte does. */
1658
1659bool
1660aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1661 bool first ATTRIBUTE_UNUSED)
1662{
1663
1664 /* Small composite types are always padded upward. */
1665 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1666 {
1667 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1668 : GET_MODE_SIZE (mode));
1669 if (size < 2 * UNITS_PER_WORD)
1670 return true;
1671 }
1672
1673 /* Otherwise, use the default padding. */
1674 return !BYTES_BIG_ENDIAN;
1675}
1676
1677static enum machine_mode
1678aarch64_libgcc_cmp_return_mode (void)
1679{
1680 return SImode;
1681}
1682
1683static bool
1684aarch64_frame_pointer_required (void)
1685{
1686 /* If the function contains dynamic stack allocations, we need to
1687 use the frame pointer to access the static parts of the frame. */
1688 if (cfun->calls_alloca)
1689 return true;
1690
1691 /* We may have turned flag_omit_frame_pointer on in order to have this
1692 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1693 and we'll check it here.
1694 If we really did set flag_omit_frame_pointer normally, then we return false
1695 (no frame pointer required) in all cases. */
1696
1697 if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1698 return false;
1699 else if (flag_omit_leaf_frame_pointer)
 1700 return !crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM);
1701 return true;
1702}
1703
1704/* Mark the registers that need to be saved by the callee and calculate
1705 the size of the callee-saved registers area and frame record (both FP
1706 and LR may be omitted). */
1707static void
1708aarch64_layout_frame (void)
1709{
1710 HOST_WIDE_INT offset = 0;
1711 int regno;
1712
1713 if (reload_completed && cfun->machine->frame.laid_out)
1714 return;
1715
1716 cfun->machine->frame.fp_lr_offset = 0;
1717
1718 /* First mark all the registers that really need to be saved... */
1719 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1720 cfun->machine->frame.reg_offset[regno] = -1;
1721
1722 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1723 cfun->machine->frame.reg_offset[regno] = -1;
1724
1725 /* ... that includes the eh data registers (if needed)... */
1726 if (crtl->calls_eh_return)
1727 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1728 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1729
1730 /* ... and any callee saved register that dataflow says is live. */
1731 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1732 if (df_regs_ever_live_p (regno)
1733 && !call_used_regs[regno])
1734 cfun->machine->frame.reg_offset[regno] = 0;
1735
1736 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1737 if (df_regs_ever_live_p (regno)
1738 && !call_used_regs[regno])
1739 cfun->machine->frame.reg_offset[regno] = 0;
1740
1741 if (frame_pointer_needed)
1742 {
1743 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1744 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1745 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1746 }
1747
1748 /* Now assign stack slots for them. */
1749 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1750 if (cfun->machine->frame.reg_offset[regno] != -1)
1751 {
1752 cfun->machine->frame.reg_offset[regno] = offset;
1753 offset += UNITS_PER_WORD;
1754 }
1755
1756 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1757 if (cfun->machine->frame.reg_offset[regno] != -1)
1758 {
1759 cfun->machine->frame.reg_offset[regno] = offset;
1760 offset += UNITS_PER_WORD;
1761 }
1762
1763 if (frame_pointer_needed)
1764 {
1765 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1766 offset += UNITS_PER_WORD;
1767 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1768 }
1769
1770 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1771 {
1772 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1773 offset += UNITS_PER_WORD;
1774 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1775 }
1776
1777 cfun->machine->frame.padding0 =
1778 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1779 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1780
1781 cfun->machine->frame.saved_regs_size = offset;
1782 cfun->machine->frame.laid_out = true;
1783}
1784
1785/* Make the last instruction frame-related and note that it performs
1786 the operation described by FRAME_PATTERN. */
1787
1788static void
1789aarch64_set_frame_expr (rtx frame_pattern)
1790{
1791 rtx insn;
1792
1793 insn = get_last_insn ();
1794 RTX_FRAME_RELATED_P (insn) = 1;
1795 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1796 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1797 frame_pattern,
1798 REG_NOTES (insn));
1799}
1800
1801static bool
1802aarch64_register_saved_on_entry (int regno)
1803{
1804 return cfun->machine->frame.reg_offset[regno] != -1;
1805}
1806
1807
1808static void
1809aarch64_save_or_restore_fprs (int start_offset, int increment,
1810 bool restore, rtx base_rtx)
1811
1812{
1813 unsigned regno;
1814 unsigned regno2;
1815 rtx insn;
1816 rtx (*gen_mem_ref)(enum machine_mode, rtx)
1817 = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1818
1819
1820 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1821 {
1822 if (aarch64_register_saved_on_entry (regno))
1823 {
1824 rtx mem;
1825 mem = gen_mem_ref (DFmode,
1826 plus_constant (Pmode,
1827 base_rtx,
1828 start_offset));
1829
1830 for (regno2 = regno + 1;
1831 regno2 <= V31_REGNUM
1832 && !aarch64_register_saved_on_entry (regno2);
1833 regno2++)
1834 {
1835 /* Empty loop. */
1836 }
1837 if (regno2 <= V31_REGNUM &&
1838 aarch64_register_saved_on_entry (regno2))
1839 {
1840 rtx mem2;
1841 /* Next highest register to be saved. */
1842 mem2 = gen_mem_ref (DFmode,
1843 plus_constant
1844 (Pmode,
1845 base_rtx,
1846 start_offset + increment));
1847 if (restore == false)
1848 {
1849 insn = emit_insn
1850 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1851 mem2, gen_rtx_REG (DFmode, regno2)));
1852
1853 }
1854 else
1855 {
1856 insn = emit_insn
1857 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1858 gen_rtx_REG (DFmode, regno2), mem2));
1859
1860 add_reg_note (insn, REG_CFA_RESTORE,
1861 gen_rtx_REG (DFmode, regno));
1862 add_reg_note (insn, REG_CFA_RESTORE,
1863 gen_rtx_REG (DFmode, regno2));
1864 }
1865
1866 /* The first part of a frame-related parallel insn
1867 is always assumed to be relevant to the frame
 1868 calculations; subsequent parts are only
1869 frame-related if explicitly marked. */
 1870 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1871 regno = regno2;
1872 start_offset += increment * 2;
1873 }
1874 else
1875 {
1876 if (restore == false)
1877 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1878 else
1879 {
1880 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1881 add_reg_note (insn, REG_CFA_RESTORE,
1882 gen_rtx_REG (DImode, regno));
1883 }
1884 start_offset += increment;
1885 }
1886 RTX_FRAME_RELATED_P (insn) = 1;
1887 }
1888 }
1889
1890}
1891
1892
 1893/* Offset from the stack pointer at which the saves and
 1894 restores have to happen. */
1895static void
1896aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1897 bool restore)
1898{
1899 rtx insn;
1900 rtx base_rtx = stack_pointer_rtx;
1901 HOST_WIDE_INT start_offset = offset;
1902 HOST_WIDE_INT increment = UNITS_PER_WORD;
1903 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1904 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1905 unsigned regno;
1906 unsigned regno2;
1907
1908 for (regno = R0_REGNUM; regno <= limit; regno++)
1909 {
1910 if (aarch64_register_saved_on_entry (regno))
1911 {
1912 rtx mem;
1913 mem = gen_mem_ref (Pmode,
1914 plus_constant (Pmode,
1915 base_rtx,
1916 start_offset));
1917
1918 for (regno2 = regno + 1;
1919 regno2 <= limit
1920 && !aarch64_register_saved_on_entry (regno2);
1921 regno2++)
1922 {
1923 /* Empty loop. */
1924 }
1925 	  if (regno2 <= limit
1926 	      && aarch64_register_saved_on_entry (regno2))
1927 {
1928 rtx mem2;
1929 /* Next highest register to be saved. */
1930 mem2 = gen_mem_ref (Pmode,
1931 plus_constant
1932 (Pmode,
1933 base_rtx,
1934 start_offset + increment));
1935 if (restore == false)
1936 {
1937 insn = emit_insn
1938 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1939 mem2, gen_rtx_REG (DImode, regno2)));
1940
1941 }
1942 else
1943 {
1944 insn = emit_insn
1945 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1946 gen_rtx_REG (DImode, regno2), mem2));
1947
1948 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1949 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1950 }
1951
1952 /* The first part of a frame-related parallel insn
1953 is always assumed to be relevant to the frame
1954 	     calculations; subsequent parts are only
1955 frame-related if explicitly marked. */
1956 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1957 1)) = 1;
1958 regno = regno2;
1959 start_offset += increment * 2;
1960 }
1961 else
1962 {
1963 if (restore == false)
1964 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1965 else
1966 {
1967 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1968 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1969 }
1970 start_offset += increment;
1971 }
1972 RTX_FRAME_RELATED_P (insn) = 1;
1973 }
1974 }
1975
1976 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1977
1978}
1979
1980/* AArch64 stack frames generated by this compiler look like:
1981
1982 +-------------------------------+
1983 | |
1984 | incoming stack arguments |
1985 | |
1986 +-------------------------------+ <-- arg_pointer_rtx
1987 | |
1988 | callee-allocated save area |
1989 | for register varargs |
1990 | |
1991 +-------------------------------+
1992 | |
1993 | local variables |
1994 | |
1995 +-------------------------------+ <-- frame_pointer_rtx
1996 | |
1997 | callee-saved registers |
1998 | |
1999 +-------------------------------+
2000 | LR' |
2001 +-------------------------------+
2002 | FP' |
2003 P +-------------------------------+ <-- hard_frame_pointer_rtx
2004 | dynamic allocation |
2005 +-------------------------------+
2006 | |
2007 | outgoing stack arguments |
2008 | |
2009 +-------------------------------+ <-- stack_pointer_rtx
2010
2011 Dynamic stack allocations such as alloca insert data at point P.
2012 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2013 hard_frame_pointer_rtx unchanged. */
2014
2015/* Generate the prologue instructions for entry into a function.
2016 Establish the stack frame by decreasing the stack pointer with a
2017 properly calculated size and, if necessary, create a frame record
2018 filled with the values of LR and previous frame pointer. The
6991c977 2019 current FP is also set up if it is in use. */
2020
2021void
2022aarch64_expand_prologue (void)
2023{
2024 /* sub sp, sp, #<frame_size>
2025 stp {fp, lr}, [sp, #<frame_size> - 16]
2026 add fp, sp, #<frame_size> - hardfp_offset
2027 stp {cs_reg}, [fp, #-16] etc.
2028
2029 sub sp, sp, <final_adjustment_if_any>
2030 */
2031 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2032 HOST_WIDE_INT frame_size, offset;
2033 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2034 rtx insn;
2035
2036 aarch64_layout_frame ();
2037 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2038 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2039 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2040 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2041 + crtl->outgoing_args_size);
2042 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2043 STACK_BOUNDARY / BITS_PER_UNIT);
2044
2045 if (flag_stack_usage_info)
2046 current_function_static_stack_size = frame_size;
2047
2048 fp_offset = (offset
2049 - original_frame_size
2050 - cfun->machine->frame.saved_regs_size);
2051
44c0e7b9 2052   /* Store pairs and load pairs have an offset range of only -512 to 504.  */
2053 if (offset >= 512)
2054 {
2055 /* When the frame has a large size, an initial decrease is done on
2056 the stack pointer to jump over the callee-allocated save area for
2057 register varargs, the local variable area and/or the callee-saved
2058 register area. This will allow the pre-index write-back
2059 store pair instructions to be used for setting up the stack frame
2060 efficiently. */
2061 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2062 if (offset >= 512)
2063 offset = cfun->machine->frame.saved_regs_size;
2064
2065 frame_size -= (offset + crtl->outgoing_args_size);
2066 fp_offset = 0;
2067
2068 if (frame_size >= 0x1000000)
2069 {
2070 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2071 emit_move_insn (op0, GEN_INT (-frame_size));
2072 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2073 aarch64_set_frame_expr (gen_rtx_SET
2074 (Pmode, stack_pointer_rtx,
2075 plus_constant (Pmode,
2076 stack_pointer_rtx,
2077 -frame_size)));
2078 }
2079 else if (frame_size > 0)
2080 {
2081 if ((frame_size & 0xfff) != frame_size)
2082 {
2083 insn = emit_insn (gen_add2_insn
2084 (stack_pointer_rtx,
2085 GEN_INT (-(frame_size
2086 & ~(HOST_WIDE_INT)0xfff))));
2087 RTX_FRAME_RELATED_P (insn) = 1;
2088 }
2089 if ((frame_size & 0xfff) != 0)
2090 {
2091 insn = emit_insn (gen_add2_insn
2092 (stack_pointer_rtx,
2093 GEN_INT (-(frame_size
2094 & (HOST_WIDE_INT)0xfff))));
2095 RTX_FRAME_RELATED_P (insn) = 1;
2096 }
2097 }
2098 }
2099 else
2100 frame_size = -1;
2101
2102 if (offset > 0)
2103 {
2104       /* If the frame pointer is needed, save it and LR first, and
2105 	  make the frame pointer point to the location of the old
2106 	  frame pointer on the stack.  */
2107 if (frame_pointer_needed)
2108 {
2109 rtx mem_fp, mem_lr;
2110
2111 if (fp_offset)
2112 {
2113 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2114 GEN_INT (-offset)));
2115 RTX_FRAME_RELATED_P (insn) = 1;
2116 aarch64_set_frame_expr (gen_rtx_SET
2117 (Pmode, stack_pointer_rtx,
2118 gen_rtx_MINUS (Pmode,
2119 stack_pointer_rtx,
2120 GEN_INT (offset))));
2121 mem_fp = gen_frame_mem (DImode,
2122 plus_constant (Pmode,
2123 stack_pointer_rtx,
2124 fp_offset));
2125 mem_lr = gen_frame_mem (DImode,
2126 plus_constant (Pmode,
2127 stack_pointer_rtx,
2128 fp_offset
2129 + UNITS_PER_WORD));
2130 insn = emit_insn (gen_store_pairdi (mem_fp,
2131 hard_frame_pointer_rtx,
2132 mem_lr,
2133 gen_rtx_REG (DImode,
2134 LR_REGNUM)));
2135 }
2136 else
2137 {
2138 insn = emit_insn (gen_storewb_pairdi_di
2139 (stack_pointer_rtx, stack_pointer_rtx,
2140 hard_frame_pointer_rtx,
2141 gen_rtx_REG (DImode, LR_REGNUM),
2142 GEN_INT (-offset),
2143 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2144 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2145 }
2146
2147 /* The first part of a frame-related parallel insn is always
2148 assumed to be relevant to the frame calculations;
2149 	     subsequent parts are only frame-related if explicitly
2150 marked. */
2151 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2152 RTX_FRAME_RELATED_P (insn) = 1;
2153
2154 /* Set up frame pointer to point to the location of the
2155 previous frame pointer on the stack. */
2156 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2157 stack_pointer_rtx,
2158 GEN_INT (fp_offset)));
2159 aarch64_set_frame_expr (gen_rtx_SET
2160 (Pmode, hard_frame_pointer_rtx,
2161 plus_constant (Pmode,
2162 stack_pointer_rtx,
2163 fp_offset)));
2164 RTX_FRAME_RELATED_P (insn) = 1;
2165 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2166 hard_frame_pointer_rtx));
2167 }
2168 else
2169 {
2170 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2171 GEN_INT (-offset)));
2172 RTX_FRAME_RELATED_P (insn) = 1;
2173 }
2174
2175 aarch64_save_or_restore_callee_save_registers
2176 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2177 }
2178
2179   /* When offset >= 512,
2180 sub sp, sp, #<outgoing_args_size> */
2181 if (frame_size > -1)
2182 {
2183 if (crtl->outgoing_args_size > 0)
2184 {
2185 insn = emit_insn (gen_add2_insn
2186 (stack_pointer_rtx,
2187 GEN_INT (- crtl->outgoing_args_size)));
2188 RTX_FRAME_RELATED_P (insn) = 1;
2189 }
2190 }
2191}
2192
2193/* Generate the epilogue instructions for returning from a function. */
2194void
2195aarch64_expand_epilogue (bool for_sibcall)
2196{
2197 HOST_WIDE_INT original_frame_size, frame_size, offset;
2198 HOST_WIDE_INT fp_offset;
2199 rtx insn;
44c0e7b9 2200 rtx cfa_reg;
2201
2202 aarch64_layout_frame ();
2203 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2204 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2205 + crtl->outgoing_args_size);
2206 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2207 STACK_BOUNDARY / BITS_PER_UNIT);
2208
2209 fp_offset = (offset
2210 - original_frame_size
2211 - cfun->machine->frame.saved_regs_size);
2212
2213 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2214
2215   /* Store pairs and load pairs have an offset range of only -512 to 504.  */
2216 if (offset >= 512)
2217 {
2218 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2219 if (offset >= 512)
2220 offset = cfun->machine->frame.saved_regs_size;
2221
2222 frame_size -= (offset + crtl->outgoing_args_size);
2223 fp_offset = 0;
2224 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2225 {
2226 insn = emit_insn (gen_add2_insn
2227 (stack_pointer_rtx,
2228 GEN_INT (crtl->outgoing_args_size)));
2229 RTX_FRAME_RELATED_P (insn) = 1;
2230 }
2231 }
2232 else
2233 frame_size = -1;
2234
2235 /* If there were outgoing arguments or we've done dynamic stack
2236 allocation, then restore the stack pointer from the frame
2237 pointer. This is at most one insn and more efficient than using
2238 GCC's internal mechanism. */
2239 if (frame_pointer_needed
2240 && (crtl->outgoing_args_size || cfun->calls_alloca))
2241 {
2242 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2243 hard_frame_pointer_rtx,
2244 GEN_INT (- fp_offset)));
2245 RTX_FRAME_RELATED_P (insn) = 1;
2246 /* As SP is set to (FP - fp_offset), according to the rules in
2247 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2248 from the value of SP from now on. */
2249 cfa_reg = stack_pointer_rtx;
2250 }
2251
2252 aarch64_save_or_restore_callee_save_registers
2253 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2254
2255 /* Restore the frame pointer and lr if the frame pointer is needed. */
2256 if (offset > 0)
2257 {
2258 if (frame_pointer_needed)
2259 {
2260 rtx mem_fp, mem_lr;
2261
2262 if (fp_offset)
2263 {
2264 mem_fp = gen_frame_mem (DImode,
2265 plus_constant (Pmode,
2266 stack_pointer_rtx,
2267 fp_offset));
2268 mem_lr = gen_frame_mem (DImode,
2269 plus_constant (Pmode,
2270 stack_pointer_rtx,
2271 fp_offset
2272 + UNITS_PER_WORD));
2273 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2274 mem_fp,
2275 gen_rtx_REG (DImode,
2276 LR_REGNUM),
2277 mem_lr));
2278 }
2279 else
2280 {
2281 insn = emit_insn (gen_loadwb_pairdi_di
2282 (stack_pointer_rtx,
2283 stack_pointer_rtx,
2284 hard_frame_pointer_rtx,
2285 gen_rtx_REG (DImode, LR_REGNUM),
2286 GEN_INT (offset),
2287 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2288 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2289 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2290 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2291 plus_constant (Pmode, cfa_reg,
2292 offset))));
2293 }
2294
2295 /* The first part of a frame-related parallel insn
2296 is always assumed to be relevant to the frame
2297 	     calculations; subsequent parts are only
2298 frame-related if explicitly marked. */
2299 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2300 RTX_FRAME_RELATED_P (insn) = 1;
2301 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2302 add_reg_note (insn, REG_CFA_RESTORE,
2303 gen_rtx_REG (DImode, LR_REGNUM));
2304
2305 if (fp_offset)
2306 {
2307 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2308 GEN_INT (offset)));
2309 RTX_FRAME_RELATED_P (insn) = 1;
2310 }
2311 }
2312 else
2313 {
2314 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2315 GEN_INT (offset)));
2316 RTX_FRAME_RELATED_P (insn) = 1;
2317 }
2318 }
2319
2320 /* Stack adjustment for exception handler. */
2321 if (crtl->calls_eh_return)
2322 {
2323 /* We need to unwind the stack by the offset computed by
2324 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2325 based on SP. Ideally we would update the SP and define the
2326 CFA along the lines of:
2327
2328 SP = SP + EH_RETURN_STACKADJ_RTX
2329 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2330
2331 However the dwarf emitter only understands a constant
2332 register offset.
2333
631b20a7 2334 The solution chosen here is to use the otherwise unused IP0
2335 as a temporary register to hold the current SP value. The
2336 CFA is described using IP0 then SP is modified. */
2337
2338 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2339
2340 insn = emit_move_insn (ip0, stack_pointer_rtx);
2341 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2342 RTX_FRAME_RELATED_P (insn) = 1;
2343
2344 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2345
2346 /* Ensure the assignment to IP0 does not get optimized away. */
2347 emit_use (ip0);
2348 }
2349
2350 if (frame_size > -1)
2351 {
2352 if (frame_size >= 0x1000000)
2353 {
2354 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2355 emit_move_insn (op0, GEN_INT (frame_size));
2356 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2357 aarch64_set_frame_expr (gen_rtx_SET
2358 (Pmode, stack_pointer_rtx,
2359 plus_constant (Pmode,
2360 stack_pointer_rtx,
2361 frame_size)));
2362 }
2363 else if (frame_size > 0)
2364 {
2365 if ((frame_size & 0xfff) != 0)
2366 {
2367 insn = emit_insn (gen_add2_insn
2368 (stack_pointer_rtx,
2369 GEN_INT ((frame_size
2370 & (HOST_WIDE_INT) 0xfff))));
2371 RTX_FRAME_RELATED_P (insn) = 1;
2372 }
2373 if ((frame_size & 0xfff) != frame_size)
2374 {
2375 insn = emit_insn (gen_add2_insn
2376 (stack_pointer_rtx,
2377 GEN_INT ((frame_size
2378 & ~ (HOST_WIDE_INT) 0xfff))));
2379 RTX_FRAME_RELATED_P (insn) = 1;
2380 }
2381 }
2382
2383 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2384 plus_constant (Pmode,
2385 stack_pointer_rtx,
2386 offset)));
2387 }
2388
2389 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2390 if (!for_sibcall)
2391 emit_jump_insn (ret_rtx);
2392}
2393
2394/* Return the place to copy the exception unwinding return address to.
2395    This will probably be a stack slot, but could (in theory) be the
2396    return register.  */
2397rtx
2398aarch64_final_eh_return_addr (void)
2399{
2400 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2401 aarch64_layout_frame ();
2402 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2403 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2404 + crtl->outgoing_args_size);
2405 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2406 STACK_BOUNDARY / BITS_PER_UNIT);
2407 fp_offset = offset
2408 - original_frame_size
2409 - cfun->machine->frame.saved_regs_size;
2410
2411 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2412 return gen_rtx_REG (DImode, LR_REGNUM);
2413
2414 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2415 result in a store to save LR introduced by builtin_eh_return () being
2416 incorrectly deleted because the alias is not detected.
2417 So in the calculation of the address to copy the exception unwinding
2418 return address to, we note 2 cases.
2419 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2420 we return a SP-relative location since all the addresses are SP-relative
2421 in this case. This prevents the store from being optimized away.
2422 If the fp_offset is not 0, then the addresses will be FP-relative and
2423 therefore we return a FP-relative location. */
2424
2425 if (frame_pointer_needed)
2426 {
2427 if (fp_offset)
2428 return gen_frame_mem (DImode,
2429 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2430 else
2431 return gen_frame_mem (DImode,
2432 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2433 }
2434
2435 /* If FP is not needed, we calculate the location of LR, which would be
2436 at the top of the saved registers block. */
2437
2438 return gen_frame_mem (DImode,
2439 plus_constant (Pmode,
2440 stack_pointer_rtx,
2441 fp_offset
2442 + cfun->machine->frame.saved_regs_size
2443 - 2 * UNITS_PER_WORD));
2444}
2445
2446/* Output code to build up a constant in a register. */
2447static void
d9600ae5 2448aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2449{
2450 if (aarch64_bitmask_imm (val, DImode))
d9600ae5 2451 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2452 else
2453 {
2454 int i;
2455 int ncount = 0;
2456 int zcount = 0;
2457 HOST_WIDE_INT valp = val >> 16;
2458 HOST_WIDE_INT valm;
2459 HOST_WIDE_INT tval;
2460
2461 for (i = 16; i < 64; i += 16)
2462 {
2463 valm = (valp & 0xffff);
2464
2465 if (valm != 0)
2466 ++ zcount;
2467
2468 if (valm != 0xffff)
2469 ++ ncount;
2470
2471 valp >>= 16;
2472 }
2473
2474 /* zcount contains the number of additional MOVK instructions
2475 required if the constant is built up with an initial MOVZ instruction,
2476 while ncount is the number of MOVK instructions required if starting
2477 	 with a MOVN instruction.  Choose the sequence that yields the
2478 	 fewer instructions, preferring the MOVZ-based sequence when the
2479 	 two counts are equal.  */
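      /* Illustrative example (added, not in the original source): for
	 val = 0x1234ffffffffffff the three upper 16-bit chunks are 0xffff,
	 0xffff and 0x1234, so zcount == 3 and ncount == 1; the MOVN-based
	 sequence (an initial all-ones move plus a single MOVK for the
	 0x1234 chunk) is chosen over a MOVZ plus three MOVKs.  */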
2480 if (ncount < zcount)
2481 {
d9600ae5 2482 emit_move_insn (gen_rtx_REG (Pmode, regnum),
d103f29b 2483 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2484 tval = 0xffff;
2485 }
2486 else
2487 {
2488 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2489 GEN_INT (val & 0xffff));
2490 tval = 0;
2491 }
2492
2493 val >>= 16;
2494
2495 for (i = 16; i < 64; i += 16)
2496 {
2497 if ((val & 0xffff) != tval)
2498 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2499 GEN_INT (i), GEN_INT (val & 0xffff)));
2500 val >>= 16;
2501 }
2502 }
2503}
2504
2505static void
d9600ae5 2506aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2507{
2508 HOST_WIDE_INT mdelta = delta;
2509 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2510 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2511
2512 if (mdelta < 0)
2513 mdelta = -mdelta;
2514
2515 if (mdelta >= 4096 * 4096)
2516 {
2517 aarch64_build_constant (scratchreg, delta);
2518 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2519 }
2520 else if (mdelta > 0)
2521 {
43e9d192 2522 if (mdelta >= 4096)
2523 {
2524 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2525 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2526 if (delta < 0)
2527 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2528 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2529 else
2530 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2531 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2532 }
43e9d192 2533 if (mdelta % 4096 != 0)
2534 {
2535 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2536 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2537 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2538 }
2539 }
2540}
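/* Illustrative example (added, not in the original source): a call such as
   aarch64_add_constant (R0_REGNUM, IP0_REGNUM, 8197) takes the mdelta > 0
   path above; 8197 / 4096 == 2 is moved into the scratch register and added
   shifted left by 12 (adding 8192), then the remainder 8197 % 4096 == 5 is
   added directly, giving a total adjustment of 8197.  */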
2541
2542/* Output code to add DELTA to the first argument, and then jump
2543 to FUNCTION. Used for C++ multiple inheritance. */
2544static void
2545aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2546 HOST_WIDE_INT delta,
2547 HOST_WIDE_INT vcall_offset,
2548 tree function)
2549{
2550 /* The this pointer is always in x0. Note that this differs from
2551    Arm where the this pointer may be bumped to r1 if r0 is required
2552 to return a pointer to an aggregate. On AArch64 a result value
2553 pointer will be in x8. */
2554 int this_regno = R0_REGNUM;
75f1d6fc 2555 rtx this_rtx, temp0, temp1, addr, insn, funexp;
43e9d192 2556
2557 reload_completed = 1;
2558 emit_note (NOTE_INSN_PROLOGUE_END);
2559
2560 if (vcall_offset == 0)
d9600ae5 2561 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2562 else
2563 {
28514dda 2564 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 2565
2566 this_rtx = gen_rtx_REG (Pmode, this_regno);
2567 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2568 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
43e9d192 2569
2570 addr = this_rtx;
2571 if (delta != 0)
2572 {
2573 if (delta >= -256 && delta < 256)
2574 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2575 plus_constant (Pmode, this_rtx, delta));
2576 else
d9600ae5 2577 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2578 }
2579
2580 if (Pmode == ptr_mode)
2581 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2582 else
2583 aarch64_emit_move (temp0,
2584 gen_rtx_ZERO_EXTEND (Pmode,
2585 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 2586
28514dda 2587 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 2588 addr = plus_constant (Pmode, temp0, vcall_offset);
2589 else
2590 {
d9600ae5 2591 aarch64_build_constant (IP1_REGNUM, vcall_offset);
75f1d6fc 2592 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2593 }
2594
2595 if (Pmode == ptr_mode)
2596 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2597 else
2598 aarch64_emit_move (temp1,
2599 gen_rtx_SIGN_EXTEND (Pmode,
2600 gen_rtx_MEM (ptr_mode, addr)));
2601
75f1d6fc 2602 emit_insn (gen_add2_insn (this_rtx, temp1));
2603 }
2604
2605 /* Generate a tail call to the target function. */
2606 if (!TREE_USED (function))
2607 {
2608 assemble_external (function);
2609 TREE_USED (function) = 1;
2610 }
2611 funexp = XEXP (DECL_RTL (function), 0);
2612 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2613 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2614 SIBLING_CALL_P (insn) = 1;
2615
2616 insn = get_insns ();
2617 shorten_branches (insn);
2618 final_start_function (insn, file, 1);
2619 final (insn, file, 1);
43e9d192 2620 final_end_function ();
2621
2622 /* Stop pretending to be a post-reload pass. */
2623 reload_completed = 0;
2624}
2625
2626static int
2627aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2628{
2629 if (GET_CODE (*x) == SYMBOL_REF)
2630 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2631
2632 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2633 TLS offsets, not real symbol references. */
2634 if (GET_CODE (*x) == UNSPEC
2635 && XINT (*x, 1) == UNSPEC_TLS)
2636 return -1;
2637
2638 return 0;
2639}
2640
2641static bool
2642aarch64_tls_referenced_p (rtx x)
2643{
2644 if (!TARGET_HAVE_TLS)
2645 return false;
2646
2647 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2648}
2649
2650
2651static int
2652aarch64_bitmasks_cmp (const void *i1, const void *i2)
2653{
2654 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2655 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2656
2657 if (*imm1 < *imm2)
2658 return -1;
2659 if (*imm1 > *imm2)
2660 return +1;
2661 return 0;
2662}
2663
2664
2665static void
2666aarch64_build_bitmask_table (void)
2667{
2668 unsigned HOST_WIDE_INT mask, imm;
2669 unsigned int log_e, e, s, r;
2670 unsigned int nimms = 0;
2671
2672 for (log_e = 1; log_e <= 6; log_e++)
2673 {
2674 e = 1 << log_e;
2675 if (e == 64)
2676 mask = ~(HOST_WIDE_INT) 0;
2677 else
2678 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2679 for (s = 1; s < e; s++)
2680 {
2681 for (r = 0; r < e; r++)
2682 {
2683 /* set s consecutive bits to 1 (s < 64) */
2684 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2685 /* rotate right by r */
2686 if (r != 0)
2687 imm = ((imm >> r) | (imm << (e - r))) & mask;
2688 /* replicate the constant depending on SIMD size */
2689 switch (log_e) {
2690 case 1: imm |= (imm << 2);
2691 case 2: imm |= (imm << 4);
2692 case 3: imm |= (imm << 8);
2693 case 4: imm |= (imm << 16);
2694 case 5: imm |= (imm << 32);
2695 case 6:
2696 break;
2697 default:
2698 gcc_unreachable ();
2699 }
2700 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2701 aarch64_bitmasks[nimms++] = imm;
2702 }
2703 }
2704 }
2705
2706 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2707 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2708 aarch64_bitmasks_cmp);
2709}
2710
2711
2712/* Return true if val can be encoded as a 12-bit unsigned immediate with
2713 a left shift of 0 or 12 bits. */
2714bool
2715aarch64_uimm12_shift (HOST_WIDE_INT val)
2716{
2717 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2718 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2719 );
2720}
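/* Examples added for clarity (illustrative only): 0xabc is accepted with a
   shift of 0 and 0xabc000 with a shift of 12, whereas 0xabc00 is rejected
   because it would require a shift of 8, which the add/sub immediate
   encoding does not provide.  */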
2721
2722
2723/* Return true if val is an immediate that can be loaded into a
2724 register by a MOVZ instruction. */
2725static bool
2726aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2727{
2728 if (GET_MODE_SIZE (mode) > 4)
2729 {
2730 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2731 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2732 return 1;
2733 }
2734 else
2735 {
2736 /* Ignore sign extension. */
2737 val &= (HOST_WIDE_INT) 0xffffffff;
2738 }
2739 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2740 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2741}
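/* Examples added for clarity (illustrative only): in DImode, 0xffff0000
   (a single 16-bit chunk starting at bit 16) and
   (HOST_WIDE_INT) 0x1234 << 48 are accepted, while 0x10001 is rejected
   because its set bits span two 16-bit chunks.  */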
2742
2743
2744/* Return true if val is a valid bitmask immediate. */
2745bool
2746aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2747{
2748 if (GET_MODE_SIZE (mode) < 8)
2749 {
2750 /* Replicate bit pattern. */
2751 val &= (HOST_WIDE_INT) 0xffffffff;
2752 val |= val << 32;
2753 }
2754 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2755 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2756}
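/* Examples added for clarity (illustrative only): 0x0f0f0f0f0f0f0f0f (a run
   of four ones replicated in every byte) is a valid bitmask immediate,
   whereas 0x12345678 is not, since its bit pattern is not a replicated,
   rotated run of consecutive ones.  As above, values in modes narrower than
   DImode are replicated to 64 bits before the table lookup.  */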
2757
2758
2759/* Return true if val is an immediate that can be loaded into a
2760 register in a single instruction. */
2761bool
2762aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2763{
2764 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2765 return 1;
2766 return aarch64_bitmask_imm (val, mode);
2767}
2768
2769static bool
2770aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2771{
2772 rtx base, offset;
7eda14e1 2773
2774 if (GET_CODE (x) == HIGH)
2775 return true;
2776
2777 split_const (x, &base, &offset);
2778 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2779 {
2780 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2781 != SYMBOL_FORCE_TO_MEM)
2782 return true;
2783 else
2784 /* Avoid generating a 64-bit relocation in ILP32; leave
2785 to aarch64_expand_mov_immediate to handle it properly. */
2786 return mode != ptr_mode;
2787 }
2788
2789 return aarch64_tls_referenced_p (x);
2790}
2791
2792/* Return true if register REGNO is a valid index register.
2793 STRICT_P is true if REG_OK_STRICT is in effect. */
2794
2795bool
2796aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2797{
2798 if (!HARD_REGISTER_NUM_P (regno))
2799 {
2800 if (!strict_p)
2801 return true;
2802
2803 if (!reg_renumber)
2804 return false;
2805
2806 regno = reg_renumber[regno];
2807 }
2808 return GP_REGNUM_P (regno);
2809}
2810
2811/* Return true if register REGNO is a valid base register.
2812 STRICT_P is true if REG_OK_STRICT is in effect. */
2813
2814bool
2815aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2816{
2817 if (!HARD_REGISTER_NUM_P (regno))
2818 {
2819 if (!strict_p)
2820 return true;
2821
2822 if (!reg_renumber)
2823 return false;
2824
2825 regno = reg_renumber[regno];
2826 }
2827
2828 /* The fake registers will be eliminated to either the stack or
2829 hard frame pointer, both of which are usually valid base registers.
2830 Reload deals with the cases where the eliminated form isn't valid. */
2831 return (GP_REGNUM_P (regno)
2832 || regno == SP_REGNUM
2833 || regno == FRAME_POINTER_REGNUM
2834 || regno == ARG_POINTER_REGNUM);
2835}
2836
2837/* Return true if X is a valid base register.
2838 STRICT_P is true if REG_OK_STRICT is in effect. */
2839
2840static bool
2841aarch64_base_register_rtx_p (rtx x, bool strict_p)
2842{
2843 if (!strict_p && GET_CODE (x) == SUBREG)
2844 x = SUBREG_REG (x);
2845
2846 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2847}
2848
2849/* Return true if address offset is a valid index. If it is, fill in INFO
2850 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2851
2852static bool
2853aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2854 enum machine_mode mode, bool strict_p)
2855{
2856 enum aarch64_address_type type;
2857 rtx index;
2858 int shift;
2859
2860 /* (reg:P) */
2861 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2862 && GET_MODE (x) == Pmode)
2863 {
2864 type = ADDRESS_REG_REG;
2865 index = x;
2866 shift = 0;
2867 }
2868 /* (sign_extend:DI (reg:SI)) */
2869 else if ((GET_CODE (x) == SIGN_EXTEND
2870 || GET_CODE (x) == ZERO_EXTEND)
2871 && GET_MODE (x) == DImode
2872 && GET_MODE (XEXP (x, 0)) == SImode)
2873 {
2874 type = (GET_CODE (x) == SIGN_EXTEND)
2875 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2876 index = XEXP (x, 0);
2877 shift = 0;
2878 }
2879 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2880 else if (GET_CODE (x) == MULT
2881 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2882 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2883 && GET_MODE (XEXP (x, 0)) == DImode
2884 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2885 && CONST_INT_P (XEXP (x, 1)))
2886 {
2887 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2888 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2889 index = XEXP (XEXP (x, 0), 0);
2890 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2891 }
2892 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2893 else if (GET_CODE (x) == ASHIFT
2894 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2895 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2896 && GET_MODE (XEXP (x, 0)) == DImode
2897 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2898 && CONST_INT_P (XEXP (x, 1)))
2899 {
2900 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2901 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2902 index = XEXP (XEXP (x, 0), 0);
2903 shift = INTVAL (XEXP (x, 1));
2904 }
2905 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2906 else if ((GET_CODE (x) == SIGN_EXTRACT
2907 || GET_CODE (x) == ZERO_EXTRACT)
2908 && GET_MODE (x) == DImode
2909 && GET_CODE (XEXP (x, 0)) == MULT
2910 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2911 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2912 {
2913 type = (GET_CODE (x) == SIGN_EXTRACT)
2914 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2915 index = XEXP (XEXP (x, 0), 0);
2916 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2917 if (INTVAL (XEXP (x, 1)) != 32 + shift
2918 || INTVAL (XEXP (x, 2)) != 0)
2919 shift = -1;
2920 }
2921 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2922 (const_int 0xffffffff<<shift)) */
2923 else if (GET_CODE (x) == AND
2924 && GET_MODE (x) == DImode
2925 && GET_CODE (XEXP (x, 0)) == MULT
2926 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2927 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2928 && CONST_INT_P (XEXP (x, 1)))
2929 {
2930 type = ADDRESS_REG_UXTW;
2931 index = XEXP (XEXP (x, 0), 0);
2932 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2933 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2934 shift = -1;
2935 }
2936 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2937 else if ((GET_CODE (x) == SIGN_EXTRACT
2938 || GET_CODE (x) == ZERO_EXTRACT)
2939 && GET_MODE (x) == DImode
2940 && GET_CODE (XEXP (x, 0)) == ASHIFT
2941 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2942 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2943 {
2944 type = (GET_CODE (x) == SIGN_EXTRACT)
2945 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2946 index = XEXP (XEXP (x, 0), 0);
2947 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2948 if (INTVAL (XEXP (x, 1)) != 32 + shift
2949 || INTVAL (XEXP (x, 2)) != 0)
2950 shift = -1;
2951 }
2952 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2953 (const_int 0xffffffff<<shift)) */
2954 else if (GET_CODE (x) == AND
2955 && GET_MODE (x) == DImode
2956 && GET_CODE (XEXP (x, 0)) == ASHIFT
2957 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2958 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2959 && CONST_INT_P (XEXP (x, 1)))
2960 {
2961 type = ADDRESS_REG_UXTW;
2962 index = XEXP (XEXP (x, 0), 0);
2963 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2964 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2965 shift = -1;
2966 }
2967 /* (mult:P (reg:P) (const_int scale)) */
2968 else if (GET_CODE (x) == MULT
2969 && GET_MODE (x) == Pmode
2970 && GET_MODE (XEXP (x, 0)) == Pmode
2971 && CONST_INT_P (XEXP (x, 1)))
2972 {
2973 type = ADDRESS_REG_REG;
2974 index = XEXP (x, 0);
2975 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2976 }
2977 /* (ashift:P (reg:P) (const_int shift)) */
2978 else if (GET_CODE (x) == ASHIFT
2979 && GET_MODE (x) == Pmode
2980 && GET_MODE (XEXP (x, 0)) == Pmode
2981 && CONST_INT_P (XEXP (x, 1)))
2982 {
2983 type = ADDRESS_REG_REG;
2984 index = XEXP (x, 0);
2985 shift = INTVAL (XEXP (x, 1));
2986 }
2987 else
2988 return false;
2989
2990 if (GET_CODE (index) == SUBREG)
2991 index = SUBREG_REG (index);
2992
2993   if ((shift == 0
2994        || (shift > 0 && shift <= 3
2995 && (1 << shift) == GET_MODE_SIZE (mode)))
2996 && REG_P (index)
2997 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2998 {
2999 info->type = type;
3000 info->offset = index;
3001 info->shift = shift;
3002 return true;
3003 }
3004
3005 return false;
3006}
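/* Illustrative example (added, not in the original source): for an SImode
   access through an address written as [x0, w1, sxtw #2], the index
   expression handed to this function by aarch64_classify_address is
   roughly (mult:DI (sign_extend:DI (reg:SI w1)) (const_int 4)), which is
   classified as ADDRESS_REG_SXTW with shift == 2, since 1 << 2 matches
   GET_MODE_SIZE (SImode).  */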
3007
3008static inline bool
3009offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3010{
3011 return (offset >= -64 * GET_MODE_SIZE (mode)
3012 && offset < 64 * GET_MODE_SIZE (mode)
3013 && offset % GET_MODE_SIZE (mode) == 0);
3014}
3015
3016static inline bool
3017offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3018 HOST_WIDE_INT offset)
3019{
3020 return offset >= -256 && offset < 256;
3021}
3022
3023static inline bool
3024offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3025{
3026 return (offset >= 0
3027 && offset < 4096 * GET_MODE_SIZE (mode)
3028 && offset % GET_MODE_SIZE (mode) == 0);
3029}
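/* Worked ranges added for clarity (illustrative only): for DImode (8-byte)
   accesses the three predicates above accept, respectively, multiples of 8
   in [-512, 504], any offset in [-256, 255], and multiples of 8 in
   [0, 32760].  */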
3030
3031/* Return true if X is a valid address for machine mode MODE. If it is,
3032 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3033 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3034
3035static bool
3036aarch64_classify_address (struct aarch64_address_info *info,
3037 rtx x, enum machine_mode mode,
3038 RTX_CODE outer_code, bool strict_p)
3039{
3040 enum rtx_code code = GET_CODE (x);
3041 rtx op0, op1;
3042 bool allow_reg_index_p =
3043 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
3044
3045 /* Don't support anything other than POST_INC or REG addressing for
3046 AdvSIMD. */
3047 if (aarch64_vector_mode_p (mode)
3048 && (code != POST_INC && code != REG))
3049 return false;
3050
3051 switch (code)
3052 {
3053 case REG:
3054 case SUBREG:
3055 info->type = ADDRESS_REG_IMM;
3056 info->base = x;
3057 info->offset = const0_rtx;
3058 return aarch64_base_register_rtx_p (x, strict_p);
3059
3060 case PLUS:
3061 op0 = XEXP (x, 0);
3062 op1 = XEXP (x, 1);
3063 if (GET_MODE_SIZE (mode) != 0
3064 && CONST_INT_P (op1)
3065 && aarch64_base_register_rtx_p (op0, strict_p))
3066 {
3067 HOST_WIDE_INT offset = INTVAL (op1);
3068
3069 info->type = ADDRESS_REG_IMM;
3070 info->base = op0;
3071 info->offset = op1;
3072
3073 /* TImode and TFmode values are allowed in both pairs of X
3074 registers and individual Q registers. The available
3075 address modes are:
3076 X,X: 7-bit signed scaled offset
3077 Q: 9-bit signed offset
3078 We conservatively require an offset representable in either mode.
3079 */
3080 if (mode == TImode || mode == TFmode)
3081 return (offset_7bit_signed_scaled_p (mode, offset)
3082 && offset_9bit_signed_unscaled_p (mode, offset));
3083
3084 if (outer_code == PARALLEL)
3085 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3086 && offset_7bit_signed_scaled_p (mode, offset));
3087 else
3088 return (offset_9bit_signed_unscaled_p (mode, offset)
3089 || offset_12bit_unsigned_scaled_p (mode, offset));
3090 }
3091
3092 if (allow_reg_index_p)
3093 {
3094 /* Look for base + (scaled/extended) index register. */
3095 if (aarch64_base_register_rtx_p (op0, strict_p)
3096 && aarch64_classify_index (info, op1, mode, strict_p))
3097 {
3098 info->base = op0;
3099 return true;
3100 }
3101 if (aarch64_base_register_rtx_p (op1, strict_p)
3102 && aarch64_classify_index (info, op0, mode, strict_p))
3103 {
3104 info->base = op1;
3105 return true;
3106 }
3107 }
3108
3109 return false;
3110
3111 case POST_INC:
3112 case POST_DEC:
3113 case PRE_INC:
3114 case PRE_DEC:
3115 info->type = ADDRESS_REG_WB;
3116 info->base = XEXP (x, 0);
3117 info->offset = NULL_RTX;
3118 return aarch64_base_register_rtx_p (info->base, strict_p);
3119
3120 case POST_MODIFY:
3121 case PRE_MODIFY:
3122 info->type = ADDRESS_REG_WB;
3123 info->base = XEXP (x, 0);
3124 if (GET_CODE (XEXP (x, 1)) == PLUS
3125 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3126 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3127 && aarch64_base_register_rtx_p (info->base, strict_p))
3128 {
3129 HOST_WIDE_INT offset;
3130 info->offset = XEXP (XEXP (x, 1), 1);
3131 offset = INTVAL (info->offset);
3132
3133 /* TImode and TFmode values are allowed in both pairs of X
3134 registers and individual Q registers. The available
3135 address modes are:
3136 X,X: 7-bit signed scaled offset
3137 Q: 9-bit signed offset
3138 We conservatively require an offset representable in either mode.
3139 */
3140 if (mode == TImode || mode == TFmode)
3141 return (offset_7bit_signed_scaled_p (mode, offset)
3142 && offset_9bit_signed_unscaled_p (mode, offset));
3143
3144 if (outer_code == PARALLEL)
3145 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3146 && offset_7bit_signed_scaled_p (mode, offset));
3147 else
3148 return offset_9bit_signed_unscaled_p (mode, offset);
3149 }
3150 return false;
3151
3152 case CONST:
3153 case SYMBOL_REF:
3154 case LABEL_REF:
3155 /* load literal: pc-relative constant pool entry. Only supported
3156 for SI mode or larger. */
43e9d192 3157 info->type = ADDRESS_SYMBOLIC;
79517551 3158 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3159 {
3160 rtx sym, addend;
3161
3162 split_const (x, &sym, &addend);
3163 return (GET_CODE (sym) == LABEL_REF
3164 || (GET_CODE (sym) == SYMBOL_REF
3165 && CONSTANT_POOL_ADDRESS_P (sym)));
3166 }
3167 return false;
3168
3169 case LO_SUM:
3170 info->type = ADDRESS_LO_SUM;
3171 info->base = XEXP (x, 0);
3172 info->offset = XEXP (x, 1);
3173 if (allow_reg_index_p
3174 && aarch64_base_register_rtx_p (info->base, strict_p))
3175 {
3176 rtx sym, offs;
3177 split_const (info->offset, &sym, &offs);
3178 if (GET_CODE (sym) == SYMBOL_REF
3179 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3180 == SYMBOL_SMALL_ABSOLUTE))
3181 {
3182 /* The symbol and offset must be aligned to the access size. */
3183 unsigned int align;
3184 unsigned int ref_size;
3185
3186 if (CONSTANT_POOL_ADDRESS_P (sym))
3187 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3188 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3189 {
3190 tree exp = SYMBOL_REF_DECL (sym);
3191 align = TYPE_ALIGN (TREE_TYPE (exp));
3192 align = CONSTANT_ALIGNMENT (exp, align);
3193 }
3194 else if (SYMBOL_REF_DECL (sym))
3195 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3196 else
3197 align = BITS_PER_UNIT;
3198
3199 ref_size = GET_MODE_SIZE (mode);
3200 if (ref_size == 0)
3201 ref_size = GET_MODE_SIZE (DImode);
3202
3203 return ((INTVAL (offs) & (ref_size - 1)) == 0
3204 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3205 }
3206 }
3207 return false;
3208
3209 default:
3210 return false;
3211 }
3212}
3213
3214bool
3215aarch64_symbolic_address_p (rtx x)
3216{
3217 rtx offset;
3218
3219 split_const (x, &x, &offset);
3220 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3221}
3222
3223/* Classify the base of symbolic expression X, given that X appears in
3224 context CONTEXT. */
3225
3226enum aarch64_symbol_type
3227aarch64_classify_symbolic_expression (rtx x,
3228 enum aarch64_symbol_context context)
3229{
3230 rtx offset;
da4f13a4 3231
3232 split_const (x, &x, &offset);
3233 return aarch64_classify_symbol (x, context);
3234}
3235
3236
3237/* Return TRUE if X is a legitimate address for accessing memory in
3238 mode MODE. */
3239static bool
3240aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3241{
3242 struct aarch64_address_info addr;
3243
3244 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3245}
3246
3247/* Return TRUE if X is a legitimate address for accessing memory in
3248 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3249 pair operation. */
3250bool
3251aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3252 RTX_CODE outer_code, bool strict_p)
3253{
3254 struct aarch64_address_info addr;
3255
3256 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3257}
3258
3259/* Return TRUE if rtx X is the immediate constant 0.0.  */
3260bool
3520f7cc 3261aarch64_float_const_zero_rtx_p (rtx x)
3262{
3263 REAL_VALUE_TYPE r;
3264
3265 if (GET_MODE (x) == VOIDmode)
3266 return false;
3267
3268 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3269 if (REAL_VALUE_MINUS_ZERO (r))
3270 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3271 return REAL_VALUES_EQUAL (r, dconst0);
3272}
3273
3274/* Return the fixed registers used for condition codes. */
3275
3276static bool
3277aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3278{
3279 *p1 = CC_REGNUM;
3280 *p2 = INVALID_REGNUM;
3281 return true;
3282}
3283
3284enum machine_mode
3285aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3286{
3287 /* All floating point compares return CCFP if it is an equality
3288 comparison, and CCFPE otherwise. */
3289 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3290 {
3291 switch (code)
3292 {
3293 case EQ:
3294 case NE:
3295 case UNORDERED:
3296 case ORDERED:
3297 case UNLT:
3298 case UNLE:
3299 case UNGT:
3300 case UNGE:
3301 case UNEQ:
3302 case LTGT:
3303 return CCFPmode;
3304
3305 case LT:
3306 case LE:
3307 case GT:
3308 case GE:
3309 return CCFPEmode;
3310
3311 default:
3312 gcc_unreachable ();
3313 }
3314 }
3315
3316 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3317 && y == const0_rtx
3318 && (code == EQ || code == NE || code == LT || code == GE)
3319 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3320 || GET_CODE (x) == NEG))
3321 return CC_NZmode;
3322
1c992d1e 3323 /* A compare with a shifted operand. Because of canonicalization,
3324 the comparison will have to be swapped when we emit the assembly
3325 code. */
3326 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3327 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3328 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3329 || GET_CODE (x) == LSHIFTRT
1c992d1e 3330 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3331 return CC_SWPmode;
3332
3333 /* Similarly for a negated operand, but we can only do this for
3334 equalities. */
3335 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3336 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3337 && (code == EQ || code == NE)
3338 && GET_CODE (x) == NEG)
3339 return CC_Zmode;
3340
3341 /* A compare of a mode narrower than SI mode against zero can be done
3342 by extending the value in the comparison. */
3343 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3344 && y == const0_rtx)
3345 /* Only use sign-extension if we really need it. */
3346 return ((code == GT || code == GE || code == LE || code == LT)
3347 ? CC_SESWPmode : CC_ZESWPmode);
3348
3349 /* For everything else, return CCmode. */
3350 return CCmode;
3351}
3352
3353static unsigned
3354aarch64_get_condition_code (rtx x)
3355{
3356 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3357 enum rtx_code comp_code = GET_CODE (x);
3358
3359 if (GET_MODE_CLASS (mode) != MODE_CC)
3360 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3361
3362 switch (mode)
3363 {
3364 case CCFPmode:
3365 case CCFPEmode:
3366 switch (comp_code)
3367 {
3368 case GE: return AARCH64_GE;
3369 case GT: return AARCH64_GT;
3370 case LE: return AARCH64_LS;
3371 case LT: return AARCH64_MI;
3372 case NE: return AARCH64_NE;
3373 case EQ: return AARCH64_EQ;
3374 case ORDERED: return AARCH64_VC;
3375 case UNORDERED: return AARCH64_VS;
3376 case UNLT: return AARCH64_LT;
3377 case UNLE: return AARCH64_LE;
3378 case UNGT: return AARCH64_HI;
3379 case UNGE: return AARCH64_PL;
3380 default: gcc_unreachable ();
3381 }
3382 break;
3383
3384 case CCmode:
3385 switch (comp_code)
3386 {
3387 case NE: return AARCH64_NE;
3388 case EQ: return AARCH64_EQ;
3389 case GE: return AARCH64_GE;
3390 case GT: return AARCH64_GT;
3391 case LE: return AARCH64_LE;
3392 case LT: return AARCH64_LT;
3393 case GEU: return AARCH64_CS;
3394 case GTU: return AARCH64_HI;
3395 case LEU: return AARCH64_LS;
3396 case LTU: return AARCH64_CC;
3397 default: gcc_unreachable ();
3398 }
3399 break;
3400
3401 case CC_SWPmode:
3402 case CC_ZESWPmode:
3403 case CC_SESWPmode:
3404 switch (comp_code)
3405 {
3406 case NE: return AARCH64_NE;
3407 case EQ: return AARCH64_EQ;
3408 case GE: return AARCH64_LE;
3409 case GT: return AARCH64_LT;
3410 case LE: return AARCH64_GE;
3411 case LT: return AARCH64_GT;
3412 case GEU: return AARCH64_LS;
3413 case GTU: return AARCH64_CC;
3414 case LEU: return AARCH64_CS;
3415 case LTU: return AARCH64_HI;
3416 default: gcc_unreachable ();
3417 }
3418 break;
3419
3420 case CC_NZmode:
3421 switch (comp_code)
3422 {
3423 case NE: return AARCH64_NE;
3424 case EQ: return AARCH64_EQ;
3425 case GE: return AARCH64_PL;
3426 case LT: return AARCH64_MI;
3427 default: gcc_unreachable ();
3428 }
3429 break;
3430
3431 case CC_Zmode:
3432 switch (comp_code)
3433 {
3434 case NE: return AARCH64_NE;
3435 case EQ: return AARCH64_EQ;
3436 default: gcc_unreachable ();
3437 }
3438 break;
3439
3440 default:
3441 gcc_unreachable ();
3442 break;
3443 }
3444}
3445
3446static unsigned
3447bit_count (unsigned HOST_WIDE_INT value)
3448{
3449 unsigned count = 0;
3450
3451 while (value)
3452 {
3453 count++;
3454 value &= value - 1;
3455 }
3456
3457 return count;
3458}
3459
3460void
3461aarch64_print_operand (FILE *f, rtx x, char code)
3462{
3463 switch (code)
3464 {
3465 /* An integer or symbol address without a preceding # sign. */
3466 case 'c':
3467 switch (GET_CODE (x))
3468 {
3469 case CONST_INT:
3470 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3471 break;
3472
3473 case SYMBOL_REF:
3474 output_addr_const (f, x);
3475 break;
3476
3477 case CONST:
3478 if (GET_CODE (XEXP (x, 0)) == PLUS
3479 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3480 {
3481 output_addr_const (f, x);
3482 break;
3483 }
3484 /* Fall through. */
3485
3486 default:
3487 output_operand_lossage ("Unsupported operand for code '%c'", code);
3488 }
3489 break;
3490
3491 case 'e':
3492 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3493 {
3494 int n;
3495
3496 if (GET_CODE (x) != CONST_INT
3497 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3498 {
3499 output_operand_lossage ("invalid operand for '%%%c'", code);
3500 return;
3501 }
3502
3503 switch (n)
3504 {
3505 case 3:
3506 fputc ('b', f);
3507 break;
3508 case 4:
3509 fputc ('h', f);
3510 break;
3511 case 5:
3512 fputc ('w', f);
3513 break;
3514 default:
3515 output_operand_lossage ("invalid operand for '%%%c'", code);
3516 return;
3517 }
3518 }
3519 break;
3520
3521 case 'p':
3522 {
3523 int n;
3524
3525 /* Print N such that 2^N == X. */
3526 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3527 {
3528 output_operand_lossage ("invalid operand for '%%%c'", code);
3529 return;
3530 }
3531
3532 asm_fprintf (f, "%d", n);
3533 }
3534 break;
3535
3536 case 'P':
3537 /* Print the number of non-zero bits in X (a const_int). */
3538 if (GET_CODE (x) != CONST_INT)
3539 {
3540 output_operand_lossage ("invalid operand for '%%%c'", code);
3541 return;
3542 }
3543
3544 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3545 break;
3546
3547 case 'H':
3548 /* Print the higher numbered register of a pair (TImode) of regs. */
3549 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3550 {
3551 output_operand_lossage ("invalid operand for '%%%c'", code);
3552 return;
3553 }
3554
01a3a324 3555 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3556 break;
3557
3558 case 'm':
3559 /* Print a condition (eq, ne, etc). */
3560
3561 /* CONST_TRUE_RTX means always -- that's the default. */
3562 if (x == const_true_rtx)
3563 return;
3564
3565 if (!COMPARISON_P (x))
3566 {
3567 output_operand_lossage ("invalid operand for '%%%c'", code);
3568 return;
3569 }
3570
3571 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3572 break;
3573
3574 case 'M':
3575 /* Print the inverse of a condition (eq <-> ne, etc). */
3576
3577 /* CONST_TRUE_RTX means never -- that's the default. */
3578 if (x == const_true_rtx)
3579 {
3580 fputs ("nv", f);
3581 return;
3582 }
3583
3584 if (!COMPARISON_P (x))
3585 {
3586 output_operand_lossage ("invalid operand for '%%%c'", code);
3587 return;
3588 }
3589
3590 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3591 (aarch64_get_condition_code (x))], f);
3592 break;
3593
3594 case 'b':
3595 case 'h':
3596 case 's':
3597 case 'd':
3598 case 'q':
3599 /* Print a scalar FP/SIMD register name. */
3600 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3601 {
3602 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3603 return;
3604 }
50ce6f88 3605 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3606 break;
3607
3608 case 'S':
3609 case 'T':
3610 case 'U':
3611 case 'V':
3612 /* Print the first FP/SIMD register name in a list. */
3613 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3614 {
3615 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3616 return;
3617 }
50ce6f88 3618 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3619 break;
3620
a05c0ddf 3621 case 'X':
50d38551 3622 /* Print bottom 16 bits of integer constant in hex. */
3623 if (GET_CODE (x) != CONST_INT)
3624 {
3625 output_operand_lossage ("invalid operand for '%%%c'", code);
3626 return;
3627 }
50d38551 3628 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3629 break;
3630
3631 case 'w':
3632 case 'x':
3633 /* Print a general register name or the zero register (32-bit or
3634 64-bit). */
3635 if (x == const0_rtx
3636 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 3637 {
50ce6f88 3638 asm_fprintf (f, "%czr", code);
3639 break;
3640 }
3641
3642 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3643 {
50ce6f88 3644 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3645 break;
3646 }
3647
3648 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3649 {
50ce6f88 3650 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3651 break;
3652 }
3653
3654 /* Fall through */
3655
3656 case 0:
3657 /* Print a normal operand, if it's a general register, then we
3658 assume DImode. */
3659 if (x == NULL)
3660 {
3661 output_operand_lossage ("missing operand");
3662 return;
3663 }
3664
3665 switch (GET_CODE (x))
3666 {
3667 case REG:
01a3a324 3668 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3669 break;
3670
3671 case MEM:
3672 aarch64_memory_reference_mode = GET_MODE (x);
3673 output_address (XEXP (x, 0));
3674 break;
3675
3676 case LABEL_REF:
3677 case SYMBOL_REF:
3678 output_addr_const (asm_out_file, x);
3679 break;
3680
3681 case CONST_INT:
3682 asm_fprintf (f, "%wd", INTVAL (x));
3683 break;
3684
3685 case CONST_VECTOR:
3686 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3687 {
3688 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3689 HOST_WIDE_INT_MIN,
3690 HOST_WIDE_INT_MAX));
3691 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3692 }
3693 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3694 {
3695 fputc ('0', f);
3696 }
3697 else
3698 gcc_unreachable ();
3699 break;
3700
3701 case CONST_DOUBLE:
3702 /* CONST_DOUBLE can represent a double-width integer.
3703 In this case, the mode of x is VOIDmode. */
3704 if (GET_MODE (x) == VOIDmode)
3705 ; /* Do Nothing. */
3706 else if (aarch64_float_const_zero_rtx_p (x))
3707 {
3708 fputc ('0', f);
3709 break;
3710 }
3711 else if (aarch64_float_const_representable_p (x))
3712 {
3713#define buf_size 20
3714 char float_buf[buf_size] = {'\0'};
3715 REAL_VALUE_TYPE r;
3716 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3717 real_to_decimal_for_mode (float_buf, &r,
3718 buf_size, buf_size,
3719 1, GET_MODE (x));
3720 asm_fprintf (asm_out_file, "%s", float_buf);
3721 break;
3722#undef buf_size
3723 }
3724 output_operand_lossage ("invalid constant");
3725 return;
3726 default:
3727 output_operand_lossage ("invalid operand");
3728 return;
3729 }
3730 break;
3731
3732 case 'A':
3733 if (GET_CODE (x) == HIGH)
3734 x = XEXP (x, 0);
3735
3736 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3737 {
3738 case SYMBOL_SMALL_GOT:
3739 asm_fprintf (asm_out_file, ":got:");
3740 break;
3741
3742 case SYMBOL_SMALL_TLSGD:
3743 asm_fprintf (asm_out_file, ":tlsgd:");
3744 break;
3745
3746 case SYMBOL_SMALL_TLSDESC:
3747 asm_fprintf (asm_out_file, ":tlsdesc:");
3748 break;
3749
3750 case SYMBOL_SMALL_GOTTPREL:
3751 asm_fprintf (asm_out_file, ":gottprel:");
3752 break;
3753
3754 case SYMBOL_SMALL_TPREL:
3755 asm_fprintf (asm_out_file, ":tprel:");
3756 break;
3757
3758 case SYMBOL_TINY_GOT:
3759 gcc_unreachable ();
3760 break;
3761
3762 default:
3763 break;
3764 }
3765 output_addr_const (asm_out_file, x);
3766 break;
3767
3768 case 'L':
3769 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3770 {
3771 case SYMBOL_SMALL_GOT:
3772 asm_fprintf (asm_out_file, ":lo12:");
3773 break;
3774
3775 case SYMBOL_SMALL_TLSGD:
3776 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3777 break;
3778
3779 case SYMBOL_SMALL_TLSDESC:
3780 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3781 break;
3782
3783 case SYMBOL_SMALL_GOTTPREL:
3784 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3785 break;
3786
3787 case SYMBOL_SMALL_TPREL:
3788 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3789 break;
3790
3791 case SYMBOL_TINY_GOT:
3792 asm_fprintf (asm_out_file, ":got:");
3793 break;
3794
3795 default:
3796 break;
3797 }
3798 output_addr_const (asm_out_file, x);
3799 break;
3800
3801 case 'G':
3802
3803 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3804 {
3805 case SYMBOL_SMALL_TPREL:
3806 asm_fprintf (asm_out_file, ":tprel_hi12:");
3807 break;
3808 default:
3809 break;
3810 }
3811 output_addr_const (asm_out_file, x);
3812 break;
3813
3814 default:
3815 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3816 return;
3817 }
3818}
3819
3820void
3821aarch64_print_operand_address (FILE *f, rtx x)
3822{
3823 struct aarch64_address_info addr;
3824
3825 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3826 MEM, true))
3827 switch (addr.type)
3828 {
3829 case ADDRESS_REG_IMM:
3830 if (addr.offset == const0_rtx)
01a3a324 3831 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43e9d192 3832 else
01a3a324 3833 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3834 INTVAL (addr.offset));
3835 return;
3836
3837 case ADDRESS_REG_REG:
3838 if (addr.shift == 0)
3839 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3840 reg_names [REGNO (addr.offset)]);
3841 else
3842 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3843 reg_names [REGNO (addr.offset)], addr.shift);
3844 return;
3845
3846 case ADDRESS_REG_UXTW:
3847 if (addr.shift == 0)
3848 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3849 REGNO (addr.offset) - R0_REGNUM);
3850 else
3851 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3852 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3853 return;
3854
3855 case ADDRESS_REG_SXTW:
3856 if (addr.shift == 0)
3857 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3858 REGNO (addr.offset) - R0_REGNUM);
3859 else
3860 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3861 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3862 return;
3863
3864 case ADDRESS_REG_WB:
3865 switch (GET_CODE (x))
3866 {
3867 case PRE_INC:
3868 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3869 GET_MODE_SIZE (aarch64_memory_reference_mode));
3870 return;
3871 case POST_INC:
3872 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3873 GET_MODE_SIZE (aarch64_memory_reference_mode));
3874 return;
3875 case PRE_DEC:
3876 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3877 GET_MODE_SIZE (aarch64_memory_reference_mode));
3878 return;
3879 case POST_DEC:
3880 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3881 GET_MODE_SIZE (aarch64_memory_reference_mode));
3882 return;
3883 case PRE_MODIFY:
3884 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3885 INTVAL (addr.offset));
3886 return;
3887 case POST_MODIFY:
3888 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3889 INTVAL (addr.offset));
3890 return;
3891 default:
3892 break;
3893 }
3894 break;
3895
3896 case ADDRESS_LO_SUM:
3897 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3898 output_addr_const (f, addr.offset);
3899 asm_fprintf (f, "]");
3900 return;
3901
3902 case ADDRESS_SYMBOLIC:
3903 break;
3904 }
3905
3906 output_addr_const (f, x);
3907}
3908
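/* Return true if the rtx X mentions a code label, ignoring the
   LABEL_REFs inside UNSPEC_TLS, which are constant offsets rather
   than symbols.  */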
3909bool
3910aarch64_label_mentioned_p (rtx x)
3911{
3912 const char *fmt;
3913 int i;
3914
3915 if (GET_CODE (x) == LABEL_REF)
3916 return true;
3917
3918 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3919 referencing instruction, but they are constant offsets, not
3920 symbols. */
3921 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3922 return false;
3923
3924 fmt = GET_RTX_FORMAT (GET_CODE (x));
3925 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3926 {
3927 if (fmt[i] == 'E')
3928 {
3929 int j;
3930
3931 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3932 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3933 return true;
3934 }
3935 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3936 return true;
3937 }
3938
3939 return false;
3940}
3941
3942/* Implement REGNO_REG_CLASS. */
3943
3944enum reg_class
3945aarch64_regno_regclass (unsigned regno)
3946{
3947 if (GP_REGNUM_P (regno))
3948 return CORE_REGS;
3949
3950 if (regno == SP_REGNUM)
3951 return STACK_REG;
3952
3953 if (regno == FRAME_POINTER_REGNUM
3954 || regno == ARG_POINTER_REGNUM)
3955 return POINTER_REGS;
3956
3957 if (FP_REGNUM_P (regno))
3958 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3959
3960 return NO_REGS;
3961}
3962
3963/* Try a machine-dependent way of reloading an illegitimate address
3964 operand. If we find one, push the reload and return the new rtx. */
3965
3966rtx
3967aarch64_legitimize_reload_address (rtx *x_p,
3968 enum machine_mode mode,
3969 int opnum, int type,
3970 int ind_levels ATTRIBUTE_UNUSED)
3971{
3972 rtx x = *x_p;
3973
3974 /* Do not allow mem (plus (reg, const)) if vector mode. */
3975 if (aarch64_vector_mode_p (mode)
3976 && GET_CODE (x) == PLUS
3977 && REG_P (XEXP (x, 0))
3978 && CONST_INT_P (XEXP (x, 1)))
3979 {
3980 rtx orig_rtx = x;
3981 x = copy_rtx (x);
3982 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3983 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3984 opnum, (enum reload_type) type);
3985 return x;
3986 }
3987
3988 /* We must recognize output that we have already generated ourselves. */
3989 if (GET_CODE (x) == PLUS
3990 && GET_CODE (XEXP (x, 0)) == PLUS
3991 && REG_P (XEXP (XEXP (x, 0), 0))
3992 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3993 && CONST_INT_P (XEXP (x, 1)))
3994 {
3995 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3996 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3997 opnum, (enum reload_type) type);
3998 return x;
3999 }
4000
4001 /* We wish to handle large displacements off a base register by splitting
4002 the addend across an add and the mem insn. This can cut the number of
4003 extra insns needed from 3 to 1. It is only useful for load/store of a
4004 single register with 12 bit offset field. */
4005 if (GET_CODE (x) == PLUS
4006 && REG_P (XEXP (x, 0))
4007 && CONST_INT_P (XEXP (x, 1))
4008 && HARD_REGISTER_P (XEXP (x, 0))
4009 && mode != TImode
4010 && mode != TFmode
4011 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4012 {
4013 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4014 HOST_WIDE_INT low = val & 0xfff;
4015 HOST_WIDE_INT high = val - low;
4016 HOST_WIDE_INT offs;
4017 rtx cst;
4018 enum machine_mode xmode = GET_MODE (x);
4019
4020 /* In ILP32, xmode can be either DImode or SImode. */
4021 gcc_assert (xmode == DImode || xmode == SImode);
4022
4023 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4024 BLKmode alignment. */
4025 if (GET_MODE_SIZE (mode) == 0)
4026 return NULL_RTX;
4027
4028 offs = low % GET_MODE_SIZE (mode);
4029
4030 /* Align misaligned offset by adjusting high part to compensate. */
4031 if (offs != 0)
4032 {
4033 if (aarch64_uimm12_shift (high + offs))
4034 {
4035 /* Align down. */
4036 low = low - offs;
4037 high = high + offs;
4038 }
4039 else
4040 {
4041 /* Align up. */
4042 offs = GET_MODE_SIZE (mode) - offs;
4043 low = low + offs;
4044 high = high + (low & 0x1000) - offs;
4045 low &= 0xfff;
4046 }
4047 }
4048
4049 /* Check for overflow. */
4050 if (high + low != val)
4051 return NULL_RTX;
4052
4053 cst = GEN_INT (high);
4054 if (!aarch64_uimm12_shift (high))
4055 cst = force_const_mem (xmode, cst);
4056
4057 /* Reload high part into base reg, leaving the low part
4058 in the mem instruction.
4059 Note that replacing this gen_rtx_PLUS with plus_constant is
4060 wrong in this case because we rely on the
4061 (plus (plus reg c1) c2) structure being preserved so that
4062 XEXP (*p, 0) in push_reload below uses the correct term. */
4063 x = gen_rtx_PLUS (xmode,
4064 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4065 GEN_INT (low));
4066
4067 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4068 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4069 opnum, (enum reload_type) type);
4070 return x;
4071 }
4072
4073 return NULL_RTX;
4074}
4075
4076
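/* Implement TARGET_SECONDARY_RELOAD.  */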
4077static reg_class_t
4078aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4079 reg_class_t rclass,
4080 enum machine_mode mode,
4081 secondary_reload_info *sri)
4082{
4083 /* Without the TARGET_SIMD instructions we cannot move a Q register
4084 to a Q register directly. We need a scratch. */
4085 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4086 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4087 && reg_class_subset_p (rclass, FP_REGS))
4088 {
4089 if (mode == TFmode)
4090 sri->icode = CODE_FOR_aarch64_reload_movtf;
4091 else if (mode == TImode)
4092 sri->icode = CODE_FOR_aarch64_reload_movti;
4093 return NO_REGS;
4094 }
4095
4096 /* A TFmode or TImode memory access should be handled via FP_REGS
4097 because AArch64 has richer addressing modes for LDR/STR instructions
4098 than LDP/STP instructions. */
4099 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4100 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4101 return FP_REGS;
4102
4103 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4104 return CORE_REGS;
4105
4106 return NO_REGS;
4107}
4108
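/* Return true if register FROM may be eliminated in favour of
   register TO.  */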
4109static bool
4110aarch64_can_eliminate (const int from, const int to)
4111{
4112 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4113 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4114
4115 if (frame_pointer_needed)
4116 {
4117 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4118 return true;
4119 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4120 return false;
4121 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4122 && !cfun->calls_alloca)
4123 return true;
4124 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4125 return true;
4126 return false;
4127 }
4128 else
4129 {
4130 /* If we decided that we didn't need a leaf frame pointer but then used
4131 LR in the function, then we'll want a frame pointer after all, so
4132 prevent this elimination to ensure a frame pointer is used.
4133
4134 NOTE: the original value of flag_omit_frame_pointer gets trashed
4135 IFF flag_omit_leaf_frame_pointer is true, so we check the value
4136 of faked_omit_frame_pointer here (which is true when we always
4137 wish to keep non-leaf frame pointers but only wish to keep leaf frame
4138 pointers when LR is clobbered). */
4139 if (to == STACK_POINTER_REGNUM
4140 && df_regs_ever_live_p (LR_REGNUM)
4141 && faked_omit_frame_pointer)
4142 return false;
4143 }
4144
4145 return true;
4146}
4147
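/* Implement INITIAL_ELIMINATION_OFFSET.  Return the offset to add when
   eliminating register FROM in favour of register TO.  */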
4148HOST_WIDE_INT
4149aarch64_initial_elimination_offset (unsigned from, unsigned to)
4150{
4151 HOST_WIDE_INT frame_size;
4152 HOST_WIDE_INT offset;
4153
4154 aarch64_layout_frame ();
4155 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4156 + crtl->outgoing_args_size
4157 + cfun->machine->saved_varargs_size);
4158
4159 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4160 offset = frame_size;
4161
4162 if (to == HARD_FRAME_POINTER_REGNUM)
4163 {
4164 if (from == ARG_POINTER_REGNUM)
4165 return offset - crtl->outgoing_args_size;
4166
4167 if (from == FRAME_POINTER_REGNUM)
4168 return cfun->machine->frame.saved_regs_size + get_frame_size ();
4169 }
4170
4171 if (to == STACK_POINTER_REGNUM)
4172 {
4173 if (from == FRAME_POINTER_REGNUM)
4174 {
4175 HOST_WIDE_INT elim = crtl->outgoing_args_size
4176 + cfun->machine->frame.saved_regs_size
4177 + get_frame_size ()
4178 - cfun->machine->frame.fp_lr_offset;
4179 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4180 return elim;
4181 }
4182 }
4183
4184 return offset;
4185}
4186
4187
4188/* Implement RETURN_ADDR_RTX. We do not support moving back to a
4189 previous frame. */
4190
4191rtx
4192aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4193{
4194 if (count != 0)
4195 return const0_rtx;
4196 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4197}
4198
4199
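/* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE.  Output the assembler code
   for the fixed portion of a trampoline to file F.  */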
4200static void
4201aarch64_asm_trampoline_template (FILE *f)
4202{
4203 if (TARGET_ILP32)
4204 {
4205 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4206 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4207 }
4208 else
4209 {
4210 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4211 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4212 }
4213 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4214 assemble_aligned_integer (4, const0_rtx);
4215 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4216 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4217}
4218
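/* Implement TARGET_TRAMPOLINE_INIT.  Fill in the target function address
   and the static chain value in the trampoline at M_TRAMP.  */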
4219static void
4220aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4221{
4222 rtx fnaddr, mem, a_tramp;
4223 const int tramp_code_sz = 16;
4224
4225 /* Don't need to copy the trailing D-words, we fill those in below. */
4226 emit_block_move (m_tramp, assemble_trampoline_template (),
4227 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4228 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4229 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4230 if (GET_MODE (fnaddr) != ptr_mode)
4231 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4232 emit_move_insn (mem, fnaddr);
4233
4234 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4235 emit_move_insn (mem, chain_value);
4236
4237 /* XXX We should really define a "clear_cache" pattern and use
4238 gen_clear_cache(). */
4239 a_tramp = XEXP (m_tramp, 0);
4240 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4241 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4242 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4243 ptr_mode);
4244}
4245
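/* Implement TARGET_CLASS_MAX_NREGS.  */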
4246static unsigned char
4247aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4248{
4249 switch (regclass)
4250 {
4251 case CORE_REGS:
4252 case POINTER_REGS:
4253 case GENERAL_REGS:
4254 case ALL_REGS:
4255 case FP_REGS:
4256 case FP_LO_REGS:
4257 return
4258 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4259 (GET_MODE_SIZE (mode) + 7) / 8;
4260 case STACK_REG:
4261 return 1;
4262
4263 case NO_REGS:
4264 return 0;
4265
4266 default:
4267 break;
4268 }
4269 gcc_unreachable ();
4270}
4271
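/* Implement TARGET_PREFERRED_RELOAD_CLASS.  */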
4272static reg_class_t
4273aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4274{
4275 if (regclass == POINTER_REGS)
4276 return GENERAL_REGS;
4277
4278 if (regclass == STACK_REG)
4279 {
4280 if (REG_P(x)
4281 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4282 return regclass;
4283
4284 return NO_REGS;
4285 }
4286
4287 /* If it's an integer immediate that MOVI can't handle, then
4288 FP_REGS is not an option, so we return NO_REGS instead. */
4289 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4290 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4291 return NO_REGS;
4292
4293 /* Register elimination can result in a request for
4294 SP+constant->FP_REGS. We cannot support such operations which
4295 use SP as source and an FP_REG as destination, so reject them
4296 right now. */
4297 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4298 {
4299 rtx lhs = XEXP (x, 0);
4300
4301 /* Look through a possible SUBREG introduced by ILP32. */
4302 if (GET_CODE (lhs) == SUBREG)
4303 lhs = SUBREG_REG (lhs);
4304
4305 gcc_assert (REG_P (lhs));
4306 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4307 POINTER_REGS));
4308 return NO_REGS;
4309 }
4310
4311 return regclass;
4312}
4313
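/* Output a reference to the label NAME to file F, applying the user
   label prefix.  */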
4314void
4315aarch64_asm_output_labelref (FILE* f, const char *name)
4316{
4317 asm_fprintf (f, "%U%s", name);
4318}
4319
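/* Output constructor SYMBOL, using a priority-specific .init_array
   section when PRIORITY is not the default.  */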
4320static void
4321aarch64_elf_asm_constructor (rtx symbol, int priority)
4322{
4323 if (priority == DEFAULT_INIT_PRIORITY)
4324 default_ctor_section_asm_out_constructor (symbol, priority);
4325 else
4326 {
4327 section *s;
4328 char buf[18];
4329 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4330 s = get_section (buf, SECTION_WRITE, NULL);
4331 switch_to_section (s);
4332 assemble_align (POINTER_SIZE);
4333 assemble_aligned_integer (POINTER_BYTES, symbol);
4334 }
4335}
4336
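/* Likewise for destructors: output SYMBOL, using a priority-specific
   .fini_array section when PRIORITY is not the default.  */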
4337static void
4338aarch64_elf_asm_destructor (rtx symbol, int priority)
4339{
4340 if (priority == DEFAULT_INIT_PRIORITY)
4341 default_dtor_section_asm_out_destructor (symbol, priority);
4342 else
4343 {
4344 section *s;
4345 char buf[18];
4346 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4347 s = get_section (buf, SECTION_WRITE, NULL);
4348 switch_to_section (s);
4349 assemble_align (POINTER_SIZE);
4350 assemble_aligned_integer (POINTER_BYTES, symbol);
4351 }
4352}
4353
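/* Output the assembly for a jump through a casesi dispatch table.  The
   ADDR_DIFF_VEC holding the table follows the insn in OPERANDS[2].  */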
4354const char*
4355aarch64_output_casesi (rtx *operands)
4356{
4357 char buf[100];
4358 char label[100];
4359 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4360 int index;
4361 static const char *const patterns[4][2] =
4362 {
4363 {
4364 "ldrb\t%w3, [%0,%w1,uxtw]",
4365 "add\t%3, %4, %w3, sxtb #2"
4366 },
4367 {
4368 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4369 "add\t%3, %4, %w3, sxth #2"
4370 },
4371 {
4372 "ldr\t%w3, [%0,%w1,uxtw #2]",
4373 "add\t%3, %4, %w3, sxtw #2"
4374 },
4375 /* We assume that DImode is only generated when not optimizing and
4376 that we don't really need 64-bit address offsets. That would
4377 imply an object file with 8GB of code in a single function! */
4378 {
4379 "ldr\t%w3, [%0,%w1,uxtw #2]",
4380 "add\t%3, %4, %w3, sxtw #2"
4381 }
4382 };
4383
4384 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4385
4386 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4387
4388 gcc_assert (index >= 0 && index <= 3);
4389
4390 /* Need to implement table size reduction, by changing the code below. */
4391 output_asm_insn (patterns[index][0], operands);
4392 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4393 snprintf (buf, sizeof (buf),
4394 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4395 output_asm_insn (buf, operands);
4396 output_asm_insn (patterns[index][1], operands);
4397 output_asm_insn ("br\t%3", operands);
4398 assemble_label (asm_out_file, label);
4399 return "";
4400}
4401
4402
4403/* Return size in bits of an arithmetic operand which is shifted/scaled and
4404 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4405 operator. */
4406
4407int
4408aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4409{
4410 if (shift >= 0 && shift <= 3)
4411 {
4412 int size;
4413 for (size = 8; size <= 32; size *= 2)
4414 {
4415 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4416 if (mask == bits << shift)
4417 return size;
4418 }
4419 }
4420 return 0;
4421}
4422
4423static bool
4424aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4425 const_rtx x ATTRIBUTE_UNUSED)
4426{
4427 /* We can't use blocks for constants when we're using a per-function
4428 constant pool. */
4429 return false;
4430}
4431
4432static section *
4433aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4434 rtx x ATTRIBUTE_UNUSED,
4435 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4436{
4437 /* Force all constant pool entries into the current function section. */
4438 return function_section (current_function_decl);
4439}
4440
4441
4442/* Costs. */
4443
4444/* Helper function for rtx cost calculation. Strip a shift expression
4445 from X. Returns the inner operand if successful, or the original
4446 expression on failure. */
4447static rtx
4448aarch64_strip_shift (rtx x)
4449{
4450 rtx op = x;
4451
4452 if ((GET_CODE (op) == ASHIFT
4453 || GET_CODE (op) == ASHIFTRT
4454 || GET_CODE (op) == LSHIFTRT)
4455 && CONST_INT_P (XEXP (op, 1)))
4456 return XEXP (op, 0);
4457
4458 if (GET_CODE (op) == MULT
4459 && CONST_INT_P (XEXP (op, 1))
4460 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4461 return XEXP (op, 0);
4462
4463 return x;
4464}
4465
4466/* Helper function for rtx cost calculation. Strip a shift or extend
4467 expression from X. Returns the inner operand if successful, or the
4468 original expression on failure. We deal with a number of possible
4469 canonicalization variations here. */
4470static rtx
4471aarch64_strip_shift_or_extend (rtx x)
4472{
4473 rtx op = x;
4474
4475 /* Zero and sign extraction of a widened value. */
4476 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4477 && XEXP (op, 2) == const0_rtx
4478 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4479 XEXP (op, 1)))
4480 return XEXP (XEXP (op, 0), 0);
4481
4482 /* It can also be represented (for zero-extend) as an AND with an
4483 immediate. */
4484 if (GET_CODE (op) == AND
4485 && GET_CODE (XEXP (op, 0)) == MULT
4486 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4487 && CONST_INT_P (XEXP (op, 1))
4488 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4489 INTVAL (XEXP (op, 1))) != 0)
4490 return XEXP (XEXP (op, 0), 0);
4491
4492 /* Now handle extended register, as this may also have an optional
4493 left shift by 1..4. */
4494 if (GET_CODE (op) == ASHIFT
4495 && CONST_INT_P (XEXP (op, 1))
4496 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4497 op = XEXP (op, 0);
4498
4499 if (GET_CODE (op) == ZERO_EXTEND
4500 || GET_CODE (op) == SIGN_EXTEND)
4501 op = XEXP (op, 0);
4502
4503 if (op != x)
4504 return op;
4505
4506 return aarch64_strip_shift (x);
4507}
4508
4509/* Calculate the cost of calculating X, storing it in *COST. Result
4510 is true if the total cost of the operation has now been calculated. */
4511static bool
4512aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4513 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4514{
4515 rtx op0, op1;
4516 const struct cpu_cost_table *extra_cost
4517 = aarch64_tune_params->insn_extra_cost;
4518
4519 switch (code)
4520 {
4521 case SET:
4522 op0 = SET_DEST (x);
4523 op1 = SET_SRC (x);
4524
4525 switch (GET_CODE (op0))
4526 {
4527 case MEM:
4528 if (speed)
4529 *cost += extra_cost->ldst.store;
4530
4531 if (op1 != const0_rtx)
4532 *cost += rtx_cost (op1, SET, 1, speed);
4533 return true;
4534
4535 case SUBREG:
4536 if (! REG_P (SUBREG_REG (op0)))
4537 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4538 /* Fall through. */
4539 case REG:
4540 /* Cost is just the cost of the RHS of the set. */
4541 *cost += rtx_cost (op1, SET, 1, true);
4542 return true;
4543
4544 case ZERO_EXTRACT: /* Bit-field insertion. */
4545 case SIGN_EXTRACT:
4546 /* Strip any redundant widening of the RHS to meet the width of
4547 the target. */
4548 if (GET_CODE (op1) == SUBREG)
4549 op1 = SUBREG_REG (op1);
4550 if ((GET_CODE (op1) == ZERO_EXTEND
4551 || GET_CODE (op1) == SIGN_EXTEND)
4552 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4553 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4554 >= INTVAL (XEXP (op0, 1))))
4555 op1 = XEXP (op1, 0);
4556 *cost += rtx_cost (op1, SET, 1, speed);
4557 return true;
4558
4559 default:
4560 break;
4561 }
4562 return false;
4563
4564 case MEM:
4565 if (speed)
4566 *cost += extra_cost->ldst.load;
4567
4568 return true;
4569
4570 case NEG:
4571 op0 = CONST0_RTX (GET_MODE (x));
4572 op1 = XEXP (x, 0);
4573 goto cost_minus;
4574
4575 case COMPARE:
4576 op0 = XEXP (x, 0);
4577 op1 = XEXP (x, 1);
4578
4579 if (op1 == const0_rtx
4580 && GET_CODE (op0) == AND)
4581 {
4582 x = op0;
4583 goto cost_logic;
4584 }
4585
4586 /* Comparisons can work if the order is swapped.
4587 Canonicalization puts the more complex operation first, but
4588 we want it in op1. */
4589 if (! (REG_P (op0)
4590 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4591 {
4592 op0 = XEXP (x, 1);
4593 op1 = XEXP (x, 0);
4594 }
4595 goto cost_minus;
4596
4597 case MINUS:
4598 op0 = XEXP (x, 0);
4599 op1 = XEXP (x, 1);
4600
4601 cost_minus:
4602 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4603 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4604 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4605 {
4606 if (op0 != const0_rtx)
4607 *cost += rtx_cost (op0, MINUS, 0, speed);
4608
4609 if (CONST_INT_P (op1))
4610 {
4611 if (!aarch64_uimm12_shift (INTVAL (op1)))
4612 *cost += rtx_cost (op1, MINUS, 1, speed);
4613 }
4614 else
4615 {
4616 op1 = aarch64_strip_shift_or_extend (op1);
4617 *cost += rtx_cost (op1, MINUS, 1, speed);
4618 }
4619 return true;
4620 }
4621
4622 return false;
4623
4624 case PLUS:
4625 op0 = XEXP (x, 0);
4626 op1 = XEXP (x, 1);
4627
4628 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4629 {
4630 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4631 {
4632 *cost += rtx_cost (op0, PLUS, 0, speed);
4633 }
4634 else
4635 {
4636 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4637
4638 if (new_op0 == op0
4639 && GET_CODE (op0) == MULT)
4640 {
4641 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4642 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4643 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4644 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4645 {
4646 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4647 speed)
4648 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4649 speed)
4650 + rtx_cost (op1, PLUS, 1, speed));
4651 if (speed)
4652 *cost +=
4653 extra_cost->mult[GET_MODE (x) == DImode].extend_add;
4654 return true;
4655 }
4656
4657 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4658 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4659 + rtx_cost (op1, PLUS, 1, speed));
4660
4661 if (speed)
4662 *cost += extra_cost->mult[GET_MODE (x) == DImode].add;
4663
4664 return true;
4665 }
4666
4667 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4668 + rtx_cost (op1, PLUS, 1, speed));
4669 }
4670 return true;
4671 }
4672
4673 return false;
4674
4675 case IOR:
4676 case XOR:
4677 case AND:
4678 cost_logic:
4679 op0 = XEXP (x, 0);
4680 op1 = XEXP (x, 1);
4681
4682 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4683 {
4684 if (CONST_INT_P (op1)
4685 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4686 {
4687 *cost += rtx_cost (op0, AND, 0, speed);
4688 }
4689 else
4690 {
4691 if (GET_CODE (op0) == NOT)
4692 op0 = XEXP (op0, 0);
4693 op0 = aarch64_strip_shift (op0);
4694 *cost += (rtx_cost (op0, AND, 0, speed)
4695 + rtx_cost (op1, AND, 1, speed));
4696 }
4697 return true;
4698 }
4699 return false;
4700
4701 case ZERO_EXTEND:
4702 if ((GET_MODE (x) == DImode
4703 && GET_MODE (XEXP (x, 0)) == SImode)
4704 || GET_CODE (XEXP (x, 0)) == MEM)
4705 {
4706 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4707 return true;
4708 }
4709 return false;
4710
4711 case SIGN_EXTEND:
4712 if (GET_CODE (XEXP (x, 0)) == MEM)
4713 {
4714 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4715 return true;
4716 }
4717 return false;
4718
4719 case ROTATE:
4720 if (!CONST_INT_P (XEXP (x, 1)))
4721 *cost += COSTS_N_INSNS (2);
4722 /* Fall through. */
4723 case ROTATERT:
4724 case LSHIFTRT:
4725 case ASHIFT:
4726 case ASHIFTRT:
4727
4728 /* Shifting by a register often takes an extra cycle. */
4729 if (speed && !CONST_INT_P (XEXP (x, 1)))
4730 *cost += extra_cost->alu.arith_shift_reg;
4731
4732 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4733 return true;
4734
4735 case HIGH:
4736 if (!CONSTANT_P (XEXP (x, 0)))
4737 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4738 return true;
4739
4740 case LO_SUM:
4741 if (!CONSTANT_P (XEXP (x, 1)))
4742 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4743 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4744 return true;
4745
4746 case ZERO_EXTRACT:
4747 case SIGN_EXTRACT:
4748 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4749 return true;
4750
4751 case MULT:
4752 op0 = XEXP (x, 0);
4753 op1 = XEXP (x, 1);
4754
4755 *cost = COSTS_N_INSNS (1);
4756 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4757 {
4758 if (CONST_INT_P (op1)
4759 && exact_log2 (INTVAL (op1)) > 0)
4760 {
4761 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4762 return true;
4763 }
4764
4765 if ((GET_CODE (op0) == ZERO_EXTEND
4766 && GET_CODE (op1) == ZERO_EXTEND)
4767 || (GET_CODE (op0) == SIGN_EXTEND
4768 && GET_CODE (op1) == SIGN_EXTEND))
4769 {
4770 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4771 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4772 if (speed)
4773 *cost += extra_cost->mult[GET_MODE (x) == DImode].extend;
4774 return true;
4775 }
4776
4777 if (speed)
4778 *cost += extra_cost->mult[GET_MODE (x) == DImode].simple;
4779 }
4780 else if (speed)
4781 {
4782 if (GET_MODE (x) == DFmode)
4783 *cost += extra_cost->fp[1].mult;
4784 else if (GET_MODE (x) == SFmode)
4785 *cost += extra_cost->fp[0].mult;
4786 }
4787
4788 return false; /* All arguments need to be in registers. */
4789
4790 case MOD:
4791 case UMOD:
4792 *cost = COSTS_N_INSNS (2);
4793 if (speed)
4794 {
4795 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4796 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
4797 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
4798 else if (GET_MODE (x) == DFmode)
4799 *cost += (extra_cost->fp[1].mult
4800 + extra_cost->fp[1].div);
4801 else if (GET_MODE (x) == SFmode)
4802 *cost += (extra_cost->fp[0].mult
4803 + extra_cost->fp[0].div);
4804 }
4805 return false; /* All arguments need to be in registers. */
4806
4807 case DIV:
4808 case UDIV:
4809 *cost = COSTS_N_INSNS (1);
4810 if (speed)
4811 {
4812 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4813 *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
4814 else if (GET_MODE (x) == DFmode)
4815 *cost += extra_cost->fp[1].div;
4816 else if (GET_MODE (x) == SFmode)
4817 *cost += extra_cost->fp[0].div;
4818 }
4819 return false; /* All arguments need to be in registers. */
4820
4821 default:
4822 break;
4823 }
4824 return false;
4825}
4826
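/* Implement TARGET_ADDRESS_COST.  */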
4827static int
4828aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4829 enum machine_mode mode ATTRIBUTE_UNUSED,
4830 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4831{
4832 enum rtx_code c = GET_CODE (x);
4833 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4834
4835 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4836 return addr_cost->pre_modify;
4837
4838 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4839 return addr_cost->post_modify;
4840
4841 if (c == PLUS)
4842 {
4843 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4844 return addr_cost->imm_offset;
4845 else if (GET_CODE (XEXP (x, 0)) == MULT
4846 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4847 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4848 return addr_cost->register_extend;
4849
4850 return addr_cost->register_offset;
4851 }
4852 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4853 return addr_cost->imm_offset;
4854
4855 return 0;
4856}
4857
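/* Implement TARGET_REGISTER_MOVE_COST.  */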
4858static int
4859aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4860 reg_class_t from, reg_class_t to)
4861{
4862 const struct cpu_regmove_cost *regmove_cost
4863 = aarch64_tune_params->regmove_cost;
4864
4865 /* Moving between GPR and stack cost is the same as GP2GP. */
4866 if ((from == GENERAL_REGS && to == STACK_REG)
4867 || (to == GENERAL_REGS && from == STACK_REG))
4868 return regmove_cost->GP2GP;
4869
4870 /* To/From the stack register, we move via the gprs. */
4871 if (to == STACK_REG || from == STACK_REG)
4872 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
4873 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
4874
4875 if (from == GENERAL_REGS && to == GENERAL_REGS)
4876 return regmove_cost->GP2GP;
4877 else if (from == GENERAL_REGS)
4878 return regmove_cost->GP2FP;
4879 else if (to == GENERAL_REGS)
4880 return regmove_cost->FP2GP;
4881
4882 /* When AdvSIMD instructions are disabled it is not possible to move
4883 a 128-bit value directly between Q registers. This is handled in
4884 secondary reload. A general register is used as a scratch to move
4885 the upper DI value and the lower DI value is moved directly,
4886 hence the cost is the sum of three moves. */
4887
4888 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
4889 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4890
4891 return regmove_cost->FP2FP;
4892}
4893
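/* Implement TARGET_MEMORY_MOVE_COST.  */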
4894static int
4895aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4896 reg_class_t rclass ATTRIBUTE_UNUSED,
4897 bool in ATTRIBUTE_UNUSED)
4898{
4899 return aarch64_tune_params->memmov_cost;
4900}
4901
4902/* Return the number of instructions that can be issued per cycle. */
4903static int
4904aarch64_sched_issue_rate (void)
4905{
4906 return aarch64_tune_params->issue_rate;
4907}
4908
4909/* Vectorizer cost model target hooks. */
4910
4911/* Implement targetm.vectorize.builtin_vectorization_cost. */
4912static int
4913aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4914 tree vectype,
4915 int misalign ATTRIBUTE_UNUSED)
4916{
4917 unsigned elements;
4918
4919 switch (type_of_cost)
4920 {
4921 case scalar_stmt:
4922 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
4923
4924 case scalar_load:
4925 return aarch64_tune_params->vec_costs->scalar_load_cost;
4926
4927 case scalar_store:
4928 return aarch64_tune_params->vec_costs->scalar_store_cost;
4929
4930 case vector_stmt:
4931 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4932
4933 case vector_load:
4934 return aarch64_tune_params->vec_costs->vec_align_load_cost;
4935
4936 case vector_store:
4937 return aarch64_tune_params->vec_costs->vec_store_cost;
4938
4939 case vec_to_scalar:
4940 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
4941
4942 case scalar_to_vec:
4943 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
4944
4945 case unaligned_load:
4946 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
4947
4948 case unaligned_store:
4949 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
4950
4951 case cond_branch_taken:
4952 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
4953
4954 case cond_branch_not_taken:
4955 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
4956
4957 case vec_perm:
4958 case vec_promote_demote:
4959 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4960
4961 case vec_construct:
4962 elements = TYPE_VECTOR_SUBPARTS (vectype);
4963 return elements / 2 + 1;
4964
4965 default:
4966 gcc_unreachable ();
4967 }
4968}
4969
4970/* Implement targetm.vectorize.add_stmt_cost. */
4971static unsigned
4972aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4973 struct _stmt_vec_info *stmt_info, int misalign,
4974 enum vect_cost_model_location where)
4975{
4976 unsigned *cost = (unsigned *) data;
4977 unsigned retval = 0;
4978
4979 if (flag_vect_cost_model)
4980 {
4981 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4982 int stmt_cost =
4983 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
4984
4985 /* Statements in an inner loop relative to the loop being
4986 vectorized are weighted more heavily. The value here is
4987 a function (linear for now) of the loop nest level. */
4988 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4989 {
4990 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4991 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
4992 unsigned nest_level = loop_depth (loop);
4993
4994 count *= nest_level;
4995 }
4996
4997 retval = (unsigned) (count * stmt_cost);
4998 cost[where] += retval;
4999 }
5000
5001 return retval;
5002}
5003
5004static void initialize_aarch64_code_model (void);
5005
5006/* Parse the architecture extension string. */
5007
5008static void
5009aarch64_parse_extension (char *str)
5010{
5011 /* The extension string is parsed left to right. */
5012 const struct aarch64_option_extension *opt = NULL;
5013
5014 /* Flag to say whether we are adding or removing an extension. */
5015 int adding_ext = -1;
5016
5017 while (str != NULL && *str != 0)
5018 {
5019 char *ext;
5020 size_t len;
5021
5022 str++;
5023 ext = strchr (str, '+');
5024
5025 if (ext != NULL)
5026 len = ext - str;
5027 else
5028 len = strlen (str);
5029
5030 if (len >= 2 && strncmp (str, "no", 2) == 0)
5031 {
5032 adding_ext = 0;
5033 len -= 2;
5034 str += 2;
5035 }
5036 else if (len > 0)
5037 adding_ext = 1;
5038
5039 if (len == 0)
5040 {
5041 error ("missing feature modifier after %qs", "+no");
5042 return;
5043 }
5044
5045 /* Scan over the extensions table trying to find an exact match. */
5046 for (opt = all_extensions; opt->name != NULL; opt++)
5047 {
5048 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
5049 {
5050 /* Add or remove the extension. */
5051 if (adding_ext)
5052 aarch64_isa_flags |= opt->flags_on;
5053 else
5054 aarch64_isa_flags &= ~(opt->flags_off);
5055 break;
5056 }
5057 }
5058
5059 if (opt->name == NULL)
5060 {
5061 /* Extension not found in list. */
5062 error ("unknown feature modifier %qs", str);
5063 return;
5064 }
5065
5066 str = ext;
5067 };
5068
5069 return;
5070}
5071
5072/* Parse the ARCH string. */
5073
5074static void
5075aarch64_parse_arch (void)
5076{
5077 char *ext;
5078 const struct processor *arch;
5079 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
5080 size_t len;
5081
5082 strcpy (str, aarch64_arch_string);
5083
5084 ext = strchr (str, '+');
5085
5086 if (ext != NULL)
5087 len = ext - str;
5088 else
5089 len = strlen (str);
5090
5091 if (len == 0)
5092 {
5093 error ("missing arch name in -march=%qs", str);
5094 return;
5095 }
5096
5097 /* Loop through the list of supported ARCHs to find a match. */
5098 for (arch = all_architectures; arch->name != NULL; arch++)
5099 {
5100 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5101 {
5102 selected_arch = arch;
5103 aarch64_isa_flags = selected_arch->flags;
5104
5105 if (!selected_cpu)
5106 selected_cpu = &all_cores[selected_arch->core];
5107
5108 if (ext != NULL)
5109 {
5110 /* ARCH string contains at least one extension. */
5111 aarch64_parse_extension (ext);
5112 }
5113
5114 if (strcmp (selected_arch->arch, selected_cpu->arch))
5115 {
5116 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
5117 selected_cpu->name, selected_arch->name);
5118 }
5119
5120 return;
5121 }
5122 }
5123
5124 /* ARCH name not found in list. */
5125 error ("unknown value %qs for -march", str);
5126 return;
5127}
5128
5129/* Parse the CPU string. */
5130
5131static void
5132aarch64_parse_cpu (void)
5133{
5134 char *ext;
5135 const struct processor *cpu;
5136 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5137 size_t len;
5138
5139 strcpy (str, aarch64_cpu_string);
5140
5141 ext = strchr (str, '+');
5142
5143 if (ext != NULL)
5144 len = ext - str;
5145 else
5146 len = strlen (str);
5147
5148 if (len == 0)
5149 {
5150 error ("missing cpu name in -mcpu=%qs", str);
5151 return;
5152 }
5153
5154 /* Loop through the list of supported CPUs to find a match. */
5155 for (cpu = all_cores; cpu->name != NULL; cpu++)
5156 {
5157 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5158 {
5159 selected_cpu = cpu;
5160 selected_tune = cpu;
5161 aarch64_isa_flags = selected_cpu->flags;
5162
5163 if (ext != NULL)
5164 {
5165 /* CPU string contains at least one extension. */
5166 aarch64_parse_extension (ext);
5167 }
5168
5169 return;
5170 }
5171 }
5172
5173 /* CPU name not found in list. */
5174 error ("unknown value %qs for -mcpu", str);
5175 return;
5176}
5177
5178/* Parse the TUNE string. */
5179
5180static void
5181aarch64_parse_tune (void)
5182{
5183 const struct processor *cpu;
5184 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5185 strcpy (str, aarch64_tune_string);
5186
5187 /* Loop through the list of supported CPUs to find a match. */
5188 for (cpu = all_cores; cpu->name != NULL; cpu++)
5189 {
5190 if (strcmp (cpu->name, str) == 0)
5191 {
5192 selected_tune = cpu;
5193 return;
5194 }
5195 }
5196
5197 /* CPU name not found in list. */
5198 error ("unknown value %qs for -mtune", str);
5199 return;
5200}
5201
5202
5203/* Implement TARGET_OPTION_OVERRIDE. */
5204
5205static void
5206aarch64_override_options (void)
5207{
5208 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
5209 If either of -march or -mtune is given, they override their
5210 respective component of -mcpu.
5211
5212 So, first parse AARCH64_CPU_STRING, then the others; be careful
5213 with -march because, if -mcpu is not present on the command line,
5214 -march must set a sensible default CPU. */
5215 if (aarch64_cpu_string)
5216 {
5217 aarch64_parse_cpu ();
5218 }
5219
5220 if (aarch64_arch_string)
5221 {
5222 aarch64_parse_arch ();
5223 }
5224
5225 if (aarch64_tune_string)
5226 {
5227 aarch64_parse_tune ();
5228 }
5229
5230#ifndef HAVE_AS_MABI_OPTION
5231 /* The compiler may have been configured with 2.23.* binutils, which does
5232 not have support for ILP32. */
5233 if (TARGET_ILP32)
5234 error ("Assembler does not support -mabi=ilp32");
5235#endif
5236
5237 initialize_aarch64_code_model ();
5238
5239 aarch64_build_bitmask_table ();
5240
5241 /* This target defaults to strict volatile bitfields. */
5242 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5243 flag_strict_volatile_bitfields = 1;
5244
5245 /* If the user did not specify a processor, choose the default
5246 one for them. This will be the CPU set during configuration using
5247 --with-cpu, otherwise it is "cortex-a53". */
5248 if (!selected_cpu)
5249 {
5250 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5251 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5252 }
5253
5254 gcc_assert (selected_cpu);
5255
5256 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
5257 if (!selected_tune)
5258 selected_tune = &all_cores[selected_cpu->core];
5259
5260 aarch64_tune_flags = selected_tune->flags;
5261 aarch64_tune = selected_tune->core;
5262 aarch64_tune_params = selected_tune->tune;
5263
5264 aarch64_override_options_after_change ();
5265}
5266
5267/* Implement targetm.override_options_after_change. */
5268
5269static void
5270aarch64_override_options_after_change (void)
5271{
5272 faked_omit_frame_pointer = false;
5273
5274 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
5275 that aarch64_frame_pointer_required will be called. We need to remember
5276 whether flag_omit_frame_pointer was turned on normally or just faked. */
5277
5278 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
5279 {
5280 flag_omit_frame_pointer = true;
5281 faked_omit_frame_pointer = true;
5282 }
5283}
5284
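/* Allocate and initialize a fresh machine_function structure for the
   current function.  */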
5285static struct machine_function *
5286aarch64_init_machine_status (void)
5287{
5288 struct machine_function *machine;
5289 machine = ggc_alloc_cleared_machine_function ();
5290 return machine;
5291}
5292
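/* Install aarch64_init_machine_status as the allocator for per-function
   machine state.  */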
5293void
5294aarch64_init_expanders (void)
5295{
5296 init_machine_status = aarch64_init_machine_status;
5297}
5298
5299/* A checking mechanism for the implementation of the various code models. */
5300static void
5301initialize_aarch64_code_model (void)
5302{
5303 if (flag_pic)
5304 {
5305 switch (aarch64_cmodel_var)
5306 {
5307 case AARCH64_CMODEL_TINY:
5308 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5309 break;
5310 case AARCH64_CMODEL_SMALL:
5311 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5312 break;
5313 case AARCH64_CMODEL_LARGE:
5314 sorry ("code model %qs with -f%s", "large",
5315 flag_pic > 1 ? "PIC" : "pic");
5316 default:
5317 gcc_unreachable ();
5318 }
5319 }
5320 else
5321 aarch64_cmodel = aarch64_cmodel_var;
5322}
5323
5324/* Return true if SYMBOL_REF X binds locally. */
5325
5326static bool
5327aarch64_symbol_binds_local_p (const_rtx x)
5328{
5329 return (SYMBOL_REF_DECL (x)
5330 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5331 : SYMBOL_REF_LOCAL_P (x));
5332}
5333
5334/* Return true if SYMBOL_REF X is thread local */
5335static bool
5336aarch64_tls_symbol_p (rtx x)
5337{
5338 if (! TARGET_HAVE_TLS)
5339 return false;
5340
5341 if (GET_CODE (x) != SYMBOL_REF)
5342 return false;
5343
5344 return SYMBOL_REF_TLS_MODEL (x) != 0;
5345}
5346
5347/* Classify a TLS symbol into one of the TLS kinds. */
5348enum aarch64_symbol_type
5349aarch64_classify_tls_symbol (rtx x)
5350{
5351 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5352
5353 switch (tls_kind)
5354 {
5355 case TLS_MODEL_GLOBAL_DYNAMIC:
5356 case TLS_MODEL_LOCAL_DYNAMIC:
5357 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5358
5359 case TLS_MODEL_INITIAL_EXEC:
5360 return SYMBOL_SMALL_GOTTPREL;
5361
5362 case TLS_MODEL_LOCAL_EXEC:
5363 return SYMBOL_SMALL_TPREL;
5364
5365 case TLS_MODEL_EMULATED:
5366 case TLS_MODEL_NONE:
5367 return SYMBOL_FORCE_TO_MEM;
5368
5369 default:
5370 gcc_unreachable ();
5371 }
5372}
5373
5374/* Return the method that should be used to access SYMBOL_REF or
5375 LABEL_REF X in context CONTEXT. */
5376
5377enum aarch64_symbol_type
5378aarch64_classify_symbol (rtx x,
5379 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5380{
5381 if (GET_CODE (x) == LABEL_REF)
5382 {
5383 switch (aarch64_cmodel)
5384 {
5385 case AARCH64_CMODEL_LARGE:
5386 return SYMBOL_FORCE_TO_MEM;
5387
5388 case AARCH64_CMODEL_TINY_PIC:
5389 case AARCH64_CMODEL_TINY:
5390 return SYMBOL_TINY_ABSOLUTE;
5391
5392 case AARCH64_CMODEL_SMALL_PIC:
5393 case AARCH64_CMODEL_SMALL:
5394 return SYMBOL_SMALL_ABSOLUTE;
5395
5396 default:
5397 gcc_unreachable ();
5398 }
5399 }
5400
5401 if (GET_CODE (x) == SYMBOL_REF)
5402 {
5403 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
5404 || CONSTANT_POOL_ADDRESS_P (x))
5405 return SYMBOL_FORCE_TO_MEM;
5406
5407 if (aarch64_tls_symbol_p (x))
5408 return aarch64_classify_tls_symbol (x);
5409
5410 switch (aarch64_cmodel)
5411 {
5412 case AARCH64_CMODEL_TINY:
5413 if (SYMBOL_REF_WEAK (x))
5414 return SYMBOL_FORCE_TO_MEM;
5415 return SYMBOL_TINY_ABSOLUTE;
5416
5417 case AARCH64_CMODEL_SMALL:
5418 if (SYMBOL_REF_WEAK (x))
5419 return SYMBOL_FORCE_TO_MEM;
5420 return SYMBOL_SMALL_ABSOLUTE;
5421
5422 case AARCH64_CMODEL_TINY_PIC:
5423 if (!aarch64_symbol_binds_local_p (x))
5424 return SYMBOL_TINY_GOT;
5425 return SYMBOL_TINY_ABSOLUTE;
5426
5427 case AARCH64_CMODEL_SMALL_PIC:
5428 if (!aarch64_symbol_binds_local_p (x))
5429 return SYMBOL_SMALL_GOT;
5430 return SYMBOL_SMALL_ABSOLUTE;
5431
5432 default:
5433 gcc_unreachable ();
5434 }
5435 }
5436
5437 /* By default push everything into the constant pool. */
5438 return SYMBOL_FORCE_TO_MEM;
5439}
5440
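/* Return true if X is a constant that is also a valid memory address.  */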
5441bool
5442aarch64_constant_address_p (rtx x)
5443{
5444 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5445}
5446
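/* Return true if X is a legitimate operand when generating PIC; direct
   symbolic references are rejected because they need special handling.  */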
5447bool
5448aarch64_legitimate_pic_operand_p (rtx x)
5449{
5450 if (GET_CODE (x) == SYMBOL_REF
5451 || (GET_CODE (x) == CONST
5452 && GET_CODE (XEXP (x, 0)) == PLUS
5453 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5454 return false;
5455
5456 return true;
5457}
5458
5459/* Return true if X holds either a quarter-precision or
5460 floating-point +0.0 constant. */
5461static bool
5462aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5463{
5464 if (!CONST_DOUBLE_P (x))
5465 return false;
5466
5467 /* TODO: We could handle moving 0.0 to a TFmode register,
5468 but first we would like to refactor the movtf_aarch64
5469 to be more amenable to splitting moves properly and
5470 correctly gating on TARGET_SIMD. For now, reject all
5471 constants that are not for SFmode or DFmode registers. */
5472 if (!(mode == SFmode || mode == DFmode))
5473 return false;
5474
5475 if (aarch64_float_const_zero_rtx_p (x))
5476 return true;
5477 return aarch64_float_const_representable_p (x);
5478}
5479
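/* Implement TARGET_LEGITIMATE_CONSTANT_P.  */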
5480static bool
5481aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5482{
5483 /* Do not allow vector struct mode constants. We could support
5484 0 and -1 easily, but they need support in aarch64-simd.md. */
5485 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5486 return false;
5487
5488 /* This could probably go away because
5489 we now decompose CONST_INTs according to expand_mov_immediate. */
5490 if ((GET_CODE (x) == CONST_VECTOR
5491 && aarch64_simd_valid_immediate (x, mode, false, NULL))
5492 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5493 return !targetm.cannot_force_const_mem (mode, x);
5494
5495 if (GET_CODE (x) == HIGH
5496 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5497 return true;
5498
5499 return aarch64_constant_address_p (x);
5500}
5501
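/* Emit an insn that loads the thread pointer into TARGET, or into a
   fresh Pmode register if TARGET is not suitable.  */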
5502rtx
5503aarch64_load_tp (rtx target)
5504{
5505 if (!target
5506 || GET_MODE (target) != Pmode
5507 || !register_operand (target, Pmode))
5508 target = gen_reg_rtx (Pmode);
5509
5510 /* Can return in any reg. */
5511 emit_insn (gen_aarch64_load_tp_hard (target));
5512 return target;
5513}
5514
5515/* On AAPCS systems, this is the "struct __va_list". */
5516static GTY(()) tree va_list_type;
5517
5518/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5519 Return the type to use as __builtin_va_list.
5520
5521 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5522
5523 struct __va_list
5524 {
5525 void *__stack;
5526 void *__gr_top;
5527 void *__vr_top;
5528 int __gr_offs;
5529 int __vr_offs;
5530 }; */
5531
5532static tree
5533aarch64_build_builtin_va_list (void)
5534{
5535 tree va_list_name;
5536 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5537
5538 /* Create the type. */
5539 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5540 /* Give it the required name. */
5541 va_list_name = build_decl (BUILTINS_LOCATION,
5542 TYPE_DECL,
5543 get_identifier ("__va_list"),
5544 va_list_type);
5545 DECL_ARTIFICIAL (va_list_name) = 1;
5546 TYPE_NAME (va_list_type) = va_list_name;
5547 TYPE_STUB_DECL (va_list_type) = va_list_name;
5548
5549 /* Create the fields. */
5550 f_stack = build_decl (BUILTINS_LOCATION,
5551 FIELD_DECL, get_identifier ("__stack"),
5552 ptr_type_node);
5553 f_grtop = build_decl (BUILTINS_LOCATION,
5554 FIELD_DECL, get_identifier ("__gr_top"),
5555 ptr_type_node);
5556 f_vrtop = build_decl (BUILTINS_LOCATION,
5557 FIELD_DECL, get_identifier ("__vr_top"),
5558 ptr_type_node);
5559 f_groff = build_decl (BUILTINS_LOCATION,
5560 FIELD_DECL, get_identifier ("__gr_offs"),
5561 integer_type_node);
5562 f_vroff = build_decl (BUILTINS_LOCATION,
5563 FIELD_DECL, get_identifier ("__vr_offs"),
5564 integer_type_node);
5565
5566 DECL_ARTIFICIAL (f_stack) = 1;
5567 DECL_ARTIFICIAL (f_grtop) = 1;
5568 DECL_ARTIFICIAL (f_vrtop) = 1;
5569 DECL_ARTIFICIAL (f_groff) = 1;
5570 DECL_ARTIFICIAL (f_vroff) = 1;
5571
5572 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5573 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5574 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5575 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5576 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5577
5578 TYPE_FIELDS (va_list_type) = f_stack;
5579 DECL_CHAIN (f_stack) = f_grtop;
5580 DECL_CHAIN (f_grtop) = f_vrtop;
5581 DECL_CHAIN (f_vrtop) = f_groff;
5582 DECL_CHAIN (f_groff) = f_vroff;
5583
5584 /* Compute its layout. */
5585 layout_type (va_list_type);
5586
5587 return va_list_type;
5588}
5589
5590/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5591static void
5592aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5593{
5594 const CUMULATIVE_ARGS *cum;
5595 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5596 tree stack, grtop, vrtop, groff, vroff;
5597 tree t;
5598 int gr_save_area_size;
5599 int vr_save_area_size;
5600 int vr_offset;
5601
5602 cum = &crtl->args.info;
5603 gr_save_area_size
5604 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5605 vr_save_area_size
5606 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5607
5608 if (TARGET_GENERAL_REGS_ONLY)
5609 {
5610 if (cum->aapcs_nvrn > 0)
5611 sorry ("%qs and floating point or vector arguments",
5612 "-mgeneral-regs-only");
5613 vr_save_area_size = 0;
5614 }
5615
5616 f_stack = TYPE_FIELDS (va_list_type_node);
5617 f_grtop = DECL_CHAIN (f_stack);
5618 f_vrtop = DECL_CHAIN (f_grtop);
5619 f_groff = DECL_CHAIN (f_vrtop);
5620 f_vroff = DECL_CHAIN (f_groff);
5621
5622 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5623 NULL_TREE);
5624 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5625 NULL_TREE);
5626 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5627 NULL_TREE);
5628 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5629 NULL_TREE);
5630 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5631 NULL_TREE);
5632
5633 /* Emit code to initialize STACK, which points to the next varargs stack
5634 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5635 by named arguments. STACK is 8-byte aligned. */
5636 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5637 if (cum->aapcs_stack_size > 0)
5638 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5639 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5640 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5641
5642 /* Emit code to initialize GRTOP, the top of the GR save area.
5643 virtual_incoming_args_rtx should have been 16 byte aligned. */
5644 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5645 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5646 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5647
5648 /* Emit code to initialize VRTOP, the top of the VR save area.
5649 This address is gr_save_area_bytes below GRTOP, rounded
5650 down to the next 16-byte boundary. */
5651 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5652 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5653 STACK_BOUNDARY / BITS_PER_UNIT);
5654
5655 if (vr_offset)
5656 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5657 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5658 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5659
5660 /* Emit code to initialize GROFF, the offset from GRTOP of the
5661 next GPR argument. */
5662 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5663 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5664 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5665
5666 /* Likewise emit code to initialize VROFF, the offset from VRTOP
5667 of the next VR argument. */
5668 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5669 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5670 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5671}
5672
5673/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5674
5675static tree
5676aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5677 gimple_seq *post_p ATTRIBUTE_UNUSED)
5678{
5679 tree addr;
5680 bool indirect_p;
5681 bool is_ha; /* is HFA or HVA. */
5682 bool dw_align; /* double-word align. */
5683 enum machine_mode ag_mode = VOIDmode;
5684 int nregs;
5685 enum machine_mode mode;
5686
5687 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5688 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5689 HOST_WIDE_INT size, rsize, adjust, align;
5690 tree t, u, cond1, cond2;
5691
5692 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5693 if (indirect_p)
5694 type = build_pointer_type (type);
5695
5696 mode = TYPE_MODE (type);
5697
5698 f_stack = TYPE_FIELDS (va_list_type_node);
5699 f_grtop = DECL_CHAIN (f_stack);
5700 f_vrtop = DECL_CHAIN (f_grtop);
5701 f_groff = DECL_CHAIN (f_vrtop);
5702 f_vroff = DECL_CHAIN (f_groff);
5703
5704 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5705 f_stack, NULL_TREE);
5706 size = int_size_in_bytes (type);
5707 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5708
5709 dw_align = false;
5710 adjust = 0;
5711 if (aarch64_vfp_is_call_or_return_candidate (mode,
5712 type,
5713 &ag_mode,
5714 &nregs,
5715 &is_ha))
5716 {
5717 /* TYPE passed in fp/simd registers. */
5718 if (TARGET_GENERAL_REGS_ONLY)
5719 sorry ("%qs and floating point or vector arguments",
5720 "-mgeneral-regs-only");
5721
5722 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5723 unshare_expr (valist), f_vrtop, NULL_TREE);
5724 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5725 unshare_expr (valist), f_vroff, NULL_TREE);
5726
5727 rsize = nregs * UNITS_PER_VREG;
5728
5729 if (is_ha)
5730 {
5731 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5732 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5733 }
5734 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5735 && size < UNITS_PER_VREG)
5736 {
5737 adjust = UNITS_PER_VREG - size;
5738 }
5739 }
5740 else
5741 {
5742 /* TYPE passed in general registers. */
5743 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5744 unshare_expr (valist), f_grtop, NULL_TREE);
5745 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5746 unshare_expr (valist), f_groff, NULL_TREE);
5747 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5748 nregs = rsize / UNITS_PER_WORD;
5749
5750 if (align > 8)
5751 dw_align = true;
5752
5753 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5754 && size < UNITS_PER_WORD)
5755 {
5756 adjust = UNITS_PER_WORD - size;
5757 }
5758 }
5759
5760 /* Get a local temporary for the field value. */
5761 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5762
5763 /* Emit code to branch if off >= 0. */
5764 t = build2 (GE_EXPR, boolean_type_node, off,
5765 build_int_cst (TREE_TYPE (off), 0));
5766 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5767
5768 if (dw_align)
5769 {
5770 /* Emit: offs = (offs + 15) & -16. */
5771 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5772 build_int_cst (TREE_TYPE (off), 15));
5773 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5774 build_int_cst (TREE_TYPE (off), -16));
5775 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5776 }
5777 else
5778 roundup = NULL;
5779
5780 /* Update ap.__[g|v]r_offs */
5781 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5782 build_int_cst (TREE_TYPE (off), rsize));
5783 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5784
5785 /* String up. */
5786 if (roundup)
5787 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5788
5789 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5790 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5791 build_int_cst (TREE_TYPE (f_off), 0));
5792 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5793
5794 /* String up: make sure the assignment happens before the use. */
5795 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5796 COND_EXPR_ELSE (cond1) = t;
5797
5798 /* Prepare the trees handling the argument that is passed on the stack;
 5799 the top-level node will be stored in ON_STACK. */
5800 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5801 if (align > 8)
5802 {
5803 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5804 t = fold_convert (intDI_type_node, arg);
5805 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5806 build_int_cst (TREE_TYPE (t), 15));
5807 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5808 build_int_cst (TREE_TYPE (t), -16));
5809 t = fold_convert (TREE_TYPE (arg), t);
5810 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5811 }
5812 else
5813 roundup = NULL;
5814 /* Advance ap.__stack */
5815 t = fold_convert (intDI_type_node, arg);
5816 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5817 build_int_cst (TREE_TYPE (t), size + 7));
5818 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5819 build_int_cst (TREE_TYPE (t), -8));
5820 t = fold_convert (TREE_TYPE (arg), t);
5821 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5822 /* String up roundup and advance. */
5823 if (roundup)
5824 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5825 /* String up with arg */
5826 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5827 /* Big-endianness related address adjustment. */
5828 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5829 && size < UNITS_PER_WORD)
5830 {
5831 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5832 size_int (UNITS_PER_WORD - size));
5833 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5834 }
5835
5836 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5837 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5838
5839 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5840 t = off;
5841 if (adjust)
5842 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5843 build_int_cst (TREE_TYPE (off), adjust));
5844
5845 t = fold_convert (sizetype, t);
5846 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5847
5848 if (is_ha)
5849 {
5850 /* type ha; // treat as "struct {ftype field[n];}"
5851 ... [computing offs]
5852 for (i = 0; i <nregs; ++i, offs += 16)
5853 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5854 return ha; */
5855 int i;
5856 tree tmp_ha, field_t, field_ptr_t;
5857
5858 /* Declare a local variable. */
5859 tmp_ha = create_tmp_var_raw (type, "ha");
5860 gimple_add_tmp_var (tmp_ha);
5861
5862 /* Establish the base type. */
5863 switch (ag_mode)
5864 {
5865 case SFmode:
5866 field_t = float_type_node;
5867 field_ptr_t = float_ptr_type_node;
5868 break;
5869 case DFmode:
5870 field_t = double_type_node;
5871 field_ptr_t = double_ptr_type_node;
5872 break;
5873 case TFmode:
5874 field_t = long_double_type_node;
5875 field_ptr_t = long_double_ptr_type_node;
5876 break;
5877/* The half precision and quad precision are not fully supported yet. Enable
5878 the following code after the support is complete. Need to find the correct
5879 type node for __fp16 *. */
5880#if 0
5881 case HFmode:
5882 field_t = float_type_node;
5883 field_ptr_t = float_ptr_type_node;
5884 break;
5885#endif
5886 case V2SImode:
5887 case V4SImode:
5888 {
5889 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5890 field_t = build_vector_type_for_mode (innertype, ag_mode);
5891 field_ptr_t = build_pointer_type (field_t);
5892 }
5893 break;
5894 default:
5895 gcc_assert (0);
5896 }
5897
5898 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
5899 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5900 addr = t;
5901 t = fold_convert (field_ptr_t, addr);
5902 t = build2 (MODIFY_EXPR, field_t,
5903 build1 (INDIRECT_REF, field_t, tmp_ha),
5904 build1 (INDIRECT_REF, field_t, t));
5905
5906 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5907 for (i = 1; i < nregs; ++i)
5908 {
5909 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5910 u = fold_convert (field_ptr_t, addr);
5911 u = build2 (MODIFY_EXPR, field_t,
5912 build2 (MEM_REF, field_t, tmp_ha,
5913 build_int_cst (field_ptr_t,
5914 (i *
5915 int_size_in_bytes (field_t)))),
5916 build1 (INDIRECT_REF, field_t, u));
5917 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5918 }
5919
5920 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5921 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5922 }
5923
5924 COND_EXPR_ELSE (cond2) = t;
5925 addr = fold_convert (build_pointer_type (type), cond1);
5926 addr = build_va_arg_indirect_ref (addr);
5927
5928 if (indirect_p)
5929 addr = build_va_arg_indirect_ref (addr);
5930
5931 return addr;
5932}
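/* Roughly, the trees built above implement the following pseudo-code for
   an argument taken from the general-register save area (the vector case
   uses __vr_offs/__vr_top in the same way):

     off = ap.__gr_offs;
     if (off >= 0)
       goto on_stack;
     ap.__gr_offs = off + rsize;      // rounded up to 16 first if dw_align
     if (ap.__gr_offs > 0)
       goto on_stack;
     addr = ap.__gr_top + off;        // plus any big-endian adjustment
     goto done;
   on_stack:
     addr = ap.__stack;               // 16-byte aligned first if needed
     ap.__stack = (ap.__stack + size + 7) & -8;
   done:
     return addr;                     // dereferenced again if indirect_p
*/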
5933
5934/* Implement TARGET_SETUP_INCOMING_VARARGS. */
5935
5936static void
5937aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5938 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5939 int no_rtl)
5940{
5941 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5942 CUMULATIVE_ARGS local_cum;
5943 int gr_saved, vr_saved;
5944
5945 /* The caller has advanced CUM up to, but not beyond, the last named
5946 argument. Advance a local copy of CUM past the last "real" named
5947 argument, to find out how many registers are left over. */
5948 local_cum = *cum;
5949 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
5950
 5951 /* Find out how many registers we need to save. */
5952 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5953 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5954
5955 if (TARGET_GENERAL_REGS_ONLY)
5956 {
5957 if (local_cum.aapcs_nvrn > 0)
5958 sorry ("%qs and floating point or vector arguments",
5959 "-mgeneral-regs-only");
5960 vr_saved = 0;
5961 }
5962
5963 if (!no_rtl)
5964 {
5965 if (gr_saved > 0)
5966 {
5967 rtx ptr, mem;
5968
5969 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5970 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5971 - gr_saved * UNITS_PER_WORD);
5972 mem = gen_frame_mem (BLKmode, ptr);
5973 set_mem_alias_set (mem, get_varargs_alias_set ());
5974
5975 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5976 mem, gr_saved);
5977 }
5978 if (vr_saved > 0)
5979 {
5980 /* We can't use move_block_from_reg, because it will use
5981 the wrong mode, storing D regs only. */
5982 enum machine_mode mode = TImode;
5983 int off, i;
5984
5985 /* Set OFF to the offset from virtual_incoming_args_rtx of
5986 the first vector register. The VR save area lies below
5987 the GR one, and is aligned to 16 bytes. */
5988 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5989 STACK_BOUNDARY / BITS_PER_UNIT);
5990 off -= vr_saved * UNITS_PER_VREG;
5991
5992 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5993 {
5994 rtx ptr, mem;
5995
5996 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5997 mem = gen_frame_mem (mode, ptr);
5998 set_mem_alias_set (mem, get_varargs_alias_set ());
5999 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
6000 off += UNITS_PER_VREG;
6001 }
6002 }
6003 }
6004
6005 /* We don't save the size into *PRETEND_SIZE because we want to avoid
6006 any complication of having crtl->args.pretend_args_size changed. */
6007 cfun->machine->saved_varargs_size
6008 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6009 STACK_BOUNDARY / BITS_PER_UNIT)
6010 + vr_saved * UNITS_PER_VREG);
6011}
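/* As an illustration, for "int f (int n, ...)" the one named argument
   uses x0, so gr_saved is 7 and vr_saved is 8; the code above stores
   x1-x7 (56 bytes, rounded up to 64) and q0-q7 (128 bytes) below the
   incoming argument pointer, and saved_varargs_size becomes
   64 + 128 = 192.  */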
6012
6013static void
6014aarch64_conditional_register_usage (void)
6015{
6016 int i;
6017 if (!TARGET_FLOAT)
6018 {
6019 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
6020 {
6021 fixed_regs[i] = 1;
6022 call_used_regs[i] = 1;
6023 }
6024 }
6025}
6026
6027/* Walk down the type tree of TYPE counting consecutive base elements.
6028 If *MODEP is VOIDmode, then set it to the first valid floating point
6029 type. If a non-floating point type is found, or if a floating point
6030 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6031 otherwise return the count in the sub-tree. */
6032static int
6033aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
6034{
6035 enum machine_mode mode;
6036 HOST_WIDE_INT size;
6037
6038 switch (TREE_CODE (type))
6039 {
6040 case REAL_TYPE:
6041 mode = TYPE_MODE (type);
6042 if (mode != DFmode && mode != SFmode && mode != TFmode)
6043 return -1;
6044
6045 if (*modep == VOIDmode)
6046 *modep = mode;
6047
6048 if (*modep == mode)
6049 return 1;
6050
6051 break;
6052
6053 case COMPLEX_TYPE:
6054 mode = TYPE_MODE (TREE_TYPE (type));
6055 if (mode != DFmode && mode != SFmode && mode != TFmode)
6056 return -1;
6057
6058 if (*modep == VOIDmode)
6059 *modep = mode;
6060
6061 if (*modep == mode)
6062 return 2;
6063
6064 break;
6065
6066 case VECTOR_TYPE:
6067 /* Use V2SImode and V4SImode as representatives of all 64-bit
6068 and 128-bit vector types. */
6069 size = int_size_in_bytes (type);
6070 switch (size)
6071 {
6072 case 8:
6073 mode = V2SImode;
6074 break;
6075 case 16:
6076 mode = V4SImode;
6077 break;
6078 default:
6079 return -1;
6080 }
6081
6082 if (*modep == VOIDmode)
6083 *modep = mode;
6084
6085 /* Vector modes are considered to be opaque: two vectors are
6086 equivalent for the purposes of being homogeneous aggregates
6087 if they are the same size. */
6088 if (*modep == mode)
6089 return 1;
6090
6091 break;
6092
6093 case ARRAY_TYPE:
6094 {
6095 int count;
6096 tree index = TYPE_DOMAIN (type);
6097
6098 /* Can't handle incomplete types. */
6099 if (!COMPLETE_TYPE_P (type))
6100 return -1;
6101
6102 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6103 if (count == -1
6104 || !index
6105 || !TYPE_MAX_VALUE (index)
cc269bb6 6106 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 6107 || !TYPE_MIN_VALUE (index)
cc269bb6 6108 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6109 || count < 0)
6110 return -1;
6111
6112 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6113 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6114
6115 /* There must be no padding. */
cc269bb6 6116 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
7d362f6c 6117 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6118 != count * GET_MODE_BITSIZE (*modep)))
6119 return -1;
6120
6121 return count;
6122 }
6123
6124 case RECORD_TYPE:
6125 {
6126 int count = 0;
6127 int sub_count;
6128 tree field;
6129
6130 /* Can't handle incomplete types. */
6131 if (!COMPLETE_TYPE_P (type))
6132 return -1;
6133
6134 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6135 {
6136 if (TREE_CODE (field) != FIELD_DECL)
6137 continue;
6138
6139 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6140 if (sub_count < 0)
6141 return -1;
6142 count += sub_count;
6143 }
6144
6145 /* There must be no padding. */
cc269bb6 6146 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
7d362f6c 6147 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6148 != count * GET_MODE_BITSIZE (*modep)))
6149 return -1;
6150
6151 return count;
6152 }
6153
6154 case UNION_TYPE:
6155 case QUAL_UNION_TYPE:
6156 {
6157 /* These aren't very interesting except in a degenerate case. */
6158 int count = 0;
6159 int sub_count;
6160 tree field;
6161
6162 /* Can't handle incomplete types. */
6163 if (!COMPLETE_TYPE_P (type))
6164 return -1;
6165
6166 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6167 {
6168 if (TREE_CODE (field) != FIELD_DECL)
6169 continue;
6170
6171 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6172 if (sub_count < 0)
6173 return -1;
6174 count = count > sub_count ? count : sub_count;
6175 }
6176
6177 /* There must be no padding. */
cc269bb6 6178 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
7d362f6c 6179 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6180 != count * GET_MODE_BITSIZE (*modep)))
6181 return -1;
6182
6183 return count;
6184 }
6185
6186 default:
6187 break;
6188 }
6189
6190 return -1;
6191}
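/* Examples of the walk above: "struct { float x, y, z; }" returns 3 with
   *MODEP set to SFmode (a homogeneous floating-point aggregate), while
   "struct { float f; double d; }" returns -1 because the DFmode field
   does not match the SFmode already recorded in *MODEP.  */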
6192
6193/* Return true if we use LRA instead of reload pass. */
6194static bool
6195aarch64_lra_p (void)
6196{
6197 return aarch64_lra_flag;
6198}
6199
6200/* Return TRUE if the type, as described by TYPE and MODE, is a composite
6201 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6202 array types. The C99 floating-point complex types are also considered
6203 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6204 types, which are GCC extensions and out of the scope of AAPCS64, are
6205 treated as composite types here as well.
6206
6207 Note that MODE itself is not sufficient in determining whether a type
6208 is such a composite type or not. This is because
6209 stor-layout.c:compute_record_mode may have already changed the MODE
6210 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6211 structure with only one field may have its MODE set to the mode of the
6212 field. Also an integer mode whose size matches the size of the
6213 RECORD_TYPE type may be used to substitute the original mode
6214 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6215 solely relied on. */
6216
6217static bool
6218aarch64_composite_type_p (const_tree type,
6219 enum machine_mode mode)
6220{
6221 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6222 return true;
6223
6224 if (mode == BLKmode
6225 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6226 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6227 return true;
6228
6229 return false;
6230}
6231
6232/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6233 type as described in AAPCS64 \S 4.1.2.
6234
6235 See the comment above aarch64_composite_type_p for the notes on MODE. */
6236
6237static bool
6238aarch64_short_vector_p (const_tree type,
6239 enum machine_mode mode)
6240{
6241 HOST_WIDE_INT size = -1;
6242
6243 if (type && TREE_CODE (type) == VECTOR_TYPE)
6244 size = int_size_in_bytes (type);
6245 else if (!aarch64_composite_type_p (type, mode)
6246 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6247 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6248 size = GET_MODE_SIZE (mode);
6249
6250 return (size == 8 || size == 16) ? true : false;
6251}
6252
6253/* Return TRUE if an argument, whose type is described by TYPE and MODE,
6254 shall be passed or returned in simd/fp register(s) (providing these
6255 parameter passing registers are available).
6256
6257 Upon successful return, *COUNT returns the number of needed registers,
 6258 *BASE_MODE returns the mode of the individual register and when IS_HA
6259 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6260 floating-point aggregate or a homogeneous short-vector aggregate. */
6261
6262static bool
6263aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6264 const_tree type,
6265 enum machine_mode *base_mode,
6266 int *count,
6267 bool *is_ha)
6268{
6269 enum machine_mode new_mode = VOIDmode;
6270 bool composite_p = aarch64_composite_type_p (type, mode);
6271
6272 if (is_ha != NULL) *is_ha = false;
6273
6274 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6275 || aarch64_short_vector_p (type, mode))
6276 {
6277 *count = 1;
6278 new_mode = mode;
6279 }
6280 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6281 {
6282 if (is_ha != NULL) *is_ha = true;
6283 *count = 2;
6284 new_mode = GET_MODE_INNER (mode);
6285 }
6286 else if (type && composite_p)
6287 {
6288 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6289
6290 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6291 {
6292 if (is_ha != NULL) *is_ha = true;
6293 *count = ag_count;
6294 }
6295 else
6296 return false;
6297 }
6298 else
6299 return false;
6300
6301 *base_mode = new_mode;
6302 return true;
6303}
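/* For example, an argument of type "struct { float v[4]; }" comes back
   with *COUNT == 4, *BASE_MODE == SFmode and *IS_HA set, so it can be
   passed in s0-s3 when enough registers are free, while "_Complex double"
   yields *COUNT == 2 and *BASE_MODE == DFmode.  */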
6304
6305/* Implement TARGET_STRUCT_VALUE_RTX. */
6306
6307static rtx
6308aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6309 int incoming ATTRIBUTE_UNUSED)
6310{
6311 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6312}
6313
6314/* Implements target hook vector_mode_supported_p. */
6315static bool
6316aarch64_vector_mode_supported_p (enum machine_mode mode)
6317{
6318 if (TARGET_SIMD
6319 && (mode == V4SImode || mode == V8HImode
6320 || mode == V16QImode || mode == V2DImode
6321 || mode == V2SImode || mode == V4HImode
6322 || mode == V8QImode || mode == V2SFmode
6323 || mode == V4SFmode || mode == V2DFmode))
6324 return true;
6325
6326 return false;
6327}
6328
6329/* Return appropriate SIMD container
6330 for MODE within a vector of WIDTH bits. */
43e9d192 6331static enum machine_mode
b7342d25 6332aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
43e9d192 6333{
b7342d25 6334 gcc_assert (width == 64 || width == 128);
43e9d192 6335 if (TARGET_SIMD)
6336 {
6337 if (width == 128)
6338 switch (mode)
6339 {
6340 case DFmode:
6341 return V2DFmode;
6342 case SFmode:
6343 return V4SFmode;
6344 case SImode:
6345 return V4SImode;
6346 case HImode:
6347 return V8HImode;
6348 case QImode:
6349 return V16QImode;
6350 case DImode:
6351 return V2DImode;
6352 default:
6353 break;
6354 }
6355 else
6356 switch (mode)
6357 {
6358 case SFmode:
6359 return V2SFmode;
6360 case SImode:
6361 return V2SImode;
6362 case HImode:
6363 return V4HImode;
6364 case QImode:
6365 return V8QImode;
6366 default:
6367 break;
6368 }
6369 }
6370 return word_mode;
6371}
6372
6373/* Return 128-bit container as the preferred SIMD mode for MODE. */
6374static enum machine_mode
6375aarch64_preferred_simd_mode (enum machine_mode mode)
6376{
6377 return aarch64_simd_container_mode (mode, 128);
6378}
6379
6380/* Return the bitmask of possible vector sizes for the vectorizer
6381 to iterate over. */
6382static unsigned int
6383aarch64_autovectorize_vector_sizes (void)
6384{
6385 return (16 | 8);
6386}
6387
6388/* A table to help perform AArch64-specific name mangling for AdvSIMD
6389 vector types in order to conform to the AAPCS64 (see "Procedure
6390 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6391 qualify for emission with the mangled names defined in that document,
6392 a vector type must not only be of the correct mode but also be
6393 composed of AdvSIMD vector element types (e.g.
6394 _builtin_aarch64_simd_qi); these types are registered by
6395 aarch64_init_simd_builtins (). In other words, vector types defined
6396 in other ways e.g. via vector_size attribute will get default
6397 mangled names. */
6398typedef struct
6399{
6400 enum machine_mode mode;
6401 const char *element_type_name;
6402 const char *mangled_name;
6403} aarch64_simd_mangle_map_entry;
6404
6405static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6406 /* 64-bit containerized types. */
6407 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6408 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6409 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6410 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6411 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6412 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6413 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6414 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6415 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6416 /* 128-bit containerized types. */
6417 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6418 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6419 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6420 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6421 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6422 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6423 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6424 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6425 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6426 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6427 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6428 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
7baa225d 6429 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
6430 { VOIDmode, NULL, NULL }
6431};
6432
6433/* Implement TARGET_MANGLE_TYPE. */
6434
6f549691 6435static const char *
6436aarch64_mangle_type (const_tree type)
6437{
6438 /* The AArch64 ABI documents say that "__va_list" has to be
 6439 mangled as if it is in the "std" namespace. */
6440 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6441 return "St9__va_list";
6442
6443 /* Check the mode of the vector type, and the name of the vector
6444 element type, against the table. */
6445 if (TREE_CODE (type) == VECTOR_TYPE)
6446 {
6447 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6448
6449 while (pos->mode != VOIDmode)
6450 {
6451 tree elt_type = TREE_TYPE (type);
6452
6453 if (pos->mode == TYPE_MODE (type)
6454 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6455 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6456 pos->element_type_name))
6457 return pos->mangled_name;
6458
6459 pos++;
6460 }
6461 }
6462
6463 /* Use the default mangling. */
6464 return NULL;
6465}
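/* As an example of the table above, the 128-bit type int32x4_t (mode
   V4SImode, element type __builtin_aarch64_simd_si) is emitted as
   "11__Int32x4_t", so a C++ function "void f (int32x4_t)" should mangle
   to something like _Z1f11__Int32x4_t.  */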
6466
43e9d192 6467/* Return the equivalent letter for size. */
81c2dfb9 6468static char
6469sizetochar (int size)
6470{
6471 switch (size)
6472 {
6473 case 64: return 'd';
6474 case 32: return 's';
6475 case 16: return 'h';
6476 case 8 : return 'b';
6477 default: gcc_unreachable ();
6478 }
6479}
6480
6481/* Return true iff x is a uniform vector of floating-point
6482 constants, and the constant can be represented in
 6483 quarter-precision form. Note, as aarch64_float_const_representable_p
6484 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
6485static bool
6486aarch64_vect_float_const_representable_p (rtx x)
6487{
6488 int i = 0;
6489 REAL_VALUE_TYPE r0, ri;
6490 rtx x0, xi;
6491
6492 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6493 return false;
6494
6495 x0 = CONST_VECTOR_ELT (x, 0);
6496 if (!CONST_DOUBLE_P (x0))
6497 return false;
6498
6499 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6500
6501 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6502 {
6503 xi = CONST_VECTOR_ELT (x, i);
6504 if (!CONST_DOUBLE_P (xi))
6505 return false;
6506
6507 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6508 if (!REAL_VALUES_EQUAL (r0, ri))
6509 return false;
6510 }
6511
6512 return aarch64_float_const_representable_p (x0);
6513}
6514
d8edd899 6515/* Return true for valid and false for invalid. */
3ea63f60 6516bool
6517aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
6518 struct simd_immediate_info *info)
6519{
6520#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6521 matches = 1; \
6522 for (i = 0; i < idx; i += (STRIDE)) \
6523 if (!(TEST)) \
6524 matches = 0; \
6525 if (matches) \
6526 { \
6527 immtype = (CLASS); \
6528 elsize = (ELSIZE); \
6529 eshift = (SHIFT); \
6530 emvn = (NEG); \
6531 break; \
6532 }
6533
6534 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6535 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6536 unsigned char bytes[16];
6537 int immtype = -1, matches;
6538 unsigned int invmask = inverse ? 0xff : 0;
6539 int eshift, emvn;
6540
43e9d192 6541 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3520f7cc 6542 {
6543 if (! (aarch64_simd_imm_zero_p (op, mode)
6544 || aarch64_vect_float_const_representable_p (op)))
d8edd899 6545 return false;
3520f7cc 6546
6547 if (info)
6548 {
6549 info->value = CONST_VECTOR_ELT (op, 0);
81c2dfb9 6550 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
6551 info->mvn = false;
6552 info->shift = 0;
6553 }
3520f7cc 6554
d8edd899 6555 return true;
3520f7cc 6556 }
6557
6558 /* Splat vector constant out into a byte vector. */
6559 for (i = 0; i < n_elts; i++)
6560 {
6561 rtx el = CONST_VECTOR_ELT (op, i);
6562 unsigned HOST_WIDE_INT elpart;
6563 unsigned int part, parts;
6564
6565 if (GET_CODE (el) == CONST_INT)
6566 {
6567 elpart = INTVAL (el);
6568 parts = 1;
6569 }
6570 else if (GET_CODE (el) == CONST_DOUBLE)
6571 {
6572 elpart = CONST_DOUBLE_LOW (el);
6573 parts = 2;
6574 }
6575 else
6576 gcc_unreachable ();
6577
6578 for (part = 0; part < parts; part++)
6579 {
6580 unsigned int byte;
6581 for (byte = 0; byte < innersize; byte++)
6582 {
6583 bytes[idx++] = (elpart & 0xff) ^ invmask;
6584 elpart >>= BITS_PER_UNIT;
6585 }
6586 if (GET_CODE (el) == CONST_DOUBLE)
6587 elpart = CONST_DOUBLE_HIGH (el);
6588 }
6589 }
6590
6591 /* Sanity check. */
6592 gcc_assert (idx == GET_MODE_SIZE (mode));
6593
6594 do
6595 {
6596 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6597 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6598
6599 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6600 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6601
6602 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6603 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6604
6605 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6606 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6607
6608 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6609
6610 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6611
6612 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6613 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6614
6615 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6616 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6617
6618 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6619 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6620
6621 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6622 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6623
6624 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6625
6626 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6627
6628 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
e4f0f84d 6629 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6630
6631 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
e4f0f84d 6632 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6633
6634 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
e4f0f84d 6635 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6636
6637 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
e4f0f84d 6638 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6639
6640 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6641
6642 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6643 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6644 }
6645 while (0);
6646
e4f0f84d 6647 if (immtype == -1)
d8edd899 6648 return false;
43e9d192 6649
48063b9d 6650 if (info)
43e9d192 6651 {
48063b9d 6652 info->element_width = elsize;
6653 info->mvn = emvn != 0;
6654 info->shift = eshift;
6655
6656 unsigned HOST_WIDE_INT imm = 0;
6657
6658 if (immtype >= 12 && immtype <= 15)
6659 info->msl = true;
6660
6661 /* Un-invert bytes of recognized vector, if necessary. */
6662 if (invmask != 0)
6663 for (i = 0; i < idx; i++)
6664 bytes[i] ^= invmask;
6665
6666 if (immtype == 17)
6667 {
6668 /* FIXME: Broken on 32-bit H_W_I hosts. */
6669 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6670
6671 for (i = 0; i < 8; i++)
6672 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6673 << (i * BITS_PER_UNIT);
6674
43e9d192 6675
6676 info->value = GEN_INT (imm);
6677 }
6678 else
6679 {
6680 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6681 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6682
6683 /* Construct 'abcdefgh' because the assembler cannot handle
6684 generic constants. */
6685 if (info->mvn)
43e9d192 6686 imm = ~imm;
6687 imm = (imm >> info->shift) & 0xff;
6688 info->value = GEN_INT (imm);
6689 }
6690 }
6691
48063b9d 6692 return true;
6693#undef CHECK
6694}
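/* A worked example of the CHECK table: a V4SImode constant whose elements
   are all 0x00ab0000 has the per-lane byte pattern 00 00 ab 00, which
   matches immtype 2 (elsize 32, shift 16, no inversion), so INFO comes
   back describing the immediate 0xab with a left shift of 16, i.e.
   something like "movi v0.4s, 0xab, lsl 16".  */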
6695
6696static bool
6697aarch64_const_vec_all_same_int_p (rtx x,
6698 HOST_WIDE_INT minval,
6699 HOST_WIDE_INT maxval)
6700{
6701 HOST_WIDE_INT firstval;
6702 int count, i;
6703
6704 if (GET_CODE (x) != CONST_VECTOR
6705 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6706 return false;
6707
6708 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6709 if (firstval < minval || firstval > maxval)
6710 return false;
6711
6712 count = CONST_VECTOR_NUNITS (x);
6713 for (i = 1; i < count; i++)
6714 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6715 return false;
6716
6717 return true;
6718}
6719
 6720/* Check if immediate shift constants are within range. */
6721bool
6722aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6723{
6724 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6725 if (left)
6726 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6727 else
6728 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6729}
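/* For V4SImode, say, this accepts left-shift counts in 0..31 and
   right-shift counts in 1..32, matching the encodable AdvSIMD immediate
   shift ranges.  */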
6730
6731/* Return true if X is a uniform vector where all elements
6732 are either the floating-point constant 0.0 or the
6733 integer constant 0. */
6734bool
6735aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6736{
3520f7cc 6737 return x == CONST0_RTX (mode);
6738}
6739
6740bool
6741aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6742{
6743 HOST_WIDE_INT imm = INTVAL (x);
6744 int i;
6745
6746 for (i = 0; i < 8; i++)
6747 {
6748 unsigned int byte = imm & 0xff;
6749 if (byte != 0xff && byte != 0)
6750 return false;
6751 imm >>= 8;
6752 }
6753
6754 return true;
6755}
6756
6757bool
6758aarch64_mov_operand_p (rtx x,
a5350ddc 6759 enum aarch64_symbol_context context,
6760 enum machine_mode mode)
6761{
6762 if (GET_CODE (x) == HIGH
6763 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6764 return true;
6765
6766 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
6767 return true;
6768
6769 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
6770 return true;
6771
6772 return aarch64_classify_symbolic_expression (x, context)
6773 == SYMBOL_TINY_ABSOLUTE;
6774}
6775
6776/* Return a const_int vector of VAL. */
6777rtx
6778aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6779{
6780 int nunits = GET_MODE_NUNITS (mode);
6781 rtvec v = rtvec_alloc (nunits);
6782 int i;
6783
6784 for (i=0; i < nunits; i++)
6785 RTVEC_ELT (v, i) = GEN_INT (val);
6786
6787 return gen_rtx_CONST_VECTOR (mode, v);
6788}
6789
6790/* Check OP is a legal scalar immediate for the MOVI instruction. */
6791
6792bool
6793aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
6794{
6795 enum machine_mode vmode;
6796
6797 gcc_assert (!VECTOR_MODE_P (mode));
6798 vmode = aarch64_preferred_simd_mode (mode);
6799 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
48063b9d 6800 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
6801}
6802
6803/* Construct and return a PARALLEL RTX vector. */
6804rtx
6805aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6806{
6807 int nunits = GET_MODE_NUNITS (mode);
6808 rtvec v = rtvec_alloc (nunits / 2);
6809 int base = high ? nunits / 2 : 0;
6810 rtx t1;
6811 int i;
6812
6813 for (i=0; i < nunits / 2; i++)
6814 RTVEC_ELT (v, i) = GEN_INT (base + i);
6815
6816 t1 = gen_rtx_PARALLEL (mode, v);
6817 return t1;
6818}
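/* E.g. for V4SImode this returns (parallel [0 1]) when HIGH is false and
   (parallel [2 3]) when HIGH is true, selecting the low or high half of
   the vector.  */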
6819
6820/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6821 HIGH (exclusive). */
6822void
6823aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6824{
6825 HOST_WIDE_INT lane;
6826 gcc_assert (GET_CODE (operand) == CONST_INT);
6827 lane = INTVAL (operand);
6828
6829 if (lane < low || lane >= high)
6830 error ("lane out of range");
6831}
6832
6833void
6834aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6835{
6836 gcc_assert (GET_CODE (operand) == CONST_INT);
6837 HOST_WIDE_INT lane = INTVAL (operand);
6838
6839 if (lane < low || lane >= high)
6840 error ("constant out of range");
6841}
6842
6843/* Emit code to reinterpret one AdvSIMD type as another,
6844 without altering bits. */
6845void
6846aarch64_simd_reinterpret (rtx dest, rtx src)
6847{
6848 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6849}
6850
 6851/* Emit code to place an AdvSIMD pair result in memory locations (with equal
6852 registers). */
6853void
6854aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6855 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6856 rtx op1)
6857{
6858 rtx mem = gen_rtx_MEM (mode, destaddr);
6859 rtx tmp1 = gen_reg_rtx (mode);
6860 rtx tmp2 = gen_reg_rtx (mode);
6861
6862 emit_insn (intfn (tmp1, op1, tmp2));
6863
6864 emit_move_insn (mem, tmp1);
6865 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6866 emit_move_insn (mem, tmp2);
6867}
6868
6869/* Return TRUE if OP is a valid vector addressing mode. */
6870bool
6871aarch64_simd_mem_operand_p (rtx op)
6872{
6873 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6874 || GET_CODE (XEXP (op, 0)) == REG);
6875}
6876
6877/* Set up OPERANDS for a register copy from SRC to DEST, taking care
6878 not to early-clobber SRC registers in the process.
6879
6880 We assume that the operands described by SRC and DEST represent a
6881 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6882 number of components into which the copy has been decomposed. */
6883void
6884aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6885 rtx *src, unsigned int count)
6886{
6887 unsigned int i;
6888
6889 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6890 || REGNO (operands[0]) < REGNO (operands[1]))
6891 {
6892 for (i = 0; i < count; i++)
6893 {
6894 operands[2 * i] = dest[i];
6895 operands[2 * i + 1] = src[i];
6896 }
6897 }
6898 else
6899 {
6900 for (i = 0; i < count; i++)
6901 {
6902 operands[2 * i] = dest[count - i - 1];
6903 operands[2 * i + 1] = src[count - i - 1];
6904 }
6905 }
6906}
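/* The reversal matters when the register ranges overlap.  Copying
   {d1, d2} into {d2, d3}, say, in forward order would overwrite d2
   before it is read as a source; emitting the d3 = d2 move first (the
   else branch above) keeps every source value intact.  */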
6907
6908/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6909 one of VSTRUCT modes: OI, CI or XI. */
6910int
6911aarch64_simd_attr_length_move (rtx insn)
6912{
6913 enum machine_mode mode;
6914
6915 extract_insn_cached (insn);
6916
6917 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6918 {
6919 mode = GET_MODE (recog_data.operand[0]);
6920 switch (mode)
6921 {
6922 case OImode:
6923 return 8;
6924 case CImode:
6925 return 12;
6926 case XImode:
6927 return 16;
6928 default:
6929 gcc_unreachable ();
6930 }
6931 }
6932 return 4;
6933}
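/* The values above are lengths in bytes: a register-to-register OImode
   (two-vector) move splits into two moves (8 bytes), CImode into three
   (12 bytes) and XImode into four (16 bytes); any other case is a single
   4-byte instruction.  */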
6934
6935/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6936 alignment of a vector to 128 bits. */
6937static HOST_WIDE_INT
6938aarch64_simd_vector_alignment (const_tree type)
6939{
9439e9a1 6940 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
6941 return MIN (align, 128);
6942}
6943
6944/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6945static bool
6946aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6947{
6948 if (is_packed)
6949 return false;
6950
6951 /* We guarantee alignment for vectors up to 128-bits. */
6952 if (tree_int_cst_compare (TYPE_SIZE (type),
6953 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6954 return false;
6955
6956 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6957 return true;
6958}
6959
6960/* If VALS is a vector constant that can be loaded into a register
6961 using DUP, generate instructions to do so and return an RTX to
6962 assign to the register. Otherwise return NULL_RTX. */
6963static rtx
6964aarch64_simd_dup_constant (rtx vals)
6965{
6966 enum machine_mode mode = GET_MODE (vals);
6967 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6968 int n_elts = GET_MODE_NUNITS (mode);
6969 bool all_same = true;
6970 rtx x;
6971 int i;
6972
6973 if (GET_CODE (vals) != CONST_VECTOR)
6974 return NULL_RTX;
6975
6976 for (i = 1; i < n_elts; ++i)
6977 {
6978 x = CONST_VECTOR_ELT (vals, i);
6979 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6980 all_same = false;
6981 }
6982
6983 if (!all_same)
6984 return NULL_RTX;
6985
6986 /* We can load this constant by using DUP and a constant in a
6987 single ARM register. This will be cheaper than a vector
6988 load. */
6989 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6990 return gen_rtx_VEC_DUPLICATE (mode, x);
6991}
6992
6993
6994/* Generate code to load VALS, which is a PARALLEL containing only
6995 constants (for vec_init) or CONST_VECTOR, efficiently into a
6996 register. Returns an RTX to copy into the register, or NULL_RTX
6997 for a PARALLEL that can not be converted into a CONST_VECTOR. */
1df3f464 6998static rtx
6999aarch64_simd_make_constant (rtx vals)
7000{
7001 enum machine_mode mode = GET_MODE (vals);
7002 rtx const_dup;
7003 rtx const_vec = NULL_RTX;
7004 int n_elts = GET_MODE_NUNITS (mode);
7005 int n_const = 0;
7006 int i;
7007
7008 if (GET_CODE (vals) == CONST_VECTOR)
7009 const_vec = vals;
7010 else if (GET_CODE (vals) == PARALLEL)
7011 {
7012 /* A CONST_VECTOR must contain only CONST_INTs and
7013 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
7014 Only store valid constants in a CONST_VECTOR. */
7015 for (i = 0; i < n_elts; ++i)
7016 {
7017 rtx x = XVECEXP (vals, 0, i);
7018 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
7019 n_const++;
7020 }
7021 if (n_const == n_elts)
7022 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7023 }
7024 else
7025 gcc_unreachable ();
7026
7027 if (const_vec != NULL_RTX
48063b9d 7028 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
7029 /* Load using MOVI/MVNI. */
7030 return const_vec;
7031 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
7032 /* Loaded using DUP. */
7033 return const_dup;
7034 else if (const_vec != NULL_RTX)
7035 /* Load from constant pool. We can not take advantage of single-cycle
7036 LD1 because we need a PC-relative addressing mode. */
7037 return const_vec;
7038 else
7039 /* A PARALLEL containing something not valid inside CONST_VECTOR.
7040 We can not construct an initializer. */
7041 return NULL_RTX;
7042}
7043
7044void
7045aarch64_expand_vector_init (rtx target, rtx vals)
7046{
7047 enum machine_mode mode = GET_MODE (target);
7048 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7049 int n_elts = GET_MODE_NUNITS (mode);
7050 int n_var = 0, one_var = -1;
7051 bool all_same = true;
7052 rtx x, mem;
7053 int i;
7054
7055 x = XVECEXP (vals, 0, 0);
7056 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7057 n_var = 1, one_var = 0;
7058
7059 for (i = 1; i < n_elts; ++i)
7060 {
7061 x = XVECEXP (vals, 0, i);
7062 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7063 ++n_var, one_var = i;
7064
7065 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7066 all_same = false;
7067 }
7068
7069 if (n_var == 0)
7070 {
7071 rtx constant = aarch64_simd_make_constant (vals);
7072 if (constant != NULL_RTX)
7073 {
7074 emit_move_insn (target, constant);
7075 return;
7076 }
7077 }
7078
7079 /* Splat a single non-constant element if we can. */
7080 if (all_same)
7081 {
7082 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
7083 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
7084 return;
7085 }
7086
7087 /* One field is non-constant. Load constant then overwrite varying
7088 field. This is more efficient than using the stack. */
7089 if (n_var == 1)
7090 {
7091 rtx copy = copy_rtx (vals);
7092 rtx index = GEN_INT (one_var);
7093 enum insn_code icode;
7094
7095 /* Load constant part of vector, substitute neighboring value for
7096 varying element. */
7097 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
7098 aarch64_expand_vector_init (target, copy);
7099
7100 /* Insert variable. */
7101 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
7102 icode = optab_handler (vec_set_optab, mode);
7103 gcc_assert (icode != CODE_FOR_nothing);
7104 emit_insn (GEN_FCN (icode) (target, x, index));
7105 return;
7106 }
7107
7108 /* Construct the vector in memory one field at a time
7109 and load the whole vector. */
7110 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7111 for (i = 0; i < n_elts; i++)
7112 emit_move_insn (adjust_address_nv (mem, inner_mode,
7113 i * GET_MODE_SIZE (inner_mode)),
7114 XVECEXP (vals, 0, i));
7115 emit_move_insn (target, mem);
7116
7117}
7118
7119static unsigned HOST_WIDE_INT
7120aarch64_shift_truncation_mask (enum machine_mode mode)
7121{
7122 return
7123 (aarch64_vector_mode_supported_p (mode)
7124 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
7125}
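/* Scalar shifts therefore advertise the usual truncation (a mask of 31
   for SImode, 63 for DImode), while vector and vector-struct modes return
   0 because AdvSIMD shifts do not truncate the shift amount.  */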
7126
7127#ifndef TLS_SECTION_ASM_FLAG
7128#define TLS_SECTION_ASM_FLAG 'T'
7129#endif
7130
7131void
7132aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7133 tree decl ATTRIBUTE_UNUSED)
7134{
7135 char flagchars[10], *f = flagchars;
7136
7137 /* If we have already declared this section, we can use an
7138 abbreviated form to switch back to it -- unless this section is
 7139 part of a COMDAT group, in which case GAS requires the full
7140 declaration every time. */
7141 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7142 && (flags & SECTION_DECLARED))
7143 {
7144 fprintf (asm_out_file, "\t.section\t%s\n", name);
7145 return;
7146 }
7147
7148 if (!(flags & SECTION_DEBUG))
7149 *f++ = 'a';
7150 if (flags & SECTION_WRITE)
7151 *f++ = 'w';
7152 if (flags & SECTION_CODE)
7153 *f++ = 'x';
7154 if (flags & SECTION_SMALL)
7155 *f++ = 's';
7156 if (flags & SECTION_MERGE)
7157 *f++ = 'M';
7158 if (flags & SECTION_STRINGS)
7159 *f++ = 'S';
7160 if (flags & SECTION_TLS)
7161 *f++ = TLS_SECTION_ASM_FLAG;
7162 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7163 *f++ = 'G';
7164 *f = '\0';
7165
7166 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7167
7168 if (!(flags & SECTION_NOTYPE))
7169 {
7170 const char *type;
7171 const char *format;
7172
7173 if (flags & SECTION_BSS)
7174 type = "nobits";
7175 else
7176 type = "progbits";
7177
7178#ifdef TYPE_OPERAND_FMT
7179 format = "," TYPE_OPERAND_FMT;
7180#else
7181 format = ",@%s";
7182#endif
7183
7184 fprintf (asm_out_file, format, type);
7185
7186 if (flags & SECTION_ENTSIZE)
7187 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7188 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7189 {
7190 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7191 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7192 else
7193 fprintf (asm_out_file, ",%s,comdat",
7194 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7195 }
7196 }
7197
7198 putc ('\n', asm_out_file);
7199}
7200
7201/* Select a format to encode pointers in exception handling data. */
7202int
7203aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7204{
7205 int type;
7206 switch (aarch64_cmodel)
7207 {
7208 case AARCH64_CMODEL_TINY:
7209 case AARCH64_CMODEL_TINY_PIC:
7210 case AARCH64_CMODEL_SMALL:
7211 case AARCH64_CMODEL_SMALL_PIC:
7212 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
7213 for everything. */
7214 type = DW_EH_PE_sdata4;
7215 break;
7216 default:
7217 /* No assumptions here. 8-byte relocs required. */
7218 type = DW_EH_PE_sdata8;
7219 break;
7220 }
7221 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7222}
7223
7224/* Emit load exclusive. */
7225
7226static void
7227aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7228 rtx mem, rtx model_rtx)
7229{
7230 rtx (*gen) (rtx, rtx, rtx);
7231
7232 switch (mode)
7233 {
7234 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7235 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7236 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7237 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7238 default:
7239 gcc_unreachable ();
7240 }
7241
7242 emit_insn (gen (rval, mem, model_rtx));
7243}
7244
7245/* Emit store exclusive. */
7246
7247static void
7248aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7249 rtx rval, rtx mem, rtx model_rtx)
7250{
7251 rtx (*gen) (rtx, rtx, rtx, rtx);
7252
7253 switch (mode)
7254 {
7255 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7256 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7257 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7258 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7259 default:
7260 gcc_unreachable ();
7261 }
7262
7263 emit_insn (gen (bval, rval, mem, model_rtx));
7264}
7265
7266/* Mark the previous jump instruction as unlikely. */
7267
7268static void
7269aarch64_emit_unlikely_jump (rtx insn)
7270{
e5af9ddd 7271 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
7272
7273 insn = emit_jump_insn (insn);
e5af9ddd 7274 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
7275}
7276
7277/* Expand a compare and swap pattern. */
7278
7279void
7280aarch64_expand_compare_and_swap (rtx operands[])
7281{
7282 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7283 enum machine_mode mode, cmp_mode;
7284 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7285
7286 bval = operands[0];
7287 rval = operands[1];
7288 mem = operands[2];
7289 oldval = operands[3];
7290 newval = operands[4];
7291 is_weak = operands[5];
7292 mod_s = operands[6];
7293 mod_f = operands[7];
7294 mode = GET_MODE (mem);
7295 cmp_mode = mode;
7296
7297 /* Normally the succ memory model must be stronger than fail, but in the
7298 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7299 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7300
7301 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7302 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7303 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7304
7305 switch (mode)
7306 {
7307 case QImode:
7308 case HImode:
7309 /* For short modes, we're going to perform the comparison in SImode,
7310 so do the zero-extension now. */
7311 cmp_mode = SImode;
7312 rval = gen_reg_rtx (SImode);
7313 oldval = convert_modes (SImode, mode, oldval, true);
7314 /* Fall through. */
7315
7316 case SImode:
7317 case DImode:
7318 /* Force the value into a register if needed. */
7319 if (!aarch64_plus_operand (oldval, mode))
7320 oldval = force_reg (cmp_mode, oldval);
7321 break;
7322
7323 default:
7324 gcc_unreachable ();
7325 }
7326
7327 switch (mode)
7328 {
7329 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7330 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7331 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7332 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7333 default:
7334 gcc_unreachable ();
7335 }
7336
7337 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7338
7339 if (mode == QImode || mode == HImode)
7340 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7341
7342 x = gen_rtx_REG (CCmode, CC_REGNUM);
7343 x = gen_rtx_EQ (SImode, x, const0_rtx);
7344 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7345}
7346
7347/* Split a compare and swap pattern. */
7348
7349void
7350aarch64_split_compare_and_swap (rtx operands[])
7351{
7352 rtx rval, mem, oldval, newval, scratch;
7353 enum machine_mode mode;
7354 bool is_weak;
7355 rtx label1, label2, x, cond;
7356
7357 rval = operands[0];
7358 mem = operands[1];
7359 oldval = operands[2];
7360 newval = operands[3];
7361 is_weak = (operands[4] != const0_rtx);
7362 scratch = operands[7];
7363 mode = GET_MODE (mem);
7364
7365 label1 = NULL_RTX;
7366 if (!is_weak)
7367 {
7368 label1 = gen_label_rtx ();
7369 emit_label (label1);
7370 }
7371 label2 = gen_label_rtx ();
7372
7373 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7374
7375 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7376 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7377 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7378 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7379 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7380
7381 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7382
7383 if (!is_weak)
7384 {
7385 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7386 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7387 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7388 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7389 }
7390 else
7391 {
7392 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7393 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7394 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7395 }
7396
7397 emit_label (label2);
7398}
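/* The split above corresponds roughly to the following sequence for a
   strong SImode compare-and-swap (register numbers are illustrative
   only, and acquire/release forms of the exclusives depend on the
   memory model):

     .L1: ldxr  w0, [x1]
          cmp   w0, w2
          b.ne  .L2
          stxr  w3, w4, [x1]
          cbnz  w3, .L1
     .L2:
*/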
7399
7400/* Split an atomic operation. */
7401
7402void
7403aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7404 rtx value, rtx model_rtx, rtx cond)
7405{
7406 enum machine_mode mode = GET_MODE (mem);
7407 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7408 rtx label, x;
7409
7410 label = gen_label_rtx ();
7411 emit_label (label);
7412
7413 if (new_out)
7414 new_out = gen_lowpart (wmode, new_out);
7415 if (old_out)
7416 old_out = gen_lowpart (wmode, old_out);
7417 else
7418 old_out = new_out;
7419 value = simplify_gen_subreg (wmode, value, mode, 0);
7420
7421 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7422
7423 switch (code)
7424 {
7425 case SET:
7426 new_out = value;
7427 break;
7428
7429 case NOT:
7430 x = gen_rtx_AND (wmode, old_out, value);
7431 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7432 x = gen_rtx_NOT (wmode, new_out);
7433 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7434 break;
7435
7436 case MINUS:
7437 if (CONST_INT_P (value))
7438 {
7439 value = GEN_INT (-INTVAL (value));
7440 code = PLUS;
7441 }
7442 /* Fall through. */
7443
7444 default:
7445 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7446 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7447 break;
7448 }
7449
7450 aarch64_emit_store_exclusive (mode, cond, mem,
7451 gen_lowpart (mode, new_out), model_rtx);
7452
7453 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7454 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7455 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7456 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7457}
7458
7459static void
7460aarch64_print_extension (void)
7461{
7462 const struct aarch64_option_extension *opt = NULL;
7463
7464 for (opt = all_extensions; opt->name != NULL; opt++)
7465 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7466 asm_fprintf (asm_out_file, "+%s", opt->name);
7467
7468 asm_fprintf (asm_out_file, "\n");
7469}
7470
7471static void
7472aarch64_start_file (void)
7473{
7474 if (selected_arch)
7475 {
7476 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7477 aarch64_print_extension ();
7478 }
43e9d192 7479 else if (selected_cpu)
95ca411e 7480 {
7481 const char *truncated_name
7482 = aarch64_rewrite_selected_cpu (selected_cpu->name);
7483 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
7484 aarch64_print_extension ();
7485 }
7486 default_file_start();
7487}
7488
7489/* Target hook for c_mode_for_suffix. */
7490static enum machine_mode
7491aarch64_c_mode_for_suffix (char suffix)
7492{
7493 if (suffix == 'q')
7494 return TFmode;
7495
7496 return VOIDmode;
7497}
7498
7499/* We can only represent floating point constants which will fit in
7500 "quarter-precision" values. These values are characterised by
7501 a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
7502 by:
7503
7504 (-1)^s * (n/16) * 2^r
7505
7506 Where:
7507 's' is the sign bit.
7508 'n' is an integer in the range 16 <= n <= 31.
7509 'r' is an integer in the range -3 <= r <= 4. */
7510
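/* Worked examples of that form: 1.0 is (16/16) * 2^0, 0.125 is
   (16/16) * 2^-3 (the smallest positive magnitude) and 31.0 is
   (31/16) * 2^4 (the largest), whereas a value such as 0.2 has no such
   encoding and must be materialised some other way.  */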
7511/* Return true iff X can be represented by a quarter-precision
7512 floating point immediate operand X. Note, we cannot represent 0.0. */
7513bool
7514aarch64_float_const_representable_p (rtx x)
7515{
7516 /* This represents our current view of how many bits
7517 make up the mantissa. */
7518 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 7519 int exponent;
7520 unsigned HOST_WIDE_INT mantissa, mask;
7521 HOST_WIDE_INT m1, m2;
7522 REAL_VALUE_TYPE r, m;
7523
7524 if (!CONST_DOUBLE_P (x))
7525 return false;
7526
7527 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7528
7529 /* We cannot represent infinities, NaNs or +/-zero. We won't
7530 know if we have +zero until we analyse the mantissa, but we
7531 can reject the other invalid values. */
7532 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7533 || REAL_VALUE_MINUS_ZERO (r))
7534 return false;
7535
ba96cdfb 7536 /* Extract exponent. */
7537 r = real_value_abs (&r);
7538 exponent = REAL_EXP (&r);
7539
7540 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7541 highest (sign) bit, with a fixed binary point at bit point_pos.
7542 m1 holds the low part of the mantissa, m2 the high part.
7543 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7544 bits for the mantissa, this can fail (low bits will be lost). */
7545 real_ldexp (&m, &r, point_pos - exponent);
7546 REAL_VALUE_TO_INT (&m1, &m2, m);
7547
7548 /* If the low part of the mantissa has bits set we cannot represent
7549 the value. */
7550 if (m1 != 0)
7551 return false;
7552 /* We have rejected the lower HOST_WIDE_INT, so update our
7553 understanding of how many bits lie in the mantissa and
7554 look only at the high HOST_WIDE_INT. */
7555 mantissa = m2;
7556 point_pos -= HOST_BITS_PER_WIDE_INT;
7557
7558 /* We can only represent values with a mantissa of the form 1.xxxx. */
7559 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7560 if ((mantissa & mask) != 0)
7561 return false;
7562
7563 /* Having filtered unrepresentable values, we may now remove all
7564 but the highest 5 bits. */
7565 mantissa >>= point_pos - 5;
7566
7567 /* We cannot represent the value 0.0, so reject it. This is handled
7568 elsewhere. */
7569 if (mantissa == 0)
7570 return false;
7571
7572 /* Then, as bit 4 is always set, we can mask it off, leaving
7573 the mantissa in the range [0, 15]. */
7574 mantissa &= ~(1 << 4);
7575 gcc_assert (mantissa <= 15);
7576
7577 /* GCC internally does not use IEEE754-like encoding (where normalized
7578 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
7579 Our mantissa values are shifted 4 places to the left relative to
7580 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7581 by 5 places to correct for GCC's representation. */
7582 exponent = 5 - exponent;
7583
7584 return (exponent >= 0 && exponent <= 7);
7585}
7586
7587char*
81c2dfb9 7588aarch64_output_simd_mov_immediate (rtx const_vector,
7589 enum machine_mode mode,
7590 unsigned width)
7591{
3ea63f60 7592 bool is_valid;
3520f7cc 7593 static char templ[40];
3520f7cc 7594 const char *mnemonic;
e4f0f84d 7595 const char *shift_op;
3520f7cc 7596 unsigned int lane_count = 0;
81c2dfb9 7597 char element_char;
3520f7cc 7598
e4f0f84d 7599 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
7600
 7601  /* This will return true to show const_vector is legal for use as an
 7602     AdvSIMD MOVI (or, implicitly, MVNI) immediate.  It will
7603 also update INFO to show how the immediate should be generated. */
81c2dfb9 7604 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
7605 gcc_assert (is_valid);
7606
81c2dfb9 7607 element_char = sizetochar (info.element_width);
7608 lane_count = width / info.element_width;
7609
7610 mode = GET_MODE_INNER (mode);
7611 if (mode == SFmode || mode == DFmode)
7612 {
7613 gcc_assert (info.shift == 0 && ! info.mvn);
7614 if (aarch64_float_const_zero_rtx_p (info.value))
7615 info.value = GEN_INT (0);
7616 else
7617 {
7618#define buf_size 20
7619 REAL_VALUE_TYPE r;
7620 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
7621 char float_buf[buf_size] = {'\0'};
7622 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
7623#undef buf_size
7624
7625 if (lane_count == 1)
7626 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
7627 else
7628 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 7629 lane_count, element_char, float_buf);
7630 return templ;
7631 }
3520f7cc 7632 }
3520f7cc 7633
48063b9d 7634 mnemonic = info.mvn ? "mvni" : "movi";
e4f0f84d 7635 shift_op = info.msl ? "msl" : "lsl";
7636
7637 if (lane_count == 1)
7638 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
7639 mnemonic, UINTVAL (info.value));
7640 else if (info.shift)
7641 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
7642 ", %s %d", mnemonic, lane_count, element_char,
7643 UINTVAL (info.value), shift_op, info.shift);
3520f7cc 7644 else
48063b9d 7645 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
81c2dfb9 7646 mnemonic, lane_count, element_char, UINTVAL (info.value));
7647 return templ;
7648}
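
/* Worked example (illustrative; the exact template spelling is inferred from
   the code above, not taken from ARM documentation): for a V4SImode vector
   whose four lanes are all 0x45, this function should return
   "movi\t%0.4s, 0x45"; if aarch64_simd_valid_immediate instead reports the
   value 0x45 with a left shift of 8, the template gains a shifter operand,
   "movi\t%0.4s, 0x45, lsl 8".  Floating-point vector duplicates take the
   FMOV path, e.g. a V2DFmode vector of 1.0 produces an
   "fmov\t%0.2d, <decimal>" template, where <decimal> is whatever
   real_to_decimal_for_mode prints for 1.0.  */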
7649
7650char*
7651aarch64_output_scalar_simd_mov_immediate (rtx immediate,
7652 enum machine_mode mode)
7653{
7654 enum machine_mode vmode;
7655
7656 gcc_assert (!VECTOR_MODE_P (mode));
7657 vmode = aarch64_simd_container_mode (mode, 64);
7658 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
7659 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
7660}
7661
 7662/* Split a move of op[1] and op[2] into the low and high halves of op[0]. */
7663
7664void
7665aarch64_split_combinev16qi (rtx operands[3])
7666{
7667 unsigned int dest = REGNO (operands[0]);
7668 unsigned int src1 = REGNO (operands[1]);
7669 unsigned int src2 = REGNO (operands[2]);
7670 enum machine_mode halfmode = GET_MODE (operands[1]);
7671 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7672 rtx destlo, desthi;
7673
7674 gcc_assert (halfmode == V16QImode);
7675
7676 if (src1 == dest && src2 == dest + halfregs)
7677 {
7678 /* No-op move. Can't split to nothing; emit something. */
7679 emit_note (NOTE_INSN_DELETED);
7680 return;
7681 }
7682
7683 /* Preserve register attributes for variable tracking. */
7684 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7685 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7686 GET_MODE_SIZE (halfmode));
7687
7688 /* Special case of reversed high/low parts. */
7689 if (reg_overlap_mentioned_p (operands[2], destlo)
7690 && reg_overlap_mentioned_p (operands[1], desthi))
7691 {
7692 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7693 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7694 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7695 }
7696 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7697 {
7698 /* Try to avoid unnecessary moves if part of the result
7699 is in the right place already. */
7700 if (src1 != dest)
7701 emit_move_insn (destlo, operands[1]);
7702 if (src2 != dest + halfregs)
7703 emit_move_insn (desthi, operands[2]);
7704 }
7705 else
7706 {
7707 if (src2 != dest + halfregs)
7708 emit_move_insn (desthi, operands[2]);
7709 if (src1 != dest)
7710 emit_move_insn (destlo, operands[1]);
7711 }
7712}
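
/* The three XOR instructions emitted above for the reversed high/low case
   are the classic temporary-free register swap.  A scalar sketch of the same
   idea (hypothetical helper, illustration only; it assumes the two pointers
   do not alias the same object):  */

static void
xor_swap_example (unsigned int *a, unsigned int *b)
{
  *a ^= *b;   /* a' = a ^ b.  */
  *b ^= *a;   /* b' = b ^ (a ^ b) = original a.  */
  *a ^= *b;   /* a'' = (a ^ b) ^ original a = original b.  */
}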
7713
7714/* vec_perm support. */
7715
7716#define MAX_VECT_LEN 16
7717
7718struct expand_vec_perm_d
7719{
7720 rtx target, op0, op1;
7721 unsigned char perm[MAX_VECT_LEN];
7722 enum machine_mode vmode;
7723 unsigned char nelt;
7724 bool one_vector_p;
7725 bool testing_p;
7726};
7727
7728/* Generate a variable permutation. */
7729
7730static void
7731aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7732{
7733 enum machine_mode vmode = GET_MODE (target);
7734 bool one_vector_p = rtx_equal_p (op0, op1);
7735
7736 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7737 gcc_checking_assert (GET_MODE (op0) == vmode);
7738 gcc_checking_assert (GET_MODE (op1) == vmode);
7739 gcc_checking_assert (GET_MODE (sel) == vmode);
7740 gcc_checking_assert (TARGET_SIMD);
7741
7742 if (one_vector_p)
7743 {
7744 if (vmode == V8QImode)
7745 {
7746 /* Expand the argument to a V16QI mode by duplicating it. */
7747 rtx pair = gen_reg_rtx (V16QImode);
7748 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7749 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7750 }
7751 else
7752 {
7753 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7754 }
7755 }
7756 else
7757 {
7758 rtx pair;
7759
7760 if (vmode == V8QImode)
7761 {
7762 pair = gen_reg_rtx (V16QImode);
7763 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7764 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7765 }
7766 else
7767 {
7768 pair = gen_reg_rtx (OImode);
7769 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7770 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7771 }
7772 }
7773}
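
/* Worked example (illustrative): TBL indexes the bytes of its table operand,
   so with two distinct V16QImode inputs the OImode pair above forms a
   32-byte table in which a selector byte of 3 reads byte 3 of op0 and a
   selector byte of 20 reads byte 4 of op1.  For a single V8QImode input the
   operand is duplicated into a V16QImode pair so that one TBL1 over a
   16-byte table suffices.  */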
7774
7775void
7776aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7777{
7778 enum machine_mode vmode = GET_MODE (target);
7779 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7780 bool one_vector_p = rtx_equal_p (op0, op1);
7781 rtx rmask[MAX_VECT_LEN], mask;
7782
7783 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7784
7785 /* The TBL instruction does not use a modulo index, so we must take care
7786 of that ourselves. */
7787 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7788 for (i = 0; i < nelt; ++i)
7789 rmask[i] = mask;
7790 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7791 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7792
7793 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7794}
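
/* Worked example (illustrative): for V8QImode with two distinct source
   vectors, nelt is 8 and the mask is 15, so a selector byte of 17 is reduced
   to 1 (byte 1 of op0) while 9 is left as 9 (byte 1 of op1 in the combined
   16-byte table).  This gives the modulo-2*nelt behaviour that vec_perm
   expects but that TBL itself does not provide.  */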
7795
7796/* Recognize patterns suitable for the TRN instructions. */
7797static bool
7798aarch64_evpc_trn (struct expand_vec_perm_d *d)
7799{
7800 unsigned int i, odd, mask, nelt = d->nelt;
7801 rtx out, in0, in1, x;
7802 rtx (*gen) (rtx, rtx, rtx);
7803 enum machine_mode vmode = d->vmode;
7804
7805 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7806 return false;
7807
7808 /* Note that these are little-endian tests.
7809 We correct for big-endian later. */
7810 if (d->perm[0] == 0)
7811 odd = 0;
7812 else if (d->perm[0] == 1)
7813 odd = 1;
7814 else
7815 return false;
7816 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7817
7818 for (i = 0; i < nelt; i += 2)
7819 {
7820 if (d->perm[i] != i + odd)
7821 return false;
7822 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7823 return false;
7824 }
7825
7826 /* Success! */
7827 if (d->testing_p)
7828 return true;
7829
7830 in0 = d->op0;
7831 in1 = d->op1;
7832 if (BYTES_BIG_ENDIAN)
7833 {
7834 x = in0, in0 = in1, in1 = x;
7835 odd = !odd;
7836 }
7837 out = d->target;
7838
7839 if (odd)
7840 {
7841 switch (vmode)
7842 {
7843 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7844 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7845 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7846 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7847 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7848 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7849 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7850 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7851 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7852 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7853 default:
7854 return false;
7855 }
7856 }
7857 else
7858 {
7859 switch (vmode)
7860 {
7861 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7862 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7863 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7864 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7865 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7866 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7867 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7868 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7869 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7870 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7871 default:
7872 return false;
7873 }
7874 }
7875
7876 emit_insn (gen (out, in0, in1));
7877 return true;
7878}
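
/* Worked example (illustrative; the array names are ours): on V4SImode the
   checks above accept exactly these two-operand index vectors.  */
static const unsigned char trn1_v4si_example[4] = { 0, 4, 2, 6 };
static const unsigned char trn2_v4si_example[4] = { 1, 5, 3, 7 };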
7879
7880/* Recognize patterns suitable for the UZP instructions. */
7881static bool
7882aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7883{
7884 unsigned int i, odd, mask, nelt = d->nelt;
7885 rtx out, in0, in1, x;
7886 rtx (*gen) (rtx, rtx, rtx);
7887 enum machine_mode vmode = d->vmode;
7888
7889 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7890 return false;
7891
7892 /* Note that these are little-endian tests.
7893 We correct for big-endian later. */
7894 if (d->perm[0] == 0)
7895 odd = 0;
7896 else if (d->perm[0] == 1)
7897 odd = 1;
7898 else
7899 return false;
7900 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7901
7902 for (i = 0; i < nelt; i++)
7903 {
7904 unsigned elt = (i * 2 + odd) & mask;
7905 if (d->perm[i] != elt)
7906 return false;
7907 }
7908
7909 /* Success! */
7910 if (d->testing_p)
7911 return true;
7912
7913 in0 = d->op0;
7914 in1 = d->op1;
7915 if (BYTES_BIG_ENDIAN)
7916 {
7917 x = in0, in0 = in1, in1 = x;
7918 odd = !odd;
7919 }
7920 out = d->target;
7921
7922 if (odd)
7923 {
7924 switch (vmode)
7925 {
7926 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7927 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7928 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7929 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7930 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7931 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7932 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7933 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7934 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7935 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7936 default:
7937 return false;
7938 }
7939 }
7940 else
7941 {
7942 switch (vmode)
7943 {
7944 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7945 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7946 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7947 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7948 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7949 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7950 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7951 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7952 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7953 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7954 default:
7955 return false;
7956 }
7957 }
7958
7959 emit_insn (gen (out, in0, in1));
7960 return true;
7961}
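
/* Worked example (illustrative; the array names are ours): on V4SImode the
   checks above accept exactly these two-operand index vectors.  */
static const unsigned char uzp1_v4si_example[4] = { 0, 2, 4, 6 };
static const unsigned char uzp2_v4si_example[4] = { 1, 3, 5, 7 };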
7962
7963/* Recognize patterns suitable for the ZIP instructions. */
7964static bool
7965aarch64_evpc_zip (struct expand_vec_perm_d *d)
7966{
7967 unsigned int i, high, mask, nelt = d->nelt;
7968 rtx out, in0, in1, x;
7969 rtx (*gen) (rtx, rtx, rtx);
7970 enum machine_mode vmode = d->vmode;
7971
7972 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7973 return false;
7974
7975 /* Note that these are little-endian tests.
7976 We correct for big-endian later. */
7977 high = nelt / 2;
7978 if (d->perm[0] == high)
7979 /* Do Nothing. */
7980 ;
7981 else if (d->perm[0] == 0)
7982 high = 0;
7983 else
7984 return false;
7985 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7986
7987 for (i = 0; i < nelt / 2; i++)
7988 {
7989 unsigned elt = (i + high) & mask;
7990 if (d->perm[i * 2] != elt)
7991 return false;
7992 elt = (elt + nelt) & mask;
7993 if (d->perm[i * 2 + 1] != elt)
7994 return false;
7995 }
7996
7997 /* Success! */
7998 if (d->testing_p)
7999 return true;
8000
8001 in0 = d->op0;
8002 in1 = d->op1;
8003 if (BYTES_BIG_ENDIAN)
8004 {
8005 x = in0, in0 = in1, in1 = x;
8006 high = !high;
8007 }
8008 out = d->target;
8009
8010 if (high)
8011 {
8012 switch (vmode)
8013 {
8014 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
8015 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
8016 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
8017 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
8018 case V4SImode: gen = gen_aarch64_zip2v4si; break;
8019 case V2SImode: gen = gen_aarch64_zip2v2si; break;
8020 case V2DImode: gen = gen_aarch64_zip2v2di; break;
8021 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
8022 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
8023 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
8024 default:
8025 return false;
8026 }
8027 }
8028 else
8029 {
8030 switch (vmode)
8031 {
8032 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
8033 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
8034 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
8035 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
8036 case V4SImode: gen = gen_aarch64_zip1v4si; break;
8037 case V2SImode: gen = gen_aarch64_zip1v2si; break;
8038 case V2DImode: gen = gen_aarch64_zip1v2di; break;
8039 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
8040 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
8041 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
8042 default:
8043 return false;
8044 }
8045 }
8046
8047 emit_insn (gen (out, in0, in1));
8048 return true;
8049}
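
/* Worked example (illustrative; the array names are ours): on V4SImode the
   checks above accept exactly these two-operand index vectors.  */
static const unsigned char zip1_v4si_example[4] = { 0, 4, 1, 5 };
static const unsigned char zip2_v4si_example[4] = { 2, 6, 3, 7 };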
8050
8051static bool
8052aarch64_evpc_dup (struct expand_vec_perm_d *d)
8053{
8054 rtx (*gen) (rtx, rtx, rtx);
8055 rtx out = d->target;
8056 rtx in0;
8057 enum machine_mode vmode = d->vmode;
8058 unsigned int i, elt, nelt = d->nelt;
8059 rtx lane;
8060
8061 /* TODO: This may not be big-endian safe. */
8062 if (BYTES_BIG_ENDIAN)
8063 return false;
8064
8065 elt = d->perm[0];
8066 for (i = 1; i < nelt; i++)
8067 {
8068 if (elt != d->perm[i])
8069 return false;
8070 }
8071
8072 /* The generic preparation in aarch64_expand_vec_perm_const_1
8073 swaps the operand order and the permute indices if it finds
8074 d->perm[0] to be in the second operand. Thus, we can always
8075 use d->op0 and need not do any extra arithmetic to get the
8076 correct lane number. */
8077 in0 = d->op0;
8078 lane = GEN_INT (elt);
8079
8080 switch (vmode)
8081 {
8082 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
8083 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
8084 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
8085 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
8086 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
8087 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
8088 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
8089 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
8090 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
8091 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
8092 default:
8093 return false;
8094 }
8095
8096 emit_insn (gen (out, in0, lane));
8097 return true;
8098}
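
/* Worked example (illustrative): a constant permute of { 3, 3, 3, 3 } on
   V4SImode broadcasts lane 3 of the (possibly swapped) first operand, so the
   code above emits the dup-lane pattern instead of falling back to TBL.  */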
8099
8100static bool
8101aarch64_evpc_tbl (struct expand_vec_perm_d *d)
8102{
8103 rtx rperm[MAX_VECT_LEN], sel;
8104 enum machine_mode vmode = d->vmode;
8105 unsigned int i, nelt = d->nelt;
8106
8107 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
8108 numbering of elements for big-endian, we must reverse the order. */
8109 if (BYTES_BIG_ENDIAN)
8110 return false;
8111
8112 if (d->testing_p)
8113 return true;
8114
8115 /* Generic code will try constant permutation twice. Once with the
8116 original mode and again with the elements lowered to QImode.
8117 So wait and don't do the selector expansion ourselves. */
8118 if (vmode != V8QImode && vmode != V16QImode)
8119 return false;
8120
8121 for (i = 0; i < nelt; ++i)
8122 rperm[i] = GEN_INT (d->perm[i]);
8123 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
8124 sel = force_reg (vmode, sel);
8125
8126 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
8127 return true;
8128}
8129
8130static bool
8131aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
8132{
8133 /* The pattern matching functions above are written to look for a small
8134 number to begin the sequence (0, 1, N/2). If we begin with an index
8135 from the second operand, we can swap the operands. */
8136 if (d->perm[0] >= d->nelt)
8137 {
8138 unsigned i, nelt = d->nelt;
8139 rtx x;
8140
8141 for (i = 0; i < nelt; ++i)
8142 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
8143
8144 x = d->op0;
8145 d->op0 = d->op1;
8146 d->op1 = x;
8147 }
8148
8149 if (TARGET_SIMD)
8150 {
8151 if (aarch64_evpc_zip (d))
8152 return true;
8153 else if (aarch64_evpc_uzp (d))
8154 return true;
8155 else if (aarch64_evpc_trn (d))
8156 return true;
8157 else if (aarch64_evpc_dup (d))
8158 return true;
8159 return aarch64_evpc_tbl (d);
8160 }
8161 return false;
8162}
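
/* Worked example (illustrative): on V4SImode a permute of { 4, 5, 6, 7 }
   selects only elements of the second operand, so the code above swaps the
   operands and rewrites the indices to { 0, 1, 2, 3 } (each index has nelt
   added and is then masked by 2 * nelt - 1) before trying the
   ZIP/UZP/TRN/DUP/TBL recognisers.  */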
8163
8164/* Expand a vec_perm_const pattern. */
8165
8166bool
8167aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
8168{
8169 struct expand_vec_perm_d d;
8170 int i, nelt, which;
8171
8172 d.target = target;
8173 d.op0 = op0;
8174 d.op1 = op1;
8175
8176 d.vmode = GET_MODE (target);
8177 gcc_assert (VECTOR_MODE_P (d.vmode));
8178 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8179 d.testing_p = false;
8180
8181 for (i = which = 0; i < nelt; ++i)
8182 {
8183 rtx e = XVECEXP (sel, 0, i);
8184 int ei = INTVAL (e) & (2 * nelt - 1);
8185 which |= (ei < nelt ? 1 : 2);
8186 d.perm[i] = ei;
8187 }
8188
8189 switch (which)
8190 {
8191 default:
8192 gcc_unreachable ();
8193
8194 case 3:
8195 d.one_vector_p = false;
8196 if (!rtx_equal_p (op0, op1))
8197 break;
8198
8199 /* The elements of PERM do not suggest that only the first operand
8200 is used, but both operands are identical. Allow easier matching
8201 of the permutation by folding the permutation into the single
8202 input vector. */
8203 /* Fall Through. */
8204 case 2:
8205 for (i = 0; i < nelt; ++i)
8206 d.perm[i] &= nelt - 1;
8207 d.op0 = op1;
8208 d.one_vector_p = true;
8209 break;
8210
8211 case 1:
8212 d.op1 = op0;
8213 d.one_vector_p = true;
8214 break;
8215 }
8216
8217 return aarch64_expand_vec_perm_const_1 (&d);
8218}
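
/* Worked example (illustrative): with V4SImode operands, a selector of
   { 0, 1, 4, 5 } references both inputs (which == 3); { 4, 5, 6, 7 }
   references only the second input (which == 2), so the indices are reduced
   to { 0, 1, 2, 3 } and op1 becomes the single input; { 0, 2, 1, 3 }
   references only the first input (which == 1).  */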
8219
8220static bool
8221aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8222 const unsigned char *sel)
8223{
8224 struct expand_vec_perm_d d;
8225 unsigned int i, nelt, which;
8226 bool ret;
8227
8228 d.vmode = vmode;
8229 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8230 d.testing_p = true;
8231 memcpy (d.perm, sel, nelt);
8232
8233 /* Calculate whether all elements are in one vector. */
8234 for (i = which = 0; i < nelt; ++i)
8235 {
8236 unsigned char e = d.perm[i];
8237 gcc_assert (e < 2 * nelt);
8238 which |= (e < nelt ? 1 : 2);
8239 }
8240
8241 /* If all elements are from the second vector, reindex as if from the
8242 first vector. */
8243 if (which == 2)
8244 for (i = 0; i < nelt; ++i)
8245 d.perm[i] -= nelt;
8246
8247 /* Check whether the mask can be applied to a single vector. */
8248 d.one_vector_p = (which != 3);
8249
8250 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8251 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8252 if (!d.one_vector_p)
8253 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8254
8255 start_sequence ();
8256 ret = aarch64_expand_vec_perm_const_1 (&d);
8257 end_sequence ();
8258
8259 return ret;
8260}
8261
8262/* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
8263bool
8264aarch64_cannot_change_mode_class (enum machine_mode from,
8265 enum machine_mode to,
8266 enum reg_class rclass)
8267{
8268 /* Full-reg subregs are allowed on general regs or any class if they are
8269 the same size. */
8270 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
8271 || !reg_classes_intersect_p (FP_REGS, rclass))
8272 return false;
8273
 8274  /* Limited combinations of subregs are safe on FP_REGS.  In particular:
 8275     1. Vector mode to scalar mode, where one unit of the vector is accessed.
 8276     2. Scalar mode to scalar mode, for integer modes or same-size float modes.
 8277     3. Vector mode to vector mode. */
8278 if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
8279 {
8280 if (aarch64_vector_mode_supported_p (from)
8281 && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
8282 return false;
8283
8284 if (GET_MODE_NUNITS (from) == 1
8285 && GET_MODE_NUNITS (to) == 1
8286 && (GET_MODE_CLASS (from) == MODE_INT
8287 || from == to))
8288 return false;
8289
8290 if (aarch64_vector_mode_supported_p (from)
8291 && aarch64_vector_mode_supported_p (to))
8292 return false;
8293 }
8294
8295 return true;
8296}
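
/* Worked example (an illustration of the rules above, not an exhaustive
   list): for a class containing FP_REGS, (subreg:SI (reg:V4SI)) is allowed
   because one SI-sized unit of the vector is accessed (rule 1), and
   (subreg:DI (reg:TI)) is allowed as an integer scalar-to-scalar change
   (rule 2), whereas (subreg:SI (reg:DF)) is rejected because a narrowing
   float-to-int subreg matches none of the cases.  */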
8297
8298#undef TARGET_ADDRESS_COST
8299#define TARGET_ADDRESS_COST aarch64_address_cost
8300
 8301/* This hook determines whether unnamed bitfields affect the alignment
8302 of the containing structure. The hook returns true if the structure
8303 should inherit the alignment requirements of an unnamed bitfield's
8304 type. */
8305#undef TARGET_ALIGN_ANON_BITFIELD
8306#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
8307
8308#undef TARGET_ASM_ALIGNED_DI_OP
8309#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8310
8311#undef TARGET_ASM_ALIGNED_HI_OP
8312#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8313
8314#undef TARGET_ASM_ALIGNED_SI_OP
8315#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8316
8317#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8318#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8319 hook_bool_const_tree_hwi_hwi_const_tree_true
8320
8321#undef TARGET_ASM_FILE_START
8322#define TARGET_ASM_FILE_START aarch64_start_file
8323
8324#undef TARGET_ASM_OUTPUT_MI_THUNK
8325#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8326
8327#undef TARGET_ASM_SELECT_RTX_SECTION
8328#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8329
8330#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8331#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8332
8333#undef TARGET_BUILD_BUILTIN_VA_LIST
8334#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8335
8336#undef TARGET_CALLEE_COPIES
8337#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8338
8339#undef TARGET_CAN_ELIMINATE
8340#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8341
8342#undef TARGET_CANNOT_FORCE_CONST_MEM
8343#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
8344
8345#undef TARGET_CONDITIONAL_REGISTER_USAGE
8346#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
8347
8348/* Only the least significant bit is used for initialization guard
8349 variables. */
8350#undef TARGET_CXX_GUARD_MASK_BIT
8351#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
8352
8353#undef TARGET_C_MODE_FOR_SUFFIX
8354#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
8355
8356#ifdef TARGET_BIG_ENDIAN_DEFAULT
8357#undef TARGET_DEFAULT_TARGET_FLAGS
8358#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
8359#endif
8360
8361#undef TARGET_CLASS_MAX_NREGS
8362#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
8363
8364#undef TARGET_BUILTIN_DECL
8365#define TARGET_BUILTIN_DECL aarch64_builtin_decl
8366
8367#undef TARGET_EXPAND_BUILTIN
8368#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
8369
8370#undef TARGET_EXPAND_BUILTIN_VA_START
8371#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
8372
8373#undef TARGET_FOLD_BUILTIN
8374#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
8375
8376#undef TARGET_FUNCTION_ARG
8377#define TARGET_FUNCTION_ARG aarch64_function_arg
8378
8379#undef TARGET_FUNCTION_ARG_ADVANCE
8380#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
8381
8382#undef TARGET_FUNCTION_ARG_BOUNDARY
8383#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
8384
8385#undef TARGET_FUNCTION_OK_FOR_SIBCALL
8386#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
8387
8388#undef TARGET_FUNCTION_VALUE
8389#define TARGET_FUNCTION_VALUE aarch64_function_value
8390
8391#undef TARGET_FUNCTION_VALUE_REGNO_P
8392#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
8393
8394#undef TARGET_FRAME_POINTER_REQUIRED
8395#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
8396
8397#undef TARGET_GIMPLE_FOLD_BUILTIN
8398#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
8399
8400#undef TARGET_GIMPLIFY_VA_ARG_EXPR
8401#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
8402
8403#undef TARGET_INIT_BUILTINS
8404#define TARGET_INIT_BUILTINS aarch64_init_builtins
8405
8406#undef TARGET_LEGITIMATE_ADDRESS_P
8407#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
8408
8409#undef TARGET_LEGITIMATE_CONSTANT_P
8410#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
8411
8412#undef TARGET_LIBGCC_CMP_RETURN_MODE
8413#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
8414
8415#undef TARGET_LRA_P
8416#define TARGET_LRA_P aarch64_lra_p
8417
8418#undef TARGET_MANGLE_TYPE
8419#define TARGET_MANGLE_TYPE aarch64_mangle_type
8420
8421#undef TARGET_MEMORY_MOVE_COST
8422#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
8423
8424#undef TARGET_MUST_PASS_IN_STACK
8425#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8426
8427/* This target hook should return true if accesses to volatile bitfields
8428 should use the narrowest mode possible. It should return false if these
8429 accesses should use the bitfield container type. */
8430#undef TARGET_NARROW_VOLATILE_BITFIELD
8431#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
8432
8433#undef TARGET_OPTION_OVERRIDE
8434#define TARGET_OPTION_OVERRIDE aarch64_override_options
8435
8436#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
8437#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
8438 aarch64_override_options_after_change
8439
8440#undef TARGET_PASS_BY_REFERENCE
8441#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
8442
8443#undef TARGET_PREFERRED_RELOAD_CLASS
8444#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
8445
8446#undef TARGET_SECONDARY_RELOAD
8447#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
8448
8449#undef TARGET_SHIFT_TRUNCATION_MASK
8450#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
8451
8452#undef TARGET_SETUP_INCOMING_VARARGS
8453#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
8454
8455#undef TARGET_STRUCT_VALUE_RTX
8456#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
8457
8458#undef TARGET_REGISTER_MOVE_COST
8459#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
8460
8461#undef TARGET_RETURN_IN_MEMORY
8462#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
8463
8464#undef TARGET_RETURN_IN_MSB
8465#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
8466
8467#undef TARGET_RTX_COSTS
8468#define TARGET_RTX_COSTS aarch64_rtx_costs
8469
8470#undef TARGET_SCHED_ISSUE_RATE
8471#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
8472
8473#undef TARGET_TRAMPOLINE_INIT
8474#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
8475
8476#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
8477#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
8478
8479#undef TARGET_VECTOR_MODE_SUPPORTED_P
8480#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
8481
8482#undef TARGET_ARRAY_MODE_SUPPORTED_P
8483#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
8484
8485#undef TARGET_VECTORIZE_ADD_STMT_COST
8486#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
8487
8488#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
8489#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
8490 aarch64_builtin_vectorization_cost
8491
8492#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
8493#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
8494
8495#undef TARGET_VECTORIZE_BUILTINS
8496#define TARGET_VECTORIZE_BUILTINS
8497
8498#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
8499#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
8500 aarch64_builtin_vectorized_function
8501
8502#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
8503#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
8504 aarch64_autovectorize_vector_sizes
8505
8506/* Section anchor support. */
8507
8508#undef TARGET_MIN_ANCHOR_OFFSET
8509#define TARGET_MIN_ANCHOR_OFFSET -256
8510
8511/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
8512 byte offset; we can do much more for larger data types, but have no way
8513 to determine the size of the access. We assume accesses are aligned. */
8514#undef TARGET_MAX_ANCHOR_OFFSET
8515#define TARGET_MAX_ANCHOR_OFFSET 4095
8516
8517#undef TARGET_VECTOR_ALIGNMENT
8518#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
8519
8520#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
8521#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
8522 aarch64_simd_vector_alignment_reachable
8523
8524/* vec_perm support. */
8525
8526#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
8527#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
8528 aarch64_vectorize_vec_perm_const_ok
8529
70f09188 8530
706b2314 8531#undef TARGET_FIXED_CONDITION_CODE_REGS
8532#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
8533
8534struct gcc_target targetm = TARGET_INITIALIZER;
8535
8536#include "gt-aarch64.h"