gcc/config/aarch64/aarch64.c
 1/* Machine description for AArch64 architecture.
 2 Copyright (C) 2009-2013 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
25#include "insn-codes.h"
26#include "rtl.h"
27#include "insn-attr.h"
28#include "tree.h"
29#include "regs.h"
30#include "df.h"
31#include "hard-reg-set.h"
32#include "output.h"
33#include "expr.h"
34#include "reload.h"
35#include "toplev.h"
36#include "target.h"
37#include "target-def.h"
38#include "targhooks.h"
39#include "ggc.h"
40#include "function.h"
41#include "tm_p.h"
42#include "recog.h"
43#include "langhooks.h"
44#include "diagnostic-core.h"
 45#include "gimplify.h"
46#include "optabs.h"
47#include "dwarf2.h"
48#include "cfgloop.h"
49#include "tree-vectorizer.h"
 50#include "config/arm/aarch-cost-tables.h"
 51
52/* Defined for convenience. */
53#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
54
55/* Classifies an address.
56
57 ADDRESS_REG_IMM
58 A simple base register plus immediate offset.
59
60 ADDRESS_REG_WB
61 A base register indexed by immediate offset with writeback.
62
63 ADDRESS_REG_REG
64 A base register indexed by (optionally scaled) register.
65
66 ADDRESS_REG_UXTW
67 A base register indexed by (optionally scaled) zero-extended register.
68
69 ADDRESS_REG_SXTW
70 A base register indexed by (optionally scaled) sign-extended register.
71
72 ADDRESS_LO_SUM
73 A LO_SUM rtx with a base register and "LO12" symbol relocation.
74
75 ADDRESS_SYMBOLIC:
76 A constant symbolic address, in pc-relative literal pool. */
77
78enum aarch64_address_type {
79 ADDRESS_REG_IMM,
80 ADDRESS_REG_WB,
81 ADDRESS_REG_REG,
82 ADDRESS_REG_UXTW,
83 ADDRESS_REG_SXTW,
84 ADDRESS_LO_SUM,
85 ADDRESS_SYMBOLIC
86};
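/* Added annotation (not in the original source): rough assembly shapes for
   each addressing class, assuming x0 as base and x1/w1 as index:
     ADDRESS_REG_IMM    ldr x2, [x0, #16]
     ADDRESS_REG_WB     ldr x2, [x0, #16]!   or   ldr x2, [x0], #16
     ADDRESS_REG_REG    ldr x2, [x0, x1, lsl #3]
     ADDRESS_REG_UXTW   ldr x2, [x0, w1, uxtw #3]
     ADDRESS_REG_SXTW   ldr x2, [x0, w1, sxtw #3]
     ADDRESS_LO_SUM     add x2, x0, #:lo12:sym   (after an adrp of sym)
     ADDRESS_SYMBOLIC   ldr x2, .Lconstant       (pc-relative literal load)  */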
87
88struct aarch64_address_info {
89 enum aarch64_address_type type;
90 rtx base;
91 rtx offset;
92 int shift;
93 enum aarch64_symbol_type symbol_type;
94};
95
96struct simd_immediate_info
97{
98 rtx value;
99 int shift;
100 int element_width;
 101 bool mvn;
 102 bool msl;
103};
104
105/* The current code model. */
106enum aarch64_code_model aarch64_cmodel;
107
108#ifdef HAVE_AS_TLS
109#undef TARGET_HAVE_TLS
110#define TARGET_HAVE_TLS 1
111#endif
112
 113static bool aarch64_lra_p (void);
114static bool aarch64_composite_type_p (const_tree, enum machine_mode);
115static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
116 const_tree,
117 enum machine_mode *, int *,
118 bool *);
119static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
120static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
 121static void aarch64_override_options_after_change (void);
122static bool aarch64_vector_mode_supported_p (enum machine_mode);
123static unsigned bit_count (unsigned HOST_WIDE_INT);
124static bool aarch64_const_vec_all_same_int_p (rtx,
125 HOST_WIDE_INT, HOST_WIDE_INT);
126
127static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
128 const unsigned char *sel);
129
130/* The processor for which instructions should be scheduled. */
131enum aarch64_processor aarch64_tune = generic;
132
133/* The current tuning set. */
134const struct tune_params *aarch64_tune_params;
135
136/* Mask to specify which instructions we are allowed to generate. */
137unsigned long aarch64_isa_flags = 0;
138
139/* Mask to specify which instruction scheduling options should be used. */
140unsigned long aarch64_tune_flags = 0;
141
142/* Tuning parameters. */
143
144#if HAVE_DESIGNATED_INITIALIZERS
145#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
146#else
147#define NAMED_PARAM(NAME, VAL) (VAL)
148#endif
149
150#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
151__extension__
152#endif
153
154#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
155__extension__
156#endif
157static const struct cpu_addrcost_table generic_addrcost_table =
158{
159 NAMED_PARAM (pre_modify, 0),
160 NAMED_PARAM (post_modify, 0),
161 NAMED_PARAM (register_offset, 0),
162 NAMED_PARAM (register_extend, 0),
163 NAMED_PARAM (imm_offset, 0)
164};
165
166#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
167__extension__
168#endif
169static const struct cpu_regmove_cost generic_regmove_cost =
170{
171 NAMED_PARAM (GP2GP, 1),
172 NAMED_PARAM (GP2FP, 2),
173 NAMED_PARAM (FP2GP, 2),
174 /* We currently do not provide direct support for TFmode Q->Q move.
175 Therefore we need to raise the cost above 2 in order to have
176 reload handle the situation. */
177 NAMED_PARAM (FP2FP, 4)
178};
179
180/* Generic costs for vector insn classes. */
181#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
182__extension__
183#endif
184static const struct cpu_vector_cost generic_vector_cost =
185{
186 NAMED_PARAM (scalar_stmt_cost, 1),
187 NAMED_PARAM (scalar_load_cost, 1),
188 NAMED_PARAM (scalar_store_cost, 1),
189 NAMED_PARAM (vec_stmt_cost, 1),
190 NAMED_PARAM (vec_to_scalar_cost, 1),
191 NAMED_PARAM (scalar_to_vec_cost, 1),
192 NAMED_PARAM (vec_align_load_cost, 1),
193 NAMED_PARAM (vec_unalign_load_cost, 1),
194 NAMED_PARAM (vec_unalign_store_cost, 1),
195 NAMED_PARAM (vec_store_cost, 1),
196 NAMED_PARAM (cond_taken_branch_cost, 3),
197 NAMED_PARAM (cond_not_taken_branch_cost, 1)
198};
199
200#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
201__extension__
202#endif
203static const struct tune_params generic_tunings =
204{
 205 &generic_extra_costs,
206 &generic_addrcost_table,
207 &generic_regmove_cost,
 208 &generic_vector_cost,
209 NAMED_PARAM (memmov_cost, 4)
210};
211
212/* A processor implementing AArch64. */
213struct processor
214{
215 const char *const name;
216 enum aarch64_processor core;
217 const char *arch;
218 const unsigned long flags;
219 const struct tune_params *const tune;
220};
221
222/* Processor cores implementing AArch64. */
223static const struct processor all_cores[] =
224{
225#define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
226 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
227#include "aarch64-cores.def"
228#undef AARCH64_CORE
229 {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
230 {NULL, aarch64_none, NULL, 0, NULL}
231};
232
233/* Architectures implementing AArch64. */
234static const struct processor all_architectures[] =
235{
236#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
237 {NAME, CORE, #ARCH, FLAGS, NULL},
238#include "aarch64-arches.def"
239#undef AARCH64_ARCH
240 {NULL, aarch64_none, NULL, 0, NULL}
241};
242
 243/* Target specification. These are populated as command-line arguments
244 are processed, or NULL if not specified. */
245static const struct processor *selected_arch;
246static const struct processor *selected_cpu;
247static const struct processor *selected_tune;
248
249#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
250
251/* An ISA extension in the co-processor and main instruction set space. */
252struct aarch64_option_extension
253{
254 const char *const name;
255 const unsigned long flags_on;
256 const unsigned long flags_off;
257};
258
259/* ISA extensions in AArch64. */
260static const struct aarch64_option_extension all_extensions[] =
261{
262#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
263 {NAME, FLAGS_ON, FLAGS_OFF},
264#include "aarch64-option-extensions.def"
265#undef AARCH64_OPT_EXTENSION
266 {NULL, 0, 0}
267};
268
269/* Used to track the size of an address when generating a pre/post
270 increment address. */
271static enum machine_mode aarch64_memory_reference_mode;
272
273/* Used to force GTY into this file. */
274static GTY(()) int gty_dummy;
275
276/* A table of valid AArch64 "bitmask immediate" values for
277 logical instructions. */
278
279#define AARCH64_NUM_BITMASKS 5334
280static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
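/* Added annotation: a bitmask immediate is a single run of contiguous set
   bits, rotated and then replicated across the register in a power-of-two
   element size.  For example 0x00ff00ff00ff00ff (8 ones per 16-bit element)
   and 0x0000ffff0000ffff (16 ones per 32-bit element) are encodable, while
   0x0000000012345678 is not.  Counting every legal combination of element
   size, run length and rotation for a 64-bit register gives the 5334 values
   this table holds.  */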
281
282/* Did we set flag_omit_frame_pointer just so
283 aarch64_frame_pointer_required would be called? */
284static bool faked_omit_frame_pointer;
285
286typedef enum aarch64_cond_code
287{
288 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
289 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
290 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
291}
292aarch64_cc;
293
294#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
295
296/* The condition codes of the processor, and the inverse function. */
297static const char * const aarch64_condition_codes[] =
298{
299 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
300 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
301};
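/* Added annotation: AARCH64_INVERSE_CONDITION_CODE works because the
   encoding pairs each condition with its opposite in adjacent values, so
   flipping the low bit inverts the test: EQ (0) <-> NE (1), CS (2) <-> CC (3),
   GE (10) <-> LT (11), GT (12) <-> LE (13), and so on.  */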
302
303/* Provide a mapping from gcc register numbers to dwarf register numbers. */
304unsigned
305aarch64_dbx_register_number (unsigned regno)
306{
307 if (GP_REGNUM_P (regno))
308 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
309 else if (regno == SP_REGNUM)
310 return AARCH64_DWARF_SP;
311 else if (FP_REGNUM_P (regno))
312 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
313
314 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
315 equivalent DWARF register. */
316 return DWARF_FRAME_REGISTERS;
317}
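/* Added annotation, assuming the usual AArch64 DWARF numbering in which
   AARCH64_DWARF_R0 is 0, AARCH64_DWARF_SP is 31 and AARCH64_DWARF_V0 is 64:
   x5 maps to 5, sp to 31 and v3 to 67, while a register with no DWARF
   equivalent (e.g. CC_REGNUM) reports DWARF_FRAME_REGISTERS.  */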
318
319/* Return TRUE if MODE is any of the large INT modes. */
320static bool
321aarch64_vect_struct_mode_p (enum machine_mode mode)
322{
323 return mode == OImode || mode == CImode || mode == XImode;
324}
325
326/* Return TRUE if MODE is any of the vector modes. */
327static bool
328aarch64_vector_mode_p (enum machine_mode mode)
329{
330 return aarch64_vector_mode_supported_p (mode)
331 || aarch64_vect_struct_mode_p (mode);
332}
333
334/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
335static bool
336aarch64_array_mode_supported_p (enum machine_mode mode,
337 unsigned HOST_WIDE_INT nelems)
338{
339 if (TARGET_SIMD
340 && AARCH64_VALID_SIMD_QREG_MODE (mode)
341 && (nelems >= 2 && nelems <= 4))
342 return true;
343
344 return false;
345}
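/* Added annotation: returning true here lets an array of 2, 3 or 4 full
   128-bit vectors (for instance the storage behind int32x4_t[3]) be carried
   in a single wide opaque mode (OImode, CImode or XImode) rather than being
   forced into memory.  */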
346
347/* Implement HARD_REGNO_NREGS. */
348
349int
350aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
351{
352 switch (aarch64_regno_regclass (regno))
353 {
354 case FP_REGS:
355 case FP_LO_REGS:
356 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
357 default:
358 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
359 }
360 gcc_unreachable ();
361}
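/* Worked example (added annotation): with 64-bit general registers and
   128-bit vector registers, a 16-byte TImode value needs two GP registers
   but only one FP/SIMD register, and a 32-byte OImode value needs four GP
   or two FP/SIMD registers.  */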
362
363/* Implement HARD_REGNO_MODE_OK. */
364
365int
366aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
367{
368 if (GET_MODE_CLASS (mode) == MODE_CC)
369 return regno == CC_REGNUM;
370
371 if (regno == SP_REGNUM)
372 /* The purpose of comparing with ptr_mode is to support the
373 global register variable associated with the stack pointer
374 register via the syntax of asm ("wsp") in ILP32. */
375 return mode == Pmode || mode == ptr_mode;
376
377 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
378 return mode == Pmode;
379
380 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
381 return 1;
382
383 if (FP_REGNUM_P (regno))
384 {
385 if (aarch64_vect_struct_mode_p (mode))
386 return
387 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
388 else
389 return 1;
390 }
391
392 return 0;
393}
394
395/* Return true if calls to DECL should be treated as
396 long-calls (ie called via a register). */
397static bool
398aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
399{
400 return false;
401}
402
403/* Return true if calls to symbol-ref SYM should be treated as
404 long-calls (ie called via a register). */
405bool
406aarch64_is_long_call_p (rtx sym)
407{
408 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
409}
410
411/* Return true if the offsets to a zero/sign-extract operation
412 represent an expression that matches an extend operation. The
 413 operands represent the parameters from
414
415 (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
416bool
417aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
418 rtx extract_imm)
419{
420 HOST_WIDE_INT mult_val, extract_val;
421
422 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
423 return false;
424
425 mult_val = INTVAL (mult_imm);
426 extract_val = INTVAL (extract_imm);
427
428 if (extract_val > 8
429 && extract_val < GET_MODE_BITSIZE (mode)
430 && exact_log2 (extract_val & ~7) > 0
431 && (extract_val & 7) <= 4
432 && mult_val == (1 << (extract_val & 7)))
433 return true;
434
435 return false;
436}
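/* Worked example (added annotation): in DImode, extract_imm == 34 and
   mult_imm == 4 pass the checks above: 34 & ~7 == 32 is a power of two,
   34 & 7 == 2 is a shift of at most 4, and 4 == 1 << 2.  That shape is the
   extract form of a 32-bit value extended and then shifted left by two,
   i.e. the (extended register, LSL #2) operand of an add or sub.  */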
437
438/* Emit an insn that's a simple single-set. Both the operands must be
439 known to be valid. */
440inline static rtx
441emit_set_insn (rtx x, rtx y)
442{
443 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
444}
445
446/* X and Y are two things to compare using CODE. Emit the compare insn and
447 return the rtx for register 0 in the proper mode. */
448rtx
449aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
450{
451 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
452 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
453
454 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
455 return cc_reg;
456}
457
458/* Build the SYMBOL_REF for __tls_get_addr. */
459
460static GTY(()) rtx tls_get_addr_libfunc;
461
462rtx
463aarch64_tls_get_addr (void)
464{
465 if (!tls_get_addr_libfunc)
466 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
467 return tls_get_addr_libfunc;
468}
469
470/* Return the TLS model to use for ADDR. */
471
472static enum tls_model
473tls_symbolic_operand_type (rtx addr)
474{
475 enum tls_model tls_kind = TLS_MODEL_NONE;
476 rtx sym, addend;
477
478 if (GET_CODE (addr) == CONST)
479 {
480 split_const (addr, &sym, &addend);
481 if (GET_CODE (sym) == SYMBOL_REF)
482 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
483 }
484 else if (GET_CODE (addr) == SYMBOL_REF)
485 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
486
487 return tls_kind;
488}
489
490/* We'll allow lo_sum's in addresses in our legitimate addresses
491 so that combine would take care of combining addresses where
492 necessary, but for generation purposes, we'll generate the address
 493 as:
494 RTL Absolute
495 tmp = hi (symbol_ref); adrp x1, foo
496 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
497 nop
498
499 PIC TLS
500 adrp x1, :got:foo adrp tmp, :tlsgd:foo
501 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
502 bl __tls_get_addr
503 nop
504
505 Load TLS symbol, depending on TLS mechanism and TLS access model.
506
507 Global Dynamic - Traditional TLS:
508 adrp tmp, :tlsgd:imm
509 add dest, tmp, #:tlsgd_lo12:imm
510 bl __tls_get_addr
511
512 Global Dynamic - TLS Descriptors:
513 adrp dest, :tlsdesc:imm
514 ldr tmp, [dest, #:tlsdesc_lo12:imm]
515 add dest, dest, #:tlsdesc_lo12:imm
516 blr tmp
517 mrs tp, tpidr_el0
518 add dest, dest, tp
519
520 Initial Exec:
521 mrs tp, tpidr_el0
522 adrp tmp, :gottprel:imm
523 ldr dest, [tmp, #:gottprel_lo12:imm]
524 add dest, dest, tp
525
526 Local Exec:
527 mrs tp, tpidr_el0
528 add t0, tp, #:tprel_hi12:imm
529 add t0, #:tprel_lo12_nc:imm
530*/
531
532static void
533aarch64_load_symref_appropriately (rtx dest, rtx imm,
534 enum aarch64_symbol_type type)
535{
536 switch (type)
537 {
538 case SYMBOL_SMALL_ABSOLUTE:
539 {
 540 /* In ILP32, the mode of dest can be either SImode or DImode. */
 541 rtx tmp_reg = dest;
542 enum machine_mode mode = GET_MODE (dest);
543
544 gcc_assert (mode == Pmode || mode == ptr_mode);
545
 546 if (can_create_pseudo_p ())
 547 tmp_reg = gen_reg_rtx (mode);
 548
 549 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
550 emit_insn (gen_add_losym (dest, tmp_reg, imm));
551 return;
552 }
553
554 case SYMBOL_TINY_ABSOLUTE:
555 emit_insn (gen_rtx_SET (Pmode, dest, imm));
556 return;
557
558 case SYMBOL_SMALL_GOT:
559 {
560 /* In ILP32, the mode of dest can be either SImode or DImode,
561 while the got entry is always of SImode size. The mode of
562 dest depends on how dest is used: if dest is assigned to a
563 pointer (e.g. in the memory), it has SImode; it may have
 564 DImode if dest is dereferenced to access the memory.
565 This is why we have to handle three different ldr_got_small
566 patterns here (two patterns for ILP32). */
 567 rtx tmp_reg = dest;
568 enum machine_mode mode = GET_MODE (dest);
569
 570 if (can_create_pseudo_p ())
571 tmp_reg = gen_reg_rtx (mode);
572
573 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
574 if (mode == ptr_mode)
575 {
576 if (mode == DImode)
577 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
578 else
579 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
580 }
581 else
582 {
583 gcc_assert (mode == Pmode);
584 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
585 }
586
587 return;
588 }
589
590 case SYMBOL_SMALL_TLSGD:
591 {
592 rtx insns;
593 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
594
595 start_sequence ();
596 emit_call_insn (gen_tlsgd_small (result, imm));
597 insns = get_insns ();
598 end_sequence ();
599
600 RTL_CONST_CALL_P (insns) = 1;
601 emit_libcall_block (insns, dest, result, imm);
602 return;
603 }
604
605 case SYMBOL_SMALL_TLSDESC:
606 {
607 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
608 rtx tp;
609
610 emit_insn (gen_tlsdesc_small (imm));
611 tp = aarch64_load_tp (NULL);
612 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
613 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
614 return;
615 }
616
617 case SYMBOL_SMALL_GOTTPREL:
618 {
619 rtx tmp_reg = gen_reg_rtx (Pmode);
620 rtx tp = aarch64_load_tp (NULL);
621 emit_insn (gen_tlsie_small (tmp_reg, imm));
622 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
623 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
624 return;
625 }
626
627 case SYMBOL_SMALL_TPREL:
628 {
629 rtx tp = aarch64_load_tp (NULL);
630 emit_insn (gen_tlsle_small (dest, tp, imm));
631 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
632 return;
633 }
634
635 case SYMBOL_TINY_GOT:
636 emit_insn (gen_ldr_got_tiny (dest, imm));
637 return;
638
639 default:
640 gcc_unreachable ();
641 }
642}
643
644/* Emit a move from SRC to DEST. Assume that the move expanders can
645 handle all moves if !can_create_pseudo_p (). The distinction is
646 important because, unlike emit_move_insn, the move expanders know
647 how to force Pmode objects into the constant pool even when the
648 constant pool address is not itself legitimate. */
649static rtx
650aarch64_emit_move (rtx dest, rtx src)
651{
652 return (can_create_pseudo_p ()
653 ? emit_move_insn (dest, src)
654 : emit_move_insn_1 (dest, src));
655}
656
657void
658aarch64_split_128bit_move (rtx dst, rtx src)
659{
660 rtx low_dst;
661
662 enum machine_mode src_mode = GET_MODE (src);
663 enum machine_mode dst_mode = GET_MODE (dst);
664 int src_regno = REGNO (src);
665 int dst_regno = REGNO (dst);
666
667 gcc_assert (dst_mode == TImode || dst_mode == TFmode);
668
669 if (REG_P (dst) && REG_P (src))
670 {
 671 gcc_assert (src_mode == TImode || src_mode == TFmode);
672
673 /* Handle r -> w, w -> r. */
674 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
675 {
676 switch (src_mode) {
677 case TImode:
678 emit_insn
679 (gen_aarch64_movtilow_di (dst, gen_lowpart (word_mode, src)));
680 emit_insn
681 (gen_aarch64_movtihigh_di (dst, gen_highpart (word_mode, src)));
682 return;
683 case TFmode:
684 emit_insn
685 (gen_aarch64_movtflow_di (dst, gen_lowpart (word_mode, src)));
686 emit_insn
687 (gen_aarch64_movtfhigh_di (dst, gen_highpart (word_mode, src)));
688 return;
689 default:
690 gcc_unreachable ();
691 }
692 }
693 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
694 {
695 switch (src_mode) {
696 case TImode:
697 emit_insn
698 (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst), src));
699 emit_insn
700 (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst), src));
701 return;
702 case TFmode:
703 emit_insn
704 (gen_aarch64_movdi_tflow (gen_lowpart (word_mode, dst), src));
705 emit_insn
706 (gen_aarch64_movdi_tfhigh (gen_highpart (word_mode, dst), src));
707 return;
708 default:
709 gcc_unreachable ();
710 }
711 }
712 /* Fall through to r -> r cases. */
713 }
714
715 switch (dst_mode) {
716 case TImode:
717 low_dst = gen_lowpart (word_mode, dst);
718 if (REG_P (low_dst)
719 && reg_overlap_mentioned_p (low_dst, src))
720 {
721 aarch64_emit_move (gen_highpart (word_mode, dst),
722 gen_highpart_mode (word_mode, TImode, src));
723 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
724 }
725 else
726 {
727 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
728 aarch64_emit_move (gen_highpart (word_mode, dst),
729 gen_highpart_mode (word_mode, TImode, src));
730 }
731 return;
732 case TFmode:
733 emit_move_insn (gen_rtx_REG (DFmode, dst_regno),
734 gen_rtx_REG (DFmode, src_regno));
735 emit_move_insn (gen_rtx_REG (DFmode, dst_regno + 1),
736 gen_rtx_REG (DFmode, src_regno + 1));
737 return;
738 default:
739 gcc_unreachable ();
740 }
741}
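/* Added annotation: a 128-bit (TImode/TFmode) move that cannot stay inside
   the FP/SIMD register file is split above into two 64-bit moves of the low
   and high halves; in the general case the halves are emitted in whichever
   order avoids clobbering a part of the source that is still needed.  */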
742
743bool
744aarch64_split_128bit_move_p (rtx dst, rtx src)
745{
746 return (! REG_P (src)
747 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
748}
749
750/* Split a complex SIMD combine. */
751
752void
753aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
754{
755 enum machine_mode src_mode = GET_MODE (src1);
756 enum machine_mode dst_mode = GET_MODE (dst);
757
758 gcc_assert (VECTOR_MODE_P (dst_mode));
759
760 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
761 {
762 rtx (*gen) (rtx, rtx, rtx);
763
764 switch (src_mode)
765 {
766 case V8QImode:
767 gen = gen_aarch64_simd_combinev8qi;
768 break;
769 case V4HImode:
770 gen = gen_aarch64_simd_combinev4hi;
771 break;
772 case V2SImode:
773 gen = gen_aarch64_simd_combinev2si;
774 break;
775 case V2SFmode:
776 gen = gen_aarch64_simd_combinev2sf;
777 break;
778 case DImode:
779 gen = gen_aarch64_simd_combinedi;
780 break;
781 case DFmode:
782 gen = gen_aarch64_simd_combinedf;
783 break;
784 default:
785 gcc_unreachable ();
786 }
787
788 emit_insn (gen (dst, src1, src2));
789 return;
790 }
791}
792
793/* Split a complex SIMD move. */
794
795void
796aarch64_split_simd_move (rtx dst, rtx src)
797{
798 enum machine_mode src_mode = GET_MODE (src);
799 enum machine_mode dst_mode = GET_MODE (dst);
800
801 gcc_assert (VECTOR_MODE_P (dst_mode));
802
803 if (REG_P (dst) && REG_P (src))
804 {
805 rtx (*gen) (rtx, rtx);
806
807 gcc_assert (VECTOR_MODE_P (src_mode));
808
809 switch (src_mode)
810 {
811 case V16QImode:
 812 gen = gen_aarch64_split_simd_movv16qi;
813 break;
814 case V8HImode:
 815 gen = gen_aarch64_split_simd_movv8hi;
816 break;
817 case V4SImode:
 818 gen = gen_aarch64_split_simd_movv4si;
819 break;
820 case V2DImode:
 821 gen = gen_aarch64_split_simd_movv2di;
822 break;
823 case V4SFmode:
 824 gen = gen_aarch64_split_simd_movv4sf;
825 break;
826 case V2DFmode:
 827 gen = gen_aarch64_split_simd_movv2df;
828 break;
829 default:
830 gcc_unreachable ();
831 }
832
833 emit_insn (gen (dst, src));
834 return;
835 }
836}
837
 838static rtx
 839aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
840{
841 if (can_create_pseudo_p ())
 842 return force_reg (mode, value);
843 else
844 {
845 x = aarch64_emit_move (x, value);
846 return x;
847 }
848}
849
850
851static rtx
852aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
853{
 854 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
855 {
856 rtx high;
857 /* Load the full offset into a register. This
858 might be improvable in the future. */
859 high = GEN_INT (offset);
860 offset = 0;
861 high = aarch64_force_temporary (mode, temp, high);
862 reg = aarch64_force_temporary (mode, temp,
863 gen_rtx_PLUS (mode, high, reg));
864 }
865 return plus_constant (mode, reg, offset);
866}
867
868void
869aarch64_expand_mov_immediate (rtx dest, rtx imm)
870{
871 enum machine_mode mode = GET_MODE (dest);
872 unsigned HOST_WIDE_INT mask;
873 int i;
874 bool first;
875 unsigned HOST_WIDE_INT val;
876 bool subtargets;
877 rtx subtarget;
878 int one_match, zero_match;
879
880 gcc_assert (mode == SImode || mode == DImode);
881
882 /* Check on what type of symbol it is. */
883 if (GET_CODE (imm) == SYMBOL_REF
884 || GET_CODE (imm) == LABEL_REF
885 || GET_CODE (imm) == CONST)
886 {
887 rtx mem, base, offset;
888 enum aarch64_symbol_type sty;
889
890 /* If we have (const (plus symbol offset)), separate out the offset
891 before we start classifying the symbol. */
892 split_const (imm, &base, &offset);
893
894 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
895 switch (sty)
896 {
897 case SYMBOL_FORCE_TO_MEM:
898 if (offset != const0_rtx
899 && targetm.cannot_force_const_mem (mode, imm))
900 {
901 gcc_assert(can_create_pseudo_p ());
 902 base = aarch64_force_temporary (mode, dest, base);
903 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
904 aarch64_emit_move (dest, base);
905 return;
906 }
 907 mem = force_const_mem (ptr_mode, imm);
 908 gcc_assert (mem);
909 if (mode != ptr_mode)
910 mem = gen_rtx_ZERO_EXTEND (mode, mem);
911 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
912 return;
913
914 case SYMBOL_SMALL_TLSGD:
915 case SYMBOL_SMALL_TLSDESC:
916 case SYMBOL_SMALL_GOTTPREL:
917 case SYMBOL_SMALL_GOT:
 918 case SYMBOL_TINY_GOT:
919 if (offset != const0_rtx)
920 {
921 gcc_assert(can_create_pseudo_p ());
 922 base = aarch64_force_temporary (mode, dest, base);
923 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
924 aarch64_emit_move (dest, base);
925 return;
926 }
927 /* FALLTHRU */
928
929 case SYMBOL_SMALL_TPREL:
930 case SYMBOL_SMALL_ABSOLUTE:
 931 case SYMBOL_TINY_ABSOLUTE:
932 aarch64_load_symref_appropriately (dest, imm, sty);
933 return;
934
935 default:
936 gcc_unreachable ();
937 }
938 }
939
940 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
941 {
942 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
943 return;
944 }
945
946 if (!CONST_INT_P (imm))
947 {
948 if (GET_CODE (imm) == HIGH)
949 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
950 else
951 {
952 rtx mem = force_const_mem (mode, imm);
953 gcc_assert (mem);
954 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
955 }
956
957 return;
958 }
959
960 if (mode == SImode)
961 {
962 /* We know we can't do this in 1 insn, and we must be able to do it
963 in two; so don't mess around looking for sequences that don't buy
964 us anything. */
965 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
966 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
967 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
968 return;
969 }
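/* Worked example (added annotation): for the SImode constant 0x12345678
   the two instructions are a move of 0x5678 followed by an insertion of
   0x1234 into bits [31:16], roughly "mov w0, #0x5678" then
   "movk w0, #0x1234, lsl #16".  */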
970
971 /* Remaining cases are all for DImode. */
972
973 val = INTVAL (imm);
974 subtargets = optimize && can_create_pseudo_p ();
975
976 one_match = 0;
977 zero_match = 0;
978 mask = 0xffff;
979
980 for (i = 0; i < 64; i += 16, mask <<= 16)
981 {
982 if ((val & mask) == 0)
983 zero_match++;
984 else if ((val & mask) == mask)
985 one_match++;
986 }
987
988 if (one_match == 2)
989 {
990 mask = 0xffff;
991 for (i = 0; i < 64; i += 16, mask <<= 16)
992 {
993 if ((val & mask) != mask)
994 {
995 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
996 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
997 GEN_INT ((val >> i) & 0xffff)));
998 return;
999 }
1000 }
1001 gcc_unreachable ();
1002 }
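/* Worked example (added annotation): for val == 0xffff1234ffff5678 two of
   the four 16-bit chunks are already all ones, so the loop above finds the
   first chunk that is not 0xffff (the low one), materialises
   val | 0xffff == 0xffff1234ffffffff -- now only one chunk differs from all
   ones, so it is a single MOVN-class constant -- and then patches the low
   chunk back to 0x5678 with an insert, two instructions in total.  */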
1003
1004 if (zero_match == 2)
1005 goto simple_sequence;
1006
1007 mask = 0x0ffff0000UL;
1008 for (i = 16; i < 64; i += 16, mask <<= 16)
1009 {
1010 HOST_WIDE_INT comp = mask & ~(mask - 1);
1011
1012 if (aarch64_uimm12_shift (val - (val & mask)))
1013 {
1014 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1015
1016 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1017 emit_insn (gen_adddi3 (dest, subtarget,
1018 GEN_INT (val - (val & mask))));
1019 return;
1020 }
1021 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1022 {
1023 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1024
1025 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1026 GEN_INT ((val + comp) & mask)));
1027 emit_insn (gen_adddi3 (dest, subtarget,
1028 GEN_INT (val - ((val + comp) & mask))));
1029 return;
1030 }
1031 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1032 {
1033 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1034
1035 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1036 GEN_INT ((val - comp) | ~mask)));
1037 emit_insn (gen_adddi3 (dest, subtarget,
1038 GEN_INT (val - ((val - comp) | ~mask))));
1039 return;
1040 }
1041 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1042 {
1043 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1044
1045 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1046 GEN_INT (val | ~mask)));
1047 emit_insn (gen_adddi3 (dest, subtarget,
1048 GEN_INT (val - (val | ~mask))));
1049 return;
1050 }
1051 }
1052
1053 /* See if we can do it by arithmetically combining two
1054 immediates. */
1055 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1056 {
1057 int j;
1058 mask = 0xffff;
1059
1060 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1061 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1062 {
1063 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1064 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1065 GEN_INT (aarch64_bitmasks[i])));
1066 emit_insn (gen_adddi3 (dest, subtarget,
1067 GEN_INT (val - aarch64_bitmasks[i])));
1068 return;
1069 }
1070
1071 for (j = 0; j < 64; j += 16, mask <<= 16)
1072 {
1073 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1074 {
1075 emit_insn (gen_rtx_SET (VOIDmode, dest,
1076 GEN_INT (aarch64_bitmasks[i])));
1077 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1078 GEN_INT ((val >> j) & 0xffff)));
1079 return;
1080 }
1081 }
1082 }
1083
1084 /* See if we can do it by logically combining two immediates. */
1085 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1086 {
1087 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1088 {
1089 int j;
1090
1091 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1092 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1093 {
1094 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1095 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1096 GEN_INT (aarch64_bitmasks[i])));
1097 emit_insn (gen_iordi3 (dest, subtarget,
1098 GEN_INT (aarch64_bitmasks[j])));
1099 return;
1100 }
1101 }
1102 else if ((val & aarch64_bitmasks[i]) == val)
1103 {
1104 int j;
1105
1106 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1107 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1108 {
1109
1110 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1111 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1112 GEN_INT (aarch64_bitmasks[j])));
1113 emit_insn (gen_anddi3 (dest, subtarget,
1114 GEN_INT (aarch64_bitmasks[i])));
1115 return;
1116 }
1117 }
1118 }
1119
1120 simple_sequence:
1121 first = true;
1122 mask = 0xffff;
1123 for (i = 0; i < 64; i += 16, mask <<= 16)
1124 {
1125 if ((val & mask) != 0)
1126 {
1127 if (first)
1128 {
1129 emit_insn (gen_rtx_SET (VOIDmode, dest,
1130 GEN_INT (val & mask)));
1131 first = false;
1132 }
1133 else
1134 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1135 GEN_INT ((val >> i) & 0xffff)));
1136 }
1137 }
1138}
1139
1140static bool
1141aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1142{
1143 /* Indirect calls are not currently supported. */
1144 if (decl == NULL)
1145 return false;
1146
1147 /* Cannot tail-call to long-calls, since these are outside of the
1148 range of a branch instruction (we could handle this if we added
 1149 support for indirect tail-calls). */
1150 if (aarch64_decl_is_long_call_p (decl))
1151 return false;
1152
1153 return true;
1154}
1155
1156/* Implement TARGET_PASS_BY_REFERENCE. */
1157
1158static bool
1159aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1160 enum machine_mode mode,
1161 const_tree type,
1162 bool named ATTRIBUTE_UNUSED)
1163{
1164 HOST_WIDE_INT size;
1165 enum machine_mode dummymode;
1166 int nregs;
1167
1168 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1169 size = (mode == BLKmode && type)
1170 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1171
1172 if (type)
1173 {
1174 /* Arrays always passed by reference. */
1175 if (TREE_CODE (type) == ARRAY_TYPE)
1176 return true;
1177 /* Other aggregates based on their size. */
1178 if (AGGREGATE_TYPE_P (type))
1179 size = int_size_in_bytes (type);
1180 }
1181
1182 /* Variable sized arguments are always returned by reference. */
1183 if (size < 0)
1184 return true;
1185
1186 /* Can this be a candidate to be passed in fp/simd register(s)? */
1187 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1188 &dummymode, &nregs,
1189 NULL))
1190 return false;
1191
1192 /* Arguments which are variable sized or larger than 2 registers are
 1193 passed by reference unless they are a homogeneous floating-point
1194 aggregate. */
1195 return size > 2 * UNITS_PER_WORD;
1196}
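/* Added annotation: for example, a plain 24-byte struct of three 64-bit
   integers is larger than two registers and is passed by reference, while
   a homogeneous aggregate of four doubles (32 bytes) is accepted by the
   fp/simd candidate check above and is still passed by value in vector
   registers.  */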
1197
1198/* Return TRUE if VALTYPE is padded to its least significant bits. */
1199static bool
1200aarch64_return_in_msb (const_tree valtype)
1201{
1202 enum machine_mode dummy_mode;
1203 int dummy_int;
1204
1205 /* Never happens in little-endian mode. */
1206 if (!BYTES_BIG_ENDIAN)
1207 return false;
1208
1209 /* Only composite types smaller than or equal to 16 bytes can
1210 be potentially returned in registers. */
1211 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1212 || int_size_in_bytes (valtype) <= 0
1213 || int_size_in_bytes (valtype) > 16)
1214 return false;
1215
1216 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1217 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1218 is always passed/returned in the least significant bits of fp/simd
1219 register(s). */
1220 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1221 &dummy_mode, &dummy_int, NULL))
1222 return false;
1223
1224 return true;
1225}
1226
1227/* Implement TARGET_FUNCTION_VALUE.
1228 Define how to find the value returned by a function. */
1229
1230static rtx
1231aarch64_function_value (const_tree type, const_tree func,
1232 bool outgoing ATTRIBUTE_UNUSED)
1233{
1234 enum machine_mode mode;
1235 int unsignedp;
1236 int count;
1237 enum machine_mode ag_mode;
1238
1239 mode = TYPE_MODE (type);
1240 if (INTEGRAL_TYPE_P (type))
1241 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1242
1243 if (aarch64_return_in_msb (type))
1244 {
1245 HOST_WIDE_INT size = int_size_in_bytes (type);
1246
1247 if (size % UNITS_PER_WORD != 0)
1248 {
1249 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1250 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1251 }
1252 }
1253
1254 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1255 &ag_mode, &count, NULL))
1256 {
1257 if (!aarch64_composite_type_p (type, mode))
1258 {
1259 gcc_assert (count == 1 && mode == ag_mode);
1260 return gen_rtx_REG (mode, V0_REGNUM);
1261 }
1262 else
1263 {
1264 int i;
1265 rtx par;
1266
1267 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1268 for (i = 0; i < count; i++)
1269 {
1270 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1271 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1272 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1273 XVECEXP (par, 0, i) = tmp;
1274 }
1275 return par;
1276 }
1277 }
1278 else
1279 return gen_rtx_REG (mode, R0_REGNUM);
1280}
1281
1282/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1283 Return true if REGNO is the number of a hard register in which the values
1284 of called function may come back. */
1285
1286static bool
1287aarch64_function_value_regno_p (const unsigned int regno)
1288{
1289 /* Maximum of 16 bytes can be returned in the general registers. Examples
1290 of 16-byte return values are: 128-bit integers and 16-byte small
1291 structures (excluding homogeneous floating-point aggregates). */
1292 if (regno == R0_REGNUM || regno == R1_REGNUM)
1293 return true;
1294
1295 /* Up to four fp/simd registers can return a function value, e.g. a
1296 homogeneous floating-point aggregate having four members. */
1297 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1298 return !TARGET_GENERAL_REGS_ONLY;
1299
1300 return false;
1301}
1302
1303/* Implement TARGET_RETURN_IN_MEMORY.
1304
1305 If the type T of the result of a function is such that
1306 void func (T arg)
1307 would require that arg be passed as a value in a register (or set of
1308 registers) according to the parameter passing rules, then the result
1309 is returned in the same registers as would be used for such an
1310 argument. */
1311
1312static bool
1313aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1314{
1315 HOST_WIDE_INT size;
1316 enum machine_mode ag_mode;
1317 int count;
1318
1319 if (!AGGREGATE_TYPE_P (type)
1320 && TREE_CODE (type) != COMPLEX_TYPE
1321 && TREE_CODE (type) != VECTOR_TYPE)
1322 /* Simple scalar types always returned in registers. */
1323 return false;
1324
1325 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1326 type,
1327 &ag_mode,
1328 &count,
1329 NULL))
1330 return false;
1331
1332 /* Types larger than 2 registers returned in memory. */
1333 size = int_size_in_bytes (type);
1334 return (size < 0 || size > 2 * UNITS_PER_WORD);
1335}
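/* Added annotation: for example, a 16-byte struct of two 64-bit integers
   comes back in x0/x1, a homogeneous aggregate of four floats comes back
   in s0-s3, and a 24-byte non-homogeneous struct is returned in memory.  */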
1336
1337static bool
1338aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1339 const_tree type, int *nregs)
1340{
1341 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1342 return aarch64_vfp_is_call_or_return_candidate (mode,
1343 type,
1344 &pcum->aapcs_vfp_rmode,
1345 nregs,
1346 NULL);
1347}
1348
1349/* Given MODE and TYPE of a function argument, return the alignment in
1350 bits. The idea is to suppress any stronger alignment requested by
1351 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1352 This is a helper function for local use only. */
1353
1354static unsigned int
1355aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1356{
1357 unsigned int alignment;
1358
1359 if (type)
1360 {
1361 if (!integer_zerop (TYPE_SIZE (type)))
1362 {
1363 if (TYPE_MODE (type) == mode)
1364 alignment = TYPE_ALIGN (type);
1365 else
1366 alignment = GET_MODE_ALIGNMENT (mode);
1367 }
1368 else
1369 alignment = 0;
1370 }
1371 else
1372 alignment = GET_MODE_ALIGNMENT (mode);
1373
1374 return alignment;
1375}
1376
1377/* Layout a function argument according to the AAPCS64 rules. The rule
1378 numbers refer to the rule numbers in the AAPCS64. */
1379
1380static void
1381aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1382 const_tree type,
1383 bool named ATTRIBUTE_UNUSED)
1384{
1385 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1386 int ncrn, nvrn, nregs;
1387 bool allocate_ncrn, allocate_nvrn;
1388
1389 /* We need to do this once per argument. */
1390 if (pcum->aapcs_arg_processed)
1391 return;
1392
1393 pcum->aapcs_arg_processed = true;
1394
1395 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1396 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1397 mode,
1398 type,
1399 &nregs);
1400
 1401 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1402 The following code thus handles passing by SIMD/FP registers first. */
1403
1404 nvrn = pcum->aapcs_nvrn;
1405
 1406 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
 1407 and homogeneous short-vector aggregates (HVA). */
1408 if (allocate_nvrn)
1409 {
1410 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1411 {
1412 pcum->aapcs_nextnvrn = nvrn + nregs;
1413 if (!aarch64_composite_type_p (type, mode))
1414 {
1415 gcc_assert (nregs == 1);
1416 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1417 }
1418 else
1419 {
1420 rtx par;
1421 int i;
1422 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1423 for (i = 0; i < nregs; i++)
1424 {
1425 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1426 V0_REGNUM + nvrn + i);
1427 tmp = gen_rtx_EXPR_LIST
1428 (VOIDmode, tmp,
1429 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1430 XVECEXP (par, 0, i) = tmp;
1431 }
1432 pcum->aapcs_reg = par;
1433 }
1434 return;
1435 }
1436 else
1437 {
1438 /* C.3 NSRN is set to 8. */
1439 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1440 goto on_stack;
1441 }
1442 }
1443
1444 ncrn = pcum->aapcs_ncrn;
1445 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1446 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1447
1448
 1449 /* C6 - C9, though the sign and zero extension semantics are
 1450 handled elsewhere. This is the case where the argument fits
 1451 entirely in general registers. */
1452 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1453 {
1454 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1455
1456 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1457
1458 /* C.8 if the argument has an alignment of 16 then the NGRN is
1459 rounded up to the next even number. */
1460 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1461 {
1462 ++ncrn;
1463 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1464 }
1465 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1466 A reg is still generated for it, but the caller should be smart
1467 enough not to use it. */
1468 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1469 {
1470 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1471 }
1472 else
1473 {
1474 rtx par;
1475 int i;
1476
1477 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1478 for (i = 0; i < nregs; i++)
1479 {
1480 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1481 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1482 GEN_INT (i * UNITS_PER_WORD));
1483 XVECEXP (par, 0, i) = tmp;
1484 }
1485 pcum->aapcs_reg = par;
1486 }
1487
1488 pcum->aapcs_nextncrn = ncrn + nregs;
1489 return;
1490 }
1491
1492 /* C.11 */
1493 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1494
1495 /* The argument is passed on stack; record the needed number of words for
1496 this argument (we can re-use NREGS) and align the total size if
1497 necessary. */
1498on_stack:
1499 pcum->aapcs_stack_words = nregs;
1500 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1501 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1502 16 / UNITS_PER_WORD) + 1;
1503 return;
1504}
1505
1506/* Implement TARGET_FUNCTION_ARG. */
1507
1508static rtx
1509aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1510 const_tree type, bool named)
1511{
1512 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1513 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1514
1515 if (mode == VOIDmode)
1516 return NULL_RTX;
1517
1518 aarch64_layout_arg (pcum_v, mode, type, named);
1519 return pcum->aapcs_reg;
1520}
1521
1522void
1523aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1524 const_tree fntype ATTRIBUTE_UNUSED,
1525 rtx libname ATTRIBUTE_UNUSED,
1526 const_tree fndecl ATTRIBUTE_UNUSED,
1527 unsigned n_named ATTRIBUTE_UNUSED)
1528{
1529 pcum->aapcs_ncrn = 0;
1530 pcum->aapcs_nvrn = 0;
1531 pcum->aapcs_nextncrn = 0;
1532 pcum->aapcs_nextnvrn = 0;
1533 pcum->pcs_variant = ARM_PCS_AAPCS64;
1534 pcum->aapcs_reg = NULL_RTX;
1535 pcum->aapcs_arg_processed = false;
1536 pcum->aapcs_stack_words = 0;
1537 pcum->aapcs_stack_size = 0;
1538
1539 return;
1540}
1541
1542static void
1543aarch64_function_arg_advance (cumulative_args_t pcum_v,
1544 enum machine_mode mode,
1545 const_tree type,
1546 bool named)
1547{
1548 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1549 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1550 {
1551 aarch64_layout_arg (pcum_v, mode, type, named);
1552 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1553 != (pcum->aapcs_stack_words != 0));
1554 pcum->aapcs_arg_processed = false;
1555 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1556 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1557 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1558 pcum->aapcs_stack_words = 0;
1559 pcum->aapcs_reg = NULL_RTX;
1560 }
1561}
1562
1563bool
1564aarch64_function_arg_regno_p (unsigned regno)
1565{
1566 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1567 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1568}
1569
1570/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1571 PARM_BOUNDARY bits of alignment, but will be given anything up
1572 to STACK_BOUNDARY bits if the type requires it. This makes sure
1573 that both before and after the layout of each argument, the Next
1574 Stacked Argument Address (NSAA) will have a minimum alignment of
1575 8 bytes. */
1576
1577static unsigned int
1578aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1579{
1580 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1581
1582 if (alignment < PARM_BOUNDARY)
1583 alignment = PARM_BOUNDARY;
1584 if (alignment > STACK_BOUNDARY)
1585 alignment = STACK_BOUNDARY;
1586 return alignment;
1587}
1588
1589/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1590
1591 Return true if an argument passed on the stack should be padded upwards,
1592 i.e. if the least-significant byte of the stack slot has useful data.
1593
1594 Small aggregate types are placed in the lowest memory address.
1595
1596 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1597
1598bool
1599aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1600{
1601 /* On little-endian targets, the least significant byte of every stack
1602 argument is passed at the lowest byte address of the stack slot. */
1603 if (!BYTES_BIG_ENDIAN)
1604 return true;
1605
 1606 /* Otherwise, integral, floating-point and pointer types are padded downward:
1607 the least significant byte of a stack argument is passed at the highest
1608 byte address of the stack slot. */
1609 if (type
1610 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1611 || POINTER_TYPE_P (type))
1612 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1613 return false;
1614
1615 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1616 return true;
1617}
1618
1619/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1620
1621 It specifies padding for the last (may also be the only)
1622 element of a block move between registers and memory. If
 1623 the block is assumed to be in memory, padding upward means that
1624 the last element is padded after its highest significant byte,
 1625 while in downward padding, the last element is padded at
 1626 its least significant byte side.
1627
1628 Small aggregates and small complex types are always padded
1629 upwards.
1630
1631 We don't need to worry about homogeneous floating-point or
1632 short-vector aggregates; their move is not affected by the
1633 padding direction determined here. Regardless of endianness,
1634 each element of such an aggregate is put in the least
1635 significant bits of a fp/simd register.
1636
1637 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1638 register has useful data, and return the opposite if the most
1639 significant byte does. */
1640
1641bool
1642aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1643 bool first ATTRIBUTE_UNUSED)
1644{
1645
1646 /* Small composite types are always padded upward. */
1647 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1648 {
1649 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1650 : GET_MODE_SIZE (mode));
1651 if (size < 2 * UNITS_PER_WORD)
1652 return true;
1653 }
1654
1655 /* Otherwise, use the default padding. */
1656 return !BYTES_BIG_ENDIAN;
1657}
1658
1659static enum machine_mode
1660aarch64_libgcc_cmp_return_mode (void)
1661{
1662 return SImode;
1663}
1664
1665static bool
1666aarch64_frame_pointer_required (void)
1667{
1668 /* If the function contains dynamic stack allocations, we need to
1669 use the frame pointer to access the static parts of the frame. */
1670 if (cfun->calls_alloca)
1671 return true;
1672
1673 /* We may have turned flag_omit_frame_pointer on in order to have this
1674 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1675 and we'll check it here.
1676 If we really did set flag_omit_frame_pointer normally, then we return false
1677 (no frame pointer required) in all cases. */
1678
1679 if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1680 return false;
1681 else if (flag_omit_leaf_frame_pointer)
1682 return !crtl->is_leaf;
1683 return true;
1684}
1685
1686/* Mark the registers that need to be saved by the callee and calculate
1687 the size of the callee-saved registers area and frame record (both FP
1688 and LR may be omitted). */
1689static void
1690aarch64_layout_frame (void)
1691{
1692 HOST_WIDE_INT offset = 0;
1693 int regno;
1694
1695 if (reload_completed && cfun->machine->frame.laid_out)
1696 return;
1697
1698 cfun->machine->frame.fp_lr_offset = 0;
1699
1700 /* First mark all the registers that really need to be saved... */
1701 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1702 cfun->machine->frame.reg_offset[regno] = -1;
1703
1704 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1705 cfun->machine->frame.reg_offset[regno] = -1;
1706
1707 /* ... that includes the eh data registers (if needed)... */
1708 if (crtl->calls_eh_return)
1709 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1710 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1711
1712 /* ... and any callee saved register that dataflow says is live. */
1713 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1714 if (df_regs_ever_live_p (regno)
1715 && !call_used_regs[regno])
1716 cfun->machine->frame.reg_offset[regno] = 0;
1717
1718 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1719 if (df_regs_ever_live_p (regno)
1720 && !call_used_regs[regno])
1721 cfun->machine->frame.reg_offset[regno] = 0;
1722
1723 if (frame_pointer_needed)
1724 {
1725 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1726 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1727 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1728 }
1729
1730 /* Now assign stack slots for them. */
1731 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1732 if (cfun->machine->frame.reg_offset[regno] != -1)
1733 {
1734 cfun->machine->frame.reg_offset[regno] = offset;
1735 offset += UNITS_PER_WORD;
1736 }
1737
1738 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1739 if (cfun->machine->frame.reg_offset[regno] != -1)
1740 {
1741 cfun->machine->frame.reg_offset[regno] = offset;
1742 offset += UNITS_PER_WORD;
1743 }
1744
1745 if (frame_pointer_needed)
1746 {
1747 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1748 offset += UNITS_PER_WORD;
1749 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1750 }
1751
1752 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1753 {
1754 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1755 offset += UNITS_PER_WORD;
1756 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1757 }
1758
1759 cfun->machine->frame.padding0 =
1760 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1761 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1762
1763 cfun->machine->frame.saved_regs_size = offset;
1764 cfun->machine->frame.laid_out = true;
1765}
1766
1767/* Make the last instruction frame-related and note that it performs
1768 the operation described by FRAME_PATTERN. */
1769
1770static void
1771aarch64_set_frame_expr (rtx frame_pattern)
1772{
1773 rtx insn;
1774
1775 insn = get_last_insn ();
1776 RTX_FRAME_RELATED_P (insn) = 1;
1777 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1778 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1779 frame_pattern,
1780 REG_NOTES (insn));
1781}
1782
1783static bool
1784aarch64_register_saved_on_entry (int regno)
1785{
1786 return cfun->machine->frame.reg_offset[regno] != -1;
1787}
1788
1789
1790static void
1791aarch64_save_or_restore_fprs (int start_offset, int increment,
1792 bool restore, rtx base_rtx)
1793
1794{
1795 unsigned regno;
1796 unsigned regno2;
1797 rtx insn;
1798 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1799
1800
1801 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1802 {
1803 if (aarch64_register_saved_on_entry (regno))
1804 {
1805 rtx mem;
1806 mem = gen_mem_ref (DFmode,
1807 plus_constant (Pmode,
1808 base_rtx,
1809 start_offset));
1810
1811 for (regno2 = regno + 1;
1812 regno2 <= V31_REGNUM
1813 && !aarch64_register_saved_on_entry (regno2);
1814 regno2++)
1815 {
1816 /* Empty loop. */
1817 }
1818 if (regno2 <= V31_REGNUM &&
1819 aarch64_register_saved_on_entry (regno2))
1820 {
1821 rtx mem2;
1822 /* Next highest register to be saved. */
1823 mem2 = gen_mem_ref (DFmode,
1824 plus_constant
1825 (Pmode,
1826 base_rtx,
1827 start_offset + increment));
1828 if (restore == false)
1829 {
1830 insn = emit_insn
1831 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1832 mem2, gen_rtx_REG (DFmode, regno2)));
1833
1834 }
1835 else
1836 {
1837 insn = emit_insn
1838 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1839 gen_rtx_REG (DFmode, regno2), mem2));
1840
1841 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno));
1842 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno2));
1843 }
1844
1845 /* The first part of a frame-related parallel insn
1846 is always assumed to be relevant to the frame
 1847 calculations; subsequent parts are only
1848 frame-related if explicitly marked. */
1849 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1850 1)) = 1;
1851 regno = regno2;
1852 start_offset += increment * 2;
1853 }
1854 else
1855 {
1856 if (restore == false)
1857 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1858 else
1859 {
1860 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1861 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1862 }
1863 start_offset += increment;
1864 }
1865 RTX_FRAME_RELATED_P (insn) = 1;
1866 }
1867 }
1868
1869}
1870
1871
 1872/* Offset from the stack pointer at which the saves and
 1873 restores have to happen. */
1874static void
1875aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1876 bool restore)
1877{
1878 rtx insn;
1879 rtx base_rtx = stack_pointer_rtx;
1880 HOST_WIDE_INT start_offset = offset;
1881 HOST_WIDE_INT increment = UNITS_PER_WORD;
1882 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1883 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1884 unsigned regno;
1885 unsigned regno2;
1886
1887 for (regno = R0_REGNUM; regno <= limit; regno++)
1888 {
1889 if (aarch64_register_saved_on_entry (regno))
1890 {
1891 rtx mem;
1892 mem = gen_mem_ref (Pmode,
1893 plus_constant (Pmode,
1894 base_rtx,
1895 start_offset));
1896
1897 for (regno2 = regno + 1;
1898 regno2 <= limit
1899 && !aarch64_register_saved_on_entry (regno2);
1900 regno2++)
1901 {
1902 /* Empty loop. */
1903 }
1904 if (regno2 <= limit &&
1905 aarch64_register_saved_on_entry (regno2))
1906 {
1907 rtx mem2;
1908 /* Next highest register to be saved. */
1909 mem2 = gen_mem_ref (Pmode,
1910 plus_constant
1911 (Pmode,
1912 base_rtx,
1913 start_offset + increment));
1914 if (restore == false)
1915 {
1916 insn = emit_insn
1917 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1918 mem2, gen_rtx_REG (DImode, regno2)));
1919
1920 }
1921 else
1922 {
1923 insn = emit_insn
1924 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1925 gen_rtx_REG (DImode, regno2), mem2));
1926
1927 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1928 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1929 }
1930
1931 /* The first part of a frame-related parallel insn
1932 is always assumed to be relevant to the frame
1933 calculations; subsequent parts are only
1934 frame-related if explicitly marked. */
1935 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1936 1)) = 1;
1937 regno = regno2;
1938 start_offset += increment * 2;
1939 }
1940 else
1941 {
1942 if (restore == false)
1943 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1944 else
1945 {
1946 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1947 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1948 }
1949 start_offset += increment;
1950 }
1951 RTX_FRAME_RELATED_P (insn) = 1;
1952 }
1953 }
1954
1955 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1956
1957}
1958
1959/* AArch64 stack frames generated by this compiler look like:
1960
1961 +-------------------------------+
1962 | |
1963 | incoming stack arguments |
1964 | |
1965 +-------------------------------+ <-- arg_pointer_rtx
1966 | |
1967 | callee-allocated save area |
1968 | for register varargs |
1969 | |
1970 +-------------------------------+
1971 | |
1972 | local variables |
1973 | |
1974 +-------------------------------+ <-- frame_pointer_rtx
1975 | |
1976 | callee-saved registers |
1977 | |
1978 +-------------------------------+
1979 | LR' |
1980 +-------------------------------+
1981 | FP' |
1982 P +-------------------------------+ <-- hard_frame_pointer_rtx
1983 | dynamic allocation |
1984 +-------------------------------+
1985 | |
1986 | outgoing stack arguments |
1987 | |
1988 +-------------------------------+ <-- stack_pointer_rtx
1989
1990 Dynamic stack allocations such as alloca insert data at point P.
1991 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
1992 hard_frame_pointer_rtx unchanged. */
1993
1994/* Generate the prologue instructions for entry into a function.
1995 Establish the stack frame by decreasing the stack pointer with a
1996 properly calculated size and, if necessary, create a frame record
1997 filled with the values of LR and previous frame pointer. The
6991c977 1998 current FP is also set up if it is in use. */
43e9d192
IB
1999
2000void
2001aarch64_expand_prologue (void)
2002{
2003 /* sub sp, sp, #<frame_size>
2004 stp {fp, lr}, [sp, #<frame_size> - 16]
2005 add fp, sp, #<frame_size> - hardfp_offset
2006 stp {cs_reg}, [fp, #-16] etc.
2007
2008 sub sp, sp, <final_adjustment_if_any>
2009 */
2010 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2011 HOST_WIDE_INT frame_size, offset;
2012 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2013 rtx insn;
2014
2015 aarch64_layout_frame ();
2016 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2017 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2018 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2019 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2020 + crtl->outgoing_args_size);
2021 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2022 STACK_BOUNDARY / BITS_PER_UNIT);
2023
2024 if (flag_stack_usage_info)
2025 current_function_static_stack_size = frame_size;
2026
2027 fp_offset = (offset
2028 - original_frame_size
2029 - cfun->machine->frame.saved_regs_size);
2030
44c0e7b9 2031 /* Store pairs and load pairs have a range of only -512 to 504. */
43e9d192
IB
2032 if (offset >= 512)
2033 {
2034 /* When the frame has a large size, an initial decrease is done on
2035 the stack pointer to jump over the callee-allocated save area for
2036 register varargs, the local variable area and/or the callee-saved
2037 register area. This will allow the pre-index write-back
2038 store pair instructions to be used for setting up the stack frame
2039 efficiently. */
2040 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2041 if (offset >= 512)
2042 offset = cfun->machine->frame.saved_regs_size;
2043
2044 frame_size -= (offset + crtl->outgoing_args_size);
2045 fp_offset = 0;
2046
2047 if (frame_size >= 0x1000000)
2048 {
2049 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2050 emit_move_insn (op0, GEN_INT (-frame_size));
2051 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2052 aarch64_set_frame_expr (gen_rtx_SET
2053 (Pmode, stack_pointer_rtx,
f6fe771a
RL
2054 plus_constant (Pmode,
2055 stack_pointer_rtx,
2056 -frame_size)));
43e9d192
IB
2057 }
2058 else if (frame_size > 0)
2059 {
2060 if ((frame_size & 0xfff) != frame_size)
2061 {
2062 insn = emit_insn (gen_add2_insn
2063 (stack_pointer_rtx,
2064 GEN_INT (-(frame_size
2065 & ~(HOST_WIDE_INT)0xfff))));
2066 RTX_FRAME_RELATED_P (insn) = 1;
2067 }
2068 if ((frame_size & 0xfff) != 0)
2069 {
2070 insn = emit_insn (gen_add2_insn
2071 (stack_pointer_rtx,
2072 GEN_INT (-(frame_size
2073 & (HOST_WIDE_INT)0xfff))));
2074 RTX_FRAME_RELATED_P (insn) = 1;
2075 }
2076 }
2077 }
2078 else
2079 frame_size = -1;
2080
2081 if (offset > 0)
2082 {
2083 /* Save the frame pointer and lr if the frame pointer is needed
2084 first. Make the frame pointer point to the location of the
2085 old frame pointer on the stack. */
2086 if (frame_pointer_needed)
2087 {
2088 rtx mem_fp, mem_lr;
2089
2090 if (fp_offset)
2091 {
2092 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2093 GEN_INT (-offset)));
2094 RTX_FRAME_RELATED_P (insn) = 1;
2095 aarch64_set_frame_expr (gen_rtx_SET
2096 (Pmode, stack_pointer_rtx,
2097 gen_rtx_MINUS (Pmode,
2098 stack_pointer_rtx,
2099 GEN_INT (offset))));
2100 mem_fp = gen_frame_mem (DImode,
2101 plus_constant (Pmode,
2102 stack_pointer_rtx,
2103 fp_offset));
2104 mem_lr = gen_frame_mem (DImode,
2105 plus_constant (Pmode,
2106 stack_pointer_rtx,
2107 fp_offset
2108 + UNITS_PER_WORD));
2109 insn = emit_insn (gen_store_pairdi (mem_fp,
2110 hard_frame_pointer_rtx,
2111 mem_lr,
2112 gen_rtx_REG (DImode,
2113 LR_REGNUM)));
2114 }
2115 else
2116 {
2117 insn = emit_insn (gen_storewb_pairdi_di
2118 (stack_pointer_rtx, stack_pointer_rtx,
2119 hard_frame_pointer_rtx,
2120 gen_rtx_REG (DImode, LR_REGNUM),
2121 GEN_INT (-offset),
2122 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2123 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2124 }
2125
2126 /* The first part of a frame-related parallel insn is always
2127 assumed to be relevant to the frame calculations;
2128 subsequent parts are only frame-related if explicitly
2129 marked. */
2130 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2131 RTX_FRAME_RELATED_P (insn) = 1;
2132
2133 /* Set up frame pointer to point to the location of the
2134 previous frame pointer on the stack. */
2135 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2136 stack_pointer_rtx,
2137 GEN_INT (fp_offset)));
2138 aarch64_set_frame_expr (gen_rtx_SET
2139 (Pmode, hard_frame_pointer_rtx,
f6fe771a
RL
2140 plus_constant (Pmode,
2141 stack_pointer_rtx,
2142 fp_offset)));
43e9d192
IB
2143 RTX_FRAME_RELATED_P (insn) = 1;
2144 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2145 hard_frame_pointer_rtx));
2146 }
2147 else
2148 {
2149 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2150 GEN_INT (-offset)));
2151 RTX_FRAME_RELATED_P (insn) = 1;
2152 }
2153
2154 aarch64_save_or_restore_callee_save_registers
2155 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2156 }
2157
2158 /* When offset >= 512,
2159 sub sp, sp, #<outgoing_args_size> */
2160 if (frame_size > -1)
2161 {
2162 if (crtl->outgoing_args_size > 0)
2163 {
2164 insn = emit_insn (gen_add2_insn
2165 (stack_pointer_rtx,
2166 GEN_INT (- crtl->outgoing_args_size)));
2167 RTX_FRAME_RELATED_P (insn) = 1;
2168 }
2169 }
2170}
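/* A minimal standalone sketch (not part of the compiler source) of the
   immediate split used in the prologue above: AArch64 add/sub immediates
   are 12 bits wide, optionally shifted left by 12, so a residual frame
   size below 0x1000000 is lowered as at most two stack-pointer
   adjustments.  All names below are illustrative only.  */

#include <assert.h>
#include <stdint.h>

static void
example_split_sp_adjustment (int64_t frame_size,
                             int64_t *hi_part, int64_t *lo_part)
{
  assert (frame_size > 0 && frame_size < 0x1000000);

  /* Part emitted as "sub sp, sp, #imm, lsl #12".  */
  *hi_part = frame_size & ~(int64_t) 0xfff;

  /* Remainder emitted as a plain "sub sp, sp, #imm".  */
  *lo_part = frame_size & (int64_t) 0xfff;

  assert (*hi_part + *lo_part == frame_size);
}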
2171
2172/* Generate the epilogue instructions for returning from a function. */
2173void
2174aarch64_expand_epilogue (bool for_sibcall)
2175{
2176 HOST_WIDE_INT original_frame_size, frame_size, offset;
2177 HOST_WIDE_INT fp_offset;
2178 rtx insn;
44c0e7b9 2179 rtx cfa_reg;
43e9d192
IB
2180
2181 aarch64_layout_frame ();
2182 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2183 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2184 + crtl->outgoing_args_size);
2185 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2186 STACK_BOUNDARY / BITS_PER_UNIT);
2187
2188 fp_offset = (offset
2189 - original_frame_size
2190 - cfun->machine->frame.saved_regs_size);
2191
44c0e7b9
YZ
2192 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2193
2194 /* Store pairs and load pairs have a range of only -512 to 504. */
43e9d192
IB
2195 if (offset >= 512)
2196 {
2197 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2198 if (offset >= 512)
2199 offset = cfun->machine->frame.saved_regs_size;
2200
2201 frame_size -= (offset + crtl->outgoing_args_size);
2202 fp_offset = 0;
2203 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2204 {
2205 insn = emit_insn (gen_add2_insn
2206 (stack_pointer_rtx,
2207 GEN_INT (crtl->outgoing_args_size)));
2208 RTX_FRAME_RELATED_P (insn) = 1;
2209 }
2210 }
2211 else
2212 frame_size = -1;
2213
2214 /* If there were outgoing arguments or we've done dynamic stack
2215 allocation, then restore the stack pointer from the frame
2216 pointer. This is at most one insn and more efficient than using
2217 GCC's internal mechanism. */
2218 if (frame_pointer_needed
2219 && (crtl->outgoing_args_size || cfun->calls_alloca))
2220 {
2221 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2222 hard_frame_pointer_rtx,
2223 GEN_INT (- fp_offset)));
2224 RTX_FRAME_RELATED_P (insn) = 1;
44c0e7b9
YZ
2225 /* As SP is set to (FP - fp_offset), according to the rules in
2226 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2227 from the value of SP from now on. */
2228 cfa_reg = stack_pointer_rtx;
43e9d192
IB
2229 }
2230
2231 aarch64_save_or_restore_callee_save_registers
2232 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2233
2234 /* Restore the frame pointer and lr if the frame pointer is needed. */
2235 if (offset > 0)
2236 {
2237 if (frame_pointer_needed)
2238 {
2239 rtx mem_fp, mem_lr;
2240
2241 if (fp_offset)
2242 {
2243 mem_fp = gen_frame_mem (DImode,
2244 plus_constant (Pmode,
2245 stack_pointer_rtx,
2246 fp_offset));
2247 mem_lr = gen_frame_mem (DImode,
2248 plus_constant (Pmode,
2249 stack_pointer_rtx,
2250 fp_offset
2251 + UNITS_PER_WORD));
2252 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2253 mem_fp,
2254 gen_rtx_REG (DImode,
2255 LR_REGNUM),
2256 mem_lr));
2257 }
2258 else
2259 {
2260 insn = emit_insn (gen_loadwb_pairdi_di
2261 (stack_pointer_rtx,
2262 stack_pointer_rtx,
2263 hard_frame_pointer_rtx,
2264 gen_rtx_REG (DImode, LR_REGNUM),
2265 GEN_INT (offset),
2266 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2267 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
44c0e7b9
YZ
2268 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2269 (gen_rtx_SET (Pmode, stack_pointer_rtx,
dc2d3c67
YZ
2270 plus_constant (Pmode, cfa_reg,
2271 offset))));
43e9d192
IB
2272 }
2273
2274 /* The first part of a frame-related parallel insn
2275 is always assumed to be relevant to the frame
2276 calculations; subsequent parts are only
2277 frame-related if explicitly marked. */
2278 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2279 RTX_FRAME_RELATED_P (insn) = 1;
2280 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2281 add_reg_note (insn, REG_CFA_RESTORE,
2282 gen_rtx_REG (DImode, LR_REGNUM));
2283
2284 if (fp_offset)
2285 {
2286 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2287 GEN_INT (offset)));
2288 RTX_FRAME_RELATED_P (insn) = 1;
2289 }
2290 }
43e9d192
IB
2291 else
2292 {
2293 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2294 GEN_INT (offset)));
2295 RTX_FRAME_RELATED_P (insn) = 1;
2296 }
2297 }
2298
2299 /* Stack adjustment for exception handler. */
2300 if (crtl->calls_eh_return)
2301 {
2302 /* We need to unwind the stack by the offset computed by
2303 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2304 based on SP. Ideally we would update the SP and define the
2305 CFA along the lines of:
2306
2307 SP = SP + EH_RETURN_STACKADJ_RTX
2308 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2309
2310 However the dwarf emitter only understands a constant
2311 register offset.
2312
631b20a7 2313 The solution chosen here is to use the otherwise unused IP0
43e9d192
IB
2314 as a temporary register to hold the current SP value. The
2315 CFA is described using IP0 then SP is modified. */
2316
2317 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2318
2319 insn = emit_move_insn (ip0, stack_pointer_rtx);
2320 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2321 RTX_FRAME_RELATED_P (insn) = 1;
2322
2323 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2324
2325 /* Ensure the assignment to IP0 does not get optimized away. */
2326 emit_use (ip0);
2327 }
2328
2329 if (frame_size > -1)
2330 {
2331 if (frame_size >= 0x1000000)
2332 {
2333 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2334 emit_move_insn (op0, GEN_INT (frame_size));
2335 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2336 aarch64_set_frame_expr (gen_rtx_SET
2337 (Pmode, stack_pointer_rtx,
f6fe771a
RL
2338 plus_constant (Pmode,
2339 stack_pointer_rtx,
2340 frame_size)));
43e9d192
IB
2341 }
2342 else if (frame_size > 0)
2343 {
2344 if ((frame_size & 0xfff) != 0)
2345 {
2346 insn = emit_insn (gen_add2_insn
2347 (stack_pointer_rtx,
2348 GEN_INT ((frame_size
2349 & (HOST_WIDE_INT) 0xfff))));
2350 RTX_FRAME_RELATED_P (insn) = 1;
2351 }
2352 if ((frame_size & 0xfff) != frame_size)
2353 {
2354 insn = emit_insn (gen_add2_insn
2355 (stack_pointer_rtx,
2356 GEN_INT ((frame_size
2357 & ~ (HOST_WIDE_INT) 0xfff))));
2358 RTX_FRAME_RELATED_P (insn) = 1;
2359 }
2360 }
2361
f6fe771a
RL
2362 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2363 plus_constant (Pmode,
2364 stack_pointer_rtx,
2365 offset)));
43e9d192
IB
2366 }
2367
2368 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2369 if (!for_sibcall)
2370 emit_jump_insn (ret_rtx);
2371}
2372
2373/* Return the place to copy the exception unwinding return address to.
2374 This will probably be a stack slot, but could (in theory) be the
2375 return register. */
2376rtx
2377aarch64_final_eh_return_addr (void)
2378{
2379 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2380 aarch64_layout_frame ();
2381 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2382 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2383 + crtl->outgoing_args_size);
2384 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2385 STACK_BOUNDARY / BITS_PER_UNIT);
2386 fp_offset = offset
2387 - original_frame_size
2388 - cfun->machine->frame.saved_regs_size;
2389
2390 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2391 return gen_rtx_REG (DImode, LR_REGNUM);
2392
2393 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2394 result in a store to save LR introduced by builtin_eh_return () being
2395 incorrectly deleted because the alias is not detected.
2396 So in the calculation of the address to copy the exception unwinding
2397 return address to, we note 2 cases.
2398 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2399 we return a SP-relative location since all the addresses are SP-relative
2400 in this case. This prevents the store from being optimized away.
2401 If the fp_offset is not 0, then the addresses will be FP-relative and
2402 therefore we return a FP-relative location. */
2403
2404 if (frame_pointer_needed)
2405 {
2406 if (fp_offset)
2407 return gen_frame_mem (DImode,
2408 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2409 else
2410 return gen_frame_mem (DImode,
2411 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2412 }
2413
2414 /* If FP is not needed, we calculate the location of LR, which would be
2415 at the top of the saved registers block. */
2416
2417 return gen_frame_mem (DImode,
2418 plus_constant (Pmode,
2419 stack_pointer_rtx,
2420 fp_offset
2421 + cfun->machine->frame.saved_regs_size
2422 - 2 * UNITS_PER_WORD));
2423}
2424
2425/* Output code to build up a constant in a register. */
2426static void
d9600ae5 2427aarch64_build_constant (int regnum, HOST_WIDE_INT val)
43e9d192
IB
2428{
2429 if (aarch64_bitmask_imm (val, DImode))
d9600ae5 2430 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
43e9d192
IB
2431 else
2432 {
2433 int i;
2434 int ncount = 0;
2435 int zcount = 0;
2436 HOST_WIDE_INT valp = val >> 16;
2437 HOST_WIDE_INT valm;
2438 HOST_WIDE_INT tval;
2439
2440 for (i = 16; i < 64; i += 16)
2441 {
2442 valm = (valp & 0xffff);
2443
2444 if (valm != 0)
2445 ++ zcount;
2446
2447 if (valm != 0xffff)
2448 ++ ncount;
2449
2450 valp >>= 16;
2451 }
2452
2453 /* zcount contains the number of additional MOVK instructions
2454 required if the constant is built up with an initial MOVZ instruction,
2455 while ncount is the number of MOVK instructions required if starting
2456 with a MOVN instruction. Choose the sequence that needs fewer
2457 instructions, preferring MOVZ when the two counts are
2458 equal. */
2459 if (ncount < zcount)
2460 {
d9600ae5
SN
2461 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2462 GEN_INT ((~val) & 0xffff));
43e9d192
IB
2463 tval = 0xffff;
2464 }
2465 else
2466 {
d9600ae5
SN
2467 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2468 GEN_INT (val & 0xffff));
43e9d192
IB
2469 tval = 0;
2470 }
2471
2472 val >>= 16;
2473
2474 for (i = 16; i < 64; i += 16)
2475 {
2476 if ((val & 0xffff) != tval)
d9600ae5
SN
2477 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2478 GEN_INT (i), GEN_INT (val & 0xffff)));
43e9d192
IB
2479 val >>= 16;
2480 }
2481 }
2482}
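/* A minimal standalone sketch (not part of the compiler source) of the
   MOVZ/MOVN choice made above: count the upper 16-bit chunks that would
   still need a MOVK after an initial MOVZ (non-zero chunks) or after an
   initial MOVN (non-0xffff chunks), and start with whichever leaves
   fewer.  Names are illustrative only.  */

#include <stdint.h>

static int
example_count_movks (uint64_t val, uint16_t preset_chunk)
{
  int count = 0;
  int i;

  /* Chunk 0 is always written by the initial MOVZ or MOVN, so only the
     upper three 16-bit chunks can require a follow-up MOVK.  */
  for (i = 16; i < 64; i += 16)
    if (((val >> i) & 0xffff) != preset_chunk)
      count++;

  return count;
}

static int
example_prefer_movn (uint64_t val)
{
  /* Mirrors the "ncount < zcount" test above.  */
  return example_count_movks (val, 0xffff) < example_count_movks (val, 0);
}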
2483
2484static void
d9600ae5 2485aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
43e9d192
IB
2486{
2487 HOST_WIDE_INT mdelta = delta;
d9600ae5
SN
2488 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2489 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
43e9d192
IB
2490
2491 if (mdelta < 0)
2492 mdelta = -mdelta;
2493
2494 if (mdelta >= 4096 * 4096)
2495 {
d9600ae5
SN
2496 aarch64_build_constant (scratchreg, delta);
2497 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
43e9d192
IB
2498 }
2499 else if (mdelta > 0)
2500 {
43e9d192 2501 if (mdelta >= 4096)
d9600ae5
SN
2502 {
2503 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2504 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2505 if (delta < 0)
2506 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2507 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2508 else
2509 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2510 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2511 }
43e9d192 2512 if (mdelta % 4096 != 0)
d9600ae5
SN
2513 {
2514 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2515 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2516 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2517 }
43e9d192
IB
2518 }
2519}
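/* A minimal standalone sketch (not part of the compiler source) of the
   split used above when |delta| is below 4096 * 4096: a multiple of 4096
   applied through a shifted (LSL #12) add or sub, plus a residue below
   4096.  Names are illustrative only.  */

#include <assert.h>
#include <stdint.h>

static void
example_split_delta (int64_t delta, int64_t *lsl12_units, int64_t *residue)
{
  int64_t mdelta = delta < 0 ? -delta : delta;
  int64_t sign = delta < 0 ? -1 : 1;

  assert (mdelta < (int64_t) 4096 * 4096);

  /* Number of 4096-byte units, applied with an LSL #12 add or sub.  */
  *lsl12_units = sign * (mdelta / 4096);

  /* Remaining 0..4095 bytes, applied as a plain add or sub.  */
  *residue = sign * (mdelta % 4096);

  assert (*lsl12_units * 4096 + *residue == delta);
}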
2520
2521/* Output code to add DELTA to the first argument, and then jump
2522 to FUNCTION. Used for C++ multiple inheritance. */
2523static void
2524aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2525 HOST_WIDE_INT delta,
2526 HOST_WIDE_INT vcall_offset,
2527 tree function)
2528{
2529 /* The this pointer is always in x0. Note that this differs from
2530 Arm where the this pointer may be bumped to r1 if r0 is required
2531 to return a pointer to an aggregate. On AArch64 a result value
2532 pointer will be in x8. */
2533 int this_regno = R0_REGNUM;
75f1d6fc 2534 rtx this_rtx, temp0, temp1, addr, insn, funexp;
43e9d192 2535
75f1d6fc
SN
2536 reload_completed = 1;
2537 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192
IB
2538
2539 if (vcall_offset == 0)
d9600ae5 2540 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
2541 else
2542 {
28514dda 2543 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 2544
75f1d6fc
SN
2545 this_rtx = gen_rtx_REG (Pmode, this_regno);
2546 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2547 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
43e9d192 2548
75f1d6fc
SN
2549 addr = this_rtx;
2550 if (delta != 0)
2551 {
2552 if (delta >= -256 && delta < 256)
2553 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2554 plus_constant (Pmode, this_rtx, delta));
2555 else
d9600ae5 2556 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
2557 }
2558
28514dda
YZ
2559 if (Pmode == ptr_mode)
2560 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2561 else
2562 aarch64_emit_move (temp0,
2563 gen_rtx_ZERO_EXTEND (Pmode,
2564 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 2565
28514dda 2566 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 2567 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
2568 else
2569 {
d9600ae5 2570 aarch64_build_constant (IP1_REGNUM, vcall_offset);
75f1d6fc 2571 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
2572 }
2573
28514dda
YZ
2574 if (Pmode == ptr_mode)
2575 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2576 else
2577 aarch64_emit_move (temp1,
2578 gen_rtx_SIGN_EXTEND (Pmode,
2579 gen_rtx_MEM (ptr_mode, addr)));
2580
75f1d6fc 2581 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
2582 }
2583
75f1d6fc
SN
2584 /* Generate a tail call to the target function. */
2585 if (!TREE_USED (function))
2586 {
2587 assemble_external (function);
2588 TREE_USED (function) = 1;
2589 }
2590 funexp = XEXP (DECL_RTL (function), 0);
2591 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2592 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2593 SIBLING_CALL_P (insn) = 1;
2594
2595 insn = get_insns ();
2596 shorten_branches (insn);
2597 final_start_function (insn, file, 1);
2598 final (insn, file, 1);
43e9d192 2599 final_end_function ();
75f1d6fc
SN
2600
2601 /* Stop pretending to be a post-reload pass. */
2602 reload_completed = 0;
43e9d192
IB
2603}
2604
43e9d192
IB
2605static int
2606aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2607{
2608 if (GET_CODE (*x) == SYMBOL_REF)
2609 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2610
2611 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2612 TLS offsets, not real symbol references. */
2613 if (GET_CODE (*x) == UNSPEC
2614 && XINT (*x, 1) == UNSPEC_TLS)
2615 return -1;
2616
2617 return 0;
2618}
2619
2620static bool
2621aarch64_tls_referenced_p (rtx x)
2622{
2623 if (!TARGET_HAVE_TLS)
2624 return false;
2625
2626 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2627}
2628
2629
2630static int
2631aarch64_bitmasks_cmp (const void *i1, const void *i2)
2632{
2633 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2634 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2635
2636 if (*imm1 < *imm2)
2637 return -1;
2638 if (*imm1 > *imm2)
2639 return +1;
2640 return 0;
2641}
2642
2643
2644static void
2645aarch64_build_bitmask_table (void)
2646{
2647 unsigned HOST_WIDE_INT mask, imm;
2648 unsigned int log_e, e, s, r;
2649 unsigned int nimms = 0;
2650
2651 for (log_e = 1; log_e <= 6; log_e++)
2652 {
2653 e = 1 << log_e;
2654 if (e == 64)
2655 mask = ~(HOST_WIDE_INT) 0;
2656 else
2657 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2658 for (s = 1; s < e; s++)
2659 {
2660 for (r = 0; r < e; r++)
2661 {
2662 /* Set S consecutive bits to 1 (S < 64). */
2663 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2664 /* Rotate right by R. */
2665 if (r != 0)
2666 imm = ((imm >> r) | (imm << (e - r))) & mask;
2667 /* Replicate the constant depending on SIMD size. */
2668 switch (log_e) {
2669 case 1: imm |= (imm << 2);
2670 case 2: imm |= (imm << 4);
2671 case 3: imm |= (imm << 8);
2672 case 4: imm |= (imm << 16);
2673 case 5: imm |= (imm << 32);
2674 case 6:
2675 break;
2676 default:
2677 gcc_unreachable ();
2678 }
2679 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2680 aarch64_bitmasks[nimms++] = imm;
2681 }
2682 }
2683 }
2684
2685 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2686 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2687 aarch64_bitmasks_cmp);
2688}
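/* A minimal standalone sketch (not part of the compiler source) of how a
   single entry of the table above is formed from an (element size, set
   bits, rotation) triple: set S consecutive low bits, rotate right by R
   within an E-bit element, then replicate the element across 64 bits.
   Names are illustrative only.  */

#include <stdint.h>

static uint64_t
example_bitmask_imm (unsigned e, unsigned s, unsigned r)
{
  /* Assumes E is a power of two in 2..64, 1 <= S < E and 0 <= R < E.  */
  uint64_t mask = e == 64 ? ~(uint64_t) 0 : ((uint64_t) 1 << e) - 1;
  uint64_t imm = ((uint64_t) 1 << s) - 1;
  unsigned width;

  if (r != 0)
    imm = ((imm >> r) | (imm << (e - r))) & mask;

  /* Replicate the E-bit element to fill 64 bits.  */
  for (width = e; width < 64; width *= 2)
    imm |= imm << width;

  return imm;
}

/* For example, example_bitmask_imm (16, 8, 0) yields 0x00ff00ff00ff00ff,
   and example_bitmask_imm (64, 32, 0) yields 0x00000000ffffffff.  */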
2689
2690
2691/* Return true if val can be encoded as a 12-bit unsigned immediate with
2692 a left shift of 0 or 12 bits. */
2693bool
2694aarch64_uimm12_shift (HOST_WIDE_INT val)
2695{
2696 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2697 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2698 );
2699}
2700
2701
2702/* Return true if val is an immediate that can be loaded into a
2703 register by a MOVZ instruction. */
2704static bool
2705aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2706{
2707 if (GET_MODE_SIZE (mode) > 4)
2708 {
2709 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2710 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2711 return 1;
2712 }
2713 else
2714 {
2715 /* Ignore sign extension. */
2716 val &= (HOST_WIDE_INT) 0xffffffff;
2717 }
2718 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2719 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2720}
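/* A minimal standalone sketch (not part of the compiler source) of the
   64-bit case of the test above: a constant is a MOVZ immediate when all
   of its set bits fall within a single 16-bit field at bit position 0,
   16, 32 or 48.  Names are illustrative only.  */

#include <stdint.h>

static int
example_is_movz_imm_di (uint64_t val)
{
  int i;

  for (i = 0; i < 64; i += 16)
    if ((val & ((uint64_t) 0xffff << i)) == val)
      return 1;

  return 0;
}

/* For example, 0xffff0000 qualifies (one field at bit 16), while 0x10001
   does not and needs either a MOVK or a bitmask immediate.  */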
2721
2722
2723/* Return true if val is a valid bitmask immediate. */
2724bool
2725aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2726{
2727 if (GET_MODE_SIZE (mode) < 8)
2728 {
2729 /* Replicate bit pattern. */
2730 val &= (HOST_WIDE_INT) 0xffffffff;
2731 val |= val << 32;
2732 }
2733 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2734 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2735}
2736
2737
2738/* Return true if val is an immediate that can be loaded into a
2739 register in a single instruction. */
2740bool
2741aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2742{
2743 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2744 return 1;
2745 return aarch64_bitmask_imm (val, mode);
2746}
2747
2748static bool
2749aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2750{
2751 rtx base, offset;
7eda14e1 2752
43e9d192
IB
2753 if (GET_CODE (x) == HIGH)
2754 return true;
2755
2756 split_const (x, &base, &offset);
2757 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda
YZ
2758 {
2759 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2760 != SYMBOL_FORCE_TO_MEM)
2761 return true;
2762 else
2763 /* Avoid generating a 64-bit relocation in ILP32; leave
2764 to aarch64_expand_mov_immediate to handle it properly. */
2765 return mode != ptr_mode;
2766 }
43e9d192
IB
2767
2768 return aarch64_tls_referenced_p (x);
2769}
2770
2771/* Return true if register REGNO is a valid index register.
2772 STRICT_P is true if REG_OK_STRICT is in effect. */
2773
2774bool
2775aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2776{
2777 if (!HARD_REGISTER_NUM_P (regno))
2778 {
2779 if (!strict_p)
2780 return true;
2781
2782 if (!reg_renumber)
2783 return false;
2784
2785 regno = reg_renumber[regno];
2786 }
2787 return GP_REGNUM_P (regno);
2788}
2789
2790/* Return true if register REGNO is a valid base register.
2791 STRICT_P is true if REG_OK_STRICT is in effect. */
2792
2793bool
2794aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2795{
2796 if (!HARD_REGISTER_NUM_P (regno))
2797 {
2798 if (!strict_p)
2799 return true;
2800
2801 if (!reg_renumber)
2802 return false;
2803
2804 regno = reg_renumber[regno];
2805 }
2806
2807 /* The fake registers will be eliminated to either the stack or
2808 hard frame pointer, both of which are usually valid base registers.
2809 Reload deals with the cases where the eliminated form isn't valid. */
2810 return (GP_REGNUM_P (regno)
2811 || regno == SP_REGNUM
2812 || regno == FRAME_POINTER_REGNUM
2813 || regno == ARG_POINTER_REGNUM);
2814}
2815
2816/* Return true if X is a valid base register.
2817 STRICT_P is true if REG_OK_STRICT is in effect. */
2818
2819static bool
2820aarch64_base_register_rtx_p (rtx x, bool strict_p)
2821{
2822 if (!strict_p && GET_CODE (x) == SUBREG)
2823 x = SUBREG_REG (x);
2824
2825 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2826}
2827
2828/* Return true if address offset is a valid index. If it is, fill in INFO
2829 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2830
2831static bool
2832aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2833 enum machine_mode mode, bool strict_p)
2834{
2835 enum aarch64_address_type type;
2836 rtx index;
2837 int shift;
2838
2839 /* (reg:P) */
2840 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2841 && GET_MODE (x) == Pmode)
2842 {
2843 type = ADDRESS_REG_REG;
2844 index = x;
2845 shift = 0;
2846 }
2847 /* (sign_extend:DI (reg:SI)) */
2848 else if ((GET_CODE (x) == SIGN_EXTEND
2849 || GET_CODE (x) == ZERO_EXTEND)
2850 && GET_MODE (x) == DImode
2851 && GET_MODE (XEXP (x, 0)) == SImode)
2852 {
2853 type = (GET_CODE (x) == SIGN_EXTEND)
2854 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2855 index = XEXP (x, 0);
2856 shift = 0;
2857 }
2858 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2859 else if (GET_CODE (x) == MULT
2860 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2861 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2862 && GET_MODE (XEXP (x, 0)) == DImode
2863 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2864 && CONST_INT_P (XEXP (x, 1)))
2865 {
2866 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2867 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2868 index = XEXP (XEXP (x, 0), 0);
2869 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2870 }
2871 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2872 else if (GET_CODE (x) == ASHIFT
2873 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2874 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2875 && GET_MODE (XEXP (x, 0)) == DImode
2876 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2877 && CONST_INT_P (XEXP (x, 1)))
2878 {
2879 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2880 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2881 index = XEXP (XEXP (x, 0), 0);
2882 shift = INTVAL (XEXP (x, 1));
2883 }
2884 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2885 else if ((GET_CODE (x) == SIGN_EXTRACT
2886 || GET_CODE (x) == ZERO_EXTRACT)
2887 && GET_MODE (x) == DImode
2888 && GET_CODE (XEXP (x, 0)) == MULT
2889 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2890 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2891 {
2892 type = (GET_CODE (x) == SIGN_EXTRACT)
2893 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2894 index = XEXP (XEXP (x, 0), 0);
2895 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2896 if (INTVAL (XEXP (x, 1)) != 32 + shift
2897 || INTVAL (XEXP (x, 2)) != 0)
2898 shift = -1;
2899 }
2900 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2901 (const_int 0xffffffff<<shift)) */
2902 else if (GET_CODE (x) == AND
2903 && GET_MODE (x) == DImode
2904 && GET_CODE (XEXP (x, 0)) == MULT
2905 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2906 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2907 && CONST_INT_P (XEXP (x, 1)))
2908 {
2909 type = ADDRESS_REG_UXTW;
2910 index = XEXP (XEXP (x, 0), 0);
2911 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2912 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2913 shift = -1;
2914 }
2915 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2916 else if ((GET_CODE (x) == SIGN_EXTRACT
2917 || GET_CODE (x) == ZERO_EXTRACT)
2918 && GET_MODE (x) == DImode
2919 && GET_CODE (XEXP (x, 0)) == ASHIFT
2920 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2921 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2922 {
2923 type = (GET_CODE (x) == SIGN_EXTRACT)
2924 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2925 index = XEXP (XEXP (x, 0), 0);
2926 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2927 if (INTVAL (XEXP (x, 1)) != 32 + shift
2928 || INTVAL (XEXP (x, 2)) != 0)
2929 shift = -1;
2930 }
2931 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2932 (const_int 0xffffffff<<shift)) */
2933 else if (GET_CODE (x) == AND
2934 && GET_MODE (x) == DImode
2935 && GET_CODE (XEXP (x, 0)) == ASHIFT
2936 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2937 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2938 && CONST_INT_P (XEXP (x, 1)))
2939 {
2940 type = ADDRESS_REG_UXTW;
2941 index = XEXP (XEXP (x, 0), 0);
2942 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2943 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2944 shift = -1;
2945 }
2946 /* (mult:P (reg:P) (const_int scale)) */
2947 else if (GET_CODE (x) == MULT
2948 && GET_MODE (x) == Pmode
2949 && GET_MODE (XEXP (x, 0)) == Pmode
2950 && CONST_INT_P (XEXP (x, 1)))
2951 {
2952 type = ADDRESS_REG_REG;
2953 index = XEXP (x, 0);
2954 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2955 }
2956 /* (ashift:P (reg:P) (const_int shift)) */
2957 else if (GET_CODE (x) == ASHIFT
2958 && GET_MODE (x) == Pmode
2959 && GET_MODE (XEXP (x, 0)) == Pmode
2960 && CONST_INT_P (XEXP (x, 1)))
2961 {
2962 type = ADDRESS_REG_REG;
2963 index = XEXP (x, 0);
2964 shift = INTVAL (XEXP (x, 1));
2965 }
2966 else
2967 return false;
2968
2969 if (GET_CODE (index) == SUBREG)
2970 index = SUBREG_REG (index);
2971
2972 if ((shift == 0 ||
2973 (shift > 0 && shift <= 3
2974 && (1 << shift) == GET_MODE_SIZE (mode)))
2975 && REG_P (index)
2976 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2977 {
2978 info->type = type;
2979 info->offset = index;
2980 info->shift = shift;
2981 return true;
2982 }
2983
2984 return false;
2985}
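/* A minimal standalone sketch (not part of the compiler source) of the
   final check above: a non-zero index shift is accepted only when it is
   at most 3 and the implied scale (1 << shift) equals the access size,
   e.g. LSL #2 for 4-byte and LSL #3 for 8-byte accesses.  Names are
   illustrative only.  */

static int
example_index_shift_ok (int shift, int mode_size)
{
  return shift == 0
         || (shift > 0 && shift <= 3 && (1 << shift) == mode_size);
}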
2986
2987static inline bool
2988offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2989{
2990 return (offset >= -64 * GET_MODE_SIZE (mode)
2991 && offset < 64 * GET_MODE_SIZE (mode)
2992 && offset % GET_MODE_SIZE (mode) == 0);
2993}
2994
2995static inline bool
2996offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
2997 HOST_WIDE_INT offset)
2998{
2999 return offset >= -256 && offset < 256;
3000}
3001
3002static inline bool
3003offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3004{
3005 return (offset >= 0
3006 && offset < 4096 * GET_MODE_SIZE (mode)
3007 && offset % GET_MODE_SIZE (mode) == 0);
3008}
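/* A minimal standalone sketch (not part of the compiler source) of the
   three offset ranges above, instantiated for an 8-byte (DImode) access:
   the paired form accepts scaled offsets -512..504 in steps of 8, the
   unscaled form accepts -256..255, and the unsigned scaled form accepts
   0..32760 in steps of 8.  Names are illustrative only.  */

#include <stdint.h>

static int
example_offset_7bit_signed_scaled_di (int64_t offset)
{
  return offset >= -64 * 8 && offset < 64 * 8 && offset % 8 == 0;
}

static int
example_offset_9bit_signed_unscaled (int64_t offset)
{
  return offset >= -256 && offset < 256;
}

static int
example_offset_12bit_unsigned_scaled_di (int64_t offset)
{
  return offset >= 0 && offset < 4096 * 8 && offset % 8 == 0;
}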
3009
3010/* Return true if X is a valid address for machine mode MODE. If it is,
3011 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3012 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3013
3014static bool
3015aarch64_classify_address (struct aarch64_address_info *info,
3016 rtx x, enum machine_mode mode,
3017 RTX_CODE outer_code, bool strict_p)
3018{
3019 enum rtx_code code = GET_CODE (x);
3020 rtx op0, op1;
3021 bool allow_reg_index_p =
3022 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
3023
3024 /* Don't support anything other than POST_INC or REG addressing for
3025 AdvSIMD. */
3026 if (aarch64_vector_mode_p (mode)
3027 && (code != POST_INC && code != REG))
3028 return false;
3029
3030 switch (code)
3031 {
3032 case REG:
3033 case SUBREG:
3034 info->type = ADDRESS_REG_IMM;
3035 info->base = x;
3036 info->offset = const0_rtx;
3037 return aarch64_base_register_rtx_p (x, strict_p);
3038
3039 case PLUS:
3040 op0 = XEXP (x, 0);
3041 op1 = XEXP (x, 1);
3042 if (GET_MODE_SIZE (mode) != 0
3043 && CONST_INT_P (op1)
3044 && aarch64_base_register_rtx_p (op0, strict_p))
3045 {
3046 HOST_WIDE_INT offset = INTVAL (op1);
3047
3048 info->type = ADDRESS_REG_IMM;
3049 info->base = op0;
3050 info->offset = op1;
3051
3052 /* TImode and TFmode values are allowed in both pairs of X
3053 registers and individual Q registers. The available
3054 address modes are:
3055 X,X: 7-bit signed scaled offset
3056 Q: 9-bit signed offset
3057 We conservatively require an offset representable in both modes.
3058 */
3059 if (mode == TImode || mode == TFmode)
3060 return (offset_7bit_signed_scaled_p (mode, offset)
3061 && offset_9bit_signed_unscaled_p (mode, offset));
3062
3063 if (outer_code == PARALLEL)
3064 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3065 && offset_7bit_signed_scaled_p (mode, offset));
3066 else
3067 return (offset_9bit_signed_unscaled_p (mode, offset)
3068 || offset_12bit_unsigned_scaled_p (mode, offset));
3069 }
3070
3071 if (allow_reg_index_p)
3072 {
3073 /* Look for base + (scaled/extended) index register. */
3074 if (aarch64_base_register_rtx_p (op0, strict_p)
3075 && aarch64_classify_index (info, op1, mode, strict_p))
3076 {
3077 info->base = op0;
3078 return true;
3079 }
3080 if (aarch64_base_register_rtx_p (op1, strict_p)
3081 && aarch64_classify_index (info, op0, mode, strict_p))
3082 {
3083 info->base = op1;
3084 return true;
3085 }
3086 }
3087
3088 return false;
3089
3090 case POST_INC:
3091 case POST_DEC:
3092 case PRE_INC:
3093 case PRE_DEC:
3094 info->type = ADDRESS_REG_WB;
3095 info->base = XEXP (x, 0);
3096 info->offset = NULL_RTX;
3097 return aarch64_base_register_rtx_p (info->base, strict_p);
3098
3099 case POST_MODIFY:
3100 case PRE_MODIFY:
3101 info->type = ADDRESS_REG_WB;
3102 info->base = XEXP (x, 0);
3103 if (GET_CODE (XEXP (x, 1)) == PLUS
3104 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3105 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3106 && aarch64_base_register_rtx_p (info->base, strict_p))
3107 {
3108 HOST_WIDE_INT offset;
3109 info->offset = XEXP (XEXP (x, 1), 1);
3110 offset = INTVAL (info->offset);
3111
3112 /* TImode and TFmode values are allowed in both pairs of X
3113 registers and individual Q registers. The available
3114 address modes are:
3115 X,X: 7-bit signed scaled offset
3116 Q: 9-bit signed offset
3117 We conservatively require an offset representable in both modes.
3118 */
3119 if (mode == TImode || mode == TFmode)
3120 return (offset_7bit_signed_scaled_p (mode, offset)
3121 && offset_9bit_signed_unscaled_p (mode, offset));
3122
3123 if (outer_code == PARALLEL)
3124 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3125 && offset_7bit_signed_scaled_p (mode, offset));
3126 else
3127 return offset_9bit_signed_unscaled_p (mode, offset);
3128 }
3129 return false;
3130
3131 case CONST:
3132 case SYMBOL_REF:
3133 case LABEL_REF:
79517551
SN
3134 /* load literal: pc-relative constant pool entry. Only supported
3135 for SI mode or larger. */
43e9d192 3136 info->type = ADDRESS_SYMBOLIC;
79517551 3137 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
43e9d192
IB
3138 {
3139 rtx sym, addend;
3140
3141 split_const (x, &sym, &addend);
3142 return (GET_CODE (sym) == LABEL_REF
3143 || (GET_CODE (sym) == SYMBOL_REF
3144 && CONSTANT_POOL_ADDRESS_P (sym)));
3145 }
3146 return false;
3147
3148 case LO_SUM:
3149 info->type = ADDRESS_LO_SUM;
3150 info->base = XEXP (x, 0);
3151 info->offset = XEXP (x, 1);
3152 if (allow_reg_index_p
3153 && aarch64_base_register_rtx_p (info->base, strict_p))
3154 {
3155 rtx sym, offs;
3156 split_const (info->offset, &sym, &offs);
3157 if (GET_CODE (sym) == SYMBOL_REF
3158 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3159 == SYMBOL_SMALL_ABSOLUTE))
3160 {
3161 /* The symbol and offset must be aligned to the access size. */
3162 unsigned int align;
3163 unsigned int ref_size;
3164
3165 if (CONSTANT_POOL_ADDRESS_P (sym))
3166 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3167 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3168 {
3169 tree exp = SYMBOL_REF_DECL (sym);
3170 align = TYPE_ALIGN (TREE_TYPE (exp));
3171 align = CONSTANT_ALIGNMENT (exp, align);
3172 }
3173 else if (SYMBOL_REF_DECL (sym))
3174 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3175 else
3176 align = BITS_PER_UNIT;
3177
3178 ref_size = GET_MODE_SIZE (mode);
3179 if (ref_size == 0)
3180 ref_size = GET_MODE_SIZE (DImode);
3181
3182 return ((INTVAL (offs) & (ref_size - 1)) == 0
3183 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3184 }
3185 }
3186 return false;
3187
3188 default:
3189 return false;
3190 }
3191}
3192
3193bool
3194aarch64_symbolic_address_p (rtx x)
3195{
3196 rtx offset;
3197
3198 split_const (x, &x, &offset);
3199 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3200}
3201
3202/* Classify the base of symbolic expression X, given that X appears in
3203 context CONTEXT. */
da4f13a4
MS
3204
3205enum aarch64_symbol_type
3206aarch64_classify_symbolic_expression (rtx x,
3207 enum aarch64_symbol_context context)
43e9d192
IB
3208{
3209 rtx offset;
da4f13a4 3210
43e9d192
IB
3211 split_const (x, &x, &offset);
3212 return aarch64_classify_symbol (x, context);
3213}
3214
3215
3216/* Return TRUE if X is a legitimate address for accessing memory in
3217 mode MODE. */
3218static bool
3219aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3220{
3221 struct aarch64_address_info addr;
3222
3223 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3224}
3225
3226/* Return TRUE if X is a legitimate address for accessing memory in
3227 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3228 pair operation. */
3229bool
3230aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3231 RTX_CODE outer_code, bool strict_p)
3232{
3233 struct aarch64_address_info addr;
3234
3235 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3236}
3237
3238/* Return TRUE if rtx X is the immediate constant 0.0. */
3239bool
3520f7cc 3240aarch64_float_const_zero_rtx_p (rtx x)
43e9d192
IB
3241{
3242 REAL_VALUE_TYPE r;
3243
3244 if (GET_MODE (x) == VOIDmode)
3245 return false;
3246
3247 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3248 if (REAL_VALUE_MINUS_ZERO (r))
3249 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3250 return REAL_VALUES_EQUAL (r, dconst0);
3251}
3252
70f09188
AP
3253/* Return the fixed registers used for condition codes. */
3254
3255static bool
3256aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3257{
3258 *p1 = CC_REGNUM;
3259 *p2 = INVALID_REGNUM;
3260 return true;
3261}
3262
43e9d192
IB
3263enum machine_mode
3264aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3265{
3266 /* All floating point compares return CCFP if it is an equality
3267 comparison, and CCFPE otherwise. */
3268 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3269 {
3270 switch (code)
3271 {
3272 case EQ:
3273 case NE:
3274 case UNORDERED:
3275 case ORDERED:
3276 case UNLT:
3277 case UNLE:
3278 case UNGT:
3279 case UNGE:
3280 case UNEQ:
3281 case LTGT:
3282 return CCFPmode;
3283
3284 case LT:
3285 case LE:
3286 case GT:
3287 case GE:
3288 return CCFPEmode;
3289
3290 default:
3291 gcc_unreachable ();
3292 }
3293 }
3294
3295 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3296 && y == const0_rtx
3297 && (code == EQ || code == NE || code == LT || code == GE)
b056c910
N
3298 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3299 || GET_CODE (x) == NEG))
43e9d192
IB
3300 return CC_NZmode;
3301
274b2532 3302 /* A compare with a shifted or negated operand. Because of canonicalization,
43e9d192
IB
3303 the comparison will have to be swapped when we emit the assembly
3304 code. */
3305 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3306 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3307 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3308 || GET_CODE (x) == LSHIFTRT
274b2532
KT
3309 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND
3310 || GET_CODE (x) == NEG))
43e9d192
IB
3311 return CC_SWPmode;
3312
3313 /* A compare of a mode narrower than SI mode against zero can be done
3314 by extending the value in the comparison. */
3315 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3316 && y == const0_rtx)
3317 /* Only use sign-extension if we really need it. */
3318 return ((code == GT || code == GE || code == LE || code == LT)
3319 ? CC_SESWPmode : CC_ZESWPmode);
3320
3321 /* For everything else, return CCmode. */
3322 return CCmode;
3323}
3324
3325static unsigned
3326aarch64_get_condition_code (rtx x)
3327{
3328 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3329 enum rtx_code comp_code = GET_CODE (x);
3330
3331 if (GET_MODE_CLASS (mode) != MODE_CC)
3332 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3333
3334 switch (mode)
3335 {
3336 case CCFPmode:
3337 case CCFPEmode:
3338 switch (comp_code)
3339 {
3340 case GE: return AARCH64_GE;
3341 case GT: return AARCH64_GT;
3342 case LE: return AARCH64_LS;
3343 case LT: return AARCH64_MI;
3344 case NE: return AARCH64_NE;
3345 case EQ: return AARCH64_EQ;
3346 case ORDERED: return AARCH64_VC;
3347 case UNORDERED: return AARCH64_VS;
3348 case UNLT: return AARCH64_LT;
3349 case UNLE: return AARCH64_LE;
3350 case UNGT: return AARCH64_HI;
3351 case UNGE: return AARCH64_PL;
3352 default: gcc_unreachable ();
3353 }
3354 break;
3355
3356 case CCmode:
3357 switch (comp_code)
3358 {
3359 case NE: return AARCH64_NE;
3360 case EQ: return AARCH64_EQ;
3361 case GE: return AARCH64_GE;
3362 case GT: return AARCH64_GT;
3363 case LE: return AARCH64_LE;
3364 case LT: return AARCH64_LT;
3365 case GEU: return AARCH64_CS;
3366 case GTU: return AARCH64_HI;
3367 case LEU: return AARCH64_LS;
3368 case LTU: return AARCH64_CC;
3369 default: gcc_unreachable ();
3370 }
3371 break;
3372
3373 case CC_SWPmode:
3374 case CC_ZESWPmode:
3375 case CC_SESWPmode:
3376 switch (comp_code)
3377 {
3378 case NE: return AARCH64_NE;
3379 case EQ: return AARCH64_EQ;
3380 case GE: return AARCH64_LE;
3381 case GT: return AARCH64_LT;
3382 case LE: return AARCH64_GE;
3383 case LT: return AARCH64_GT;
3384 case GEU: return AARCH64_LS;
3385 case GTU: return AARCH64_CC;
3386 case LEU: return AARCH64_CS;
3387 case LTU: return AARCH64_HI;
3388 default: gcc_unreachable ();
3389 }
3390 break;
3391
3392 case CC_NZmode:
3393 switch (comp_code)
3394 {
3395 case NE: return AARCH64_NE;
3396 case EQ: return AARCH64_EQ;
3397 case GE: return AARCH64_PL;
3398 case LT: return AARCH64_MI;
3399 default: gcc_unreachable ();
3400 }
3401 break;
3402
3403 default:
3404 gcc_unreachable ();
3405 break;
3406 }
3407}
3408
3409static unsigned
3410bit_count (unsigned HOST_WIDE_INT value)
3411{
3412 unsigned count = 0;
3413
3414 while (value)
3415 {
3416 count++;
3417 value &= value - 1;
3418 }
3419
3420 return count;
3421}
3422
3423void
3424aarch64_print_operand (FILE *f, rtx x, char code)
3425{
3426 switch (code)
3427 {
f541a481
KT
3428 /* An integer or symbol address without a preceding # sign. */
3429 case 'c':
3430 switch (GET_CODE (x))
3431 {
3432 case CONST_INT:
3433 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3434 break;
3435
3436 case SYMBOL_REF:
3437 output_addr_const (f, x);
3438 break;
3439
3440 case CONST:
3441 if (GET_CODE (XEXP (x, 0)) == PLUS
3442 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3443 {
3444 output_addr_const (f, x);
3445 break;
3446 }
3447 /* Fall through. */
3448
3449 default:
3450 output_operand_lossage ("Unsupported operand for code '%c'", code);
3451 }
3452 break;
3453
43e9d192
IB
3454 case 'e':
3455 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3456 {
3457 int n;
3458
3459 if (GET_CODE (x) != CONST_INT
3460 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3461 {
3462 output_operand_lossage ("invalid operand for '%%%c'", code);
3463 return;
3464 }
3465
3466 switch (n)
3467 {
3468 case 3:
3469 fputc ('b', f);
3470 break;
3471 case 4:
3472 fputc ('h', f);
3473 break;
3474 case 5:
3475 fputc ('w', f);
3476 break;
3477 default:
3478 output_operand_lossage ("invalid operand for '%%%c'", code);
3479 return;
3480 }
3481 }
3482 break;
3483
3484 case 'p':
3485 {
3486 int n;
3487
3488 /* Print N such that 2^N == X. */
3489 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3490 {
3491 output_operand_lossage ("invalid operand for '%%%c'", code);
3492 return;
3493 }
3494
3495 asm_fprintf (f, "%d", n);
3496 }
3497 break;
3498
3499 case 'P':
3500 /* Print the number of non-zero bits in X (a const_int). */
3501 if (GET_CODE (x) != CONST_INT)
3502 {
3503 output_operand_lossage ("invalid operand for '%%%c'", code);
3504 return;
3505 }
3506
3507 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3508 break;
3509
3510 case 'H':
3511 /* Print the higher numbered register of a pair (TImode) of regs. */
3512 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3513 {
3514 output_operand_lossage ("invalid operand for '%%%c'", code);
3515 return;
3516 }
3517
01a3a324 3518 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
3519 break;
3520
43e9d192
IB
3521 case 'm':
3522 /* Print a condition (eq, ne, etc). */
3523
3524 /* CONST_TRUE_RTX means always -- that's the default. */
3525 if (x == const_true_rtx)
3526 return;
3527
3528 if (!COMPARISON_P (x))
3529 {
3530 output_operand_lossage ("invalid operand for '%%%c'", code);
3531 return;
3532 }
3533
3534 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3535 break;
3536
3537 case 'M':
3538 /* Print the inverse of a condition (eq <-> ne, etc). */
3539
3540 /* CONST_TRUE_RTX means never -- that's the default. */
3541 if (x == const_true_rtx)
3542 {
3543 fputs ("nv", f);
3544 return;
3545 }
3546
3547 if (!COMPARISON_P (x))
3548 {
3549 output_operand_lossage ("invalid operand for '%%%c'", code);
3550 return;
3551 }
3552
3553 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3554 (aarch64_get_condition_code (x))], f);
3555 break;
3556
3557 case 'b':
3558 case 'h':
3559 case 's':
3560 case 'd':
3561 case 'q':
3562 /* Print a scalar FP/SIMD register name. */
3563 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3564 {
3565 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3566 return;
3567 }
50ce6f88 3568 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
3569 break;
3570
3571 case 'S':
3572 case 'T':
3573 case 'U':
3574 case 'V':
3575 /* Print the first FP/SIMD register name in a list. */
3576 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3577 {
3578 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3579 return;
3580 }
50ce6f88 3581 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
3582 break;
3583
a05c0ddf 3584 case 'X':
50d38551 3585 /* Print bottom 16 bits of integer constant in hex. */
a05c0ddf
IB
3586 if (GET_CODE (x) != CONST_INT)
3587 {
3588 output_operand_lossage ("invalid operand for '%%%c'", code);
3589 return;
3590 }
50d38551 3591 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
3592 break;
3593
43e9d192
IB
3594 case 'w':
3595 case 'x':
3596 /* Print a general register name or the zero register (32-bit or
3597 64-bit). */
3520f7cc
JG
3598 if (x == const0_rtx
3599 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 3600 {
50ce6f88 3601 asm_fprintf (f, "%czr", code);
43e9d192
IB
3602 break;
3603 }
3604
3605 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3606 {
50ce6f88 3607 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
3608 break;
3609 }
3610
3611 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3612 {
50ce6f88 3613 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
3614 break;
3615 }
3616
3617 /* Fall through */
3618
3619 case 0:
3620 /* Print a normal operand, if it's a general register, then we
3621 assume DImode. */
3622 if (x == NULL)
3623 {
3624 output_operand_lossage ("missing operand");
3625 return;
3626 }
3627
3628 switch (GET_CODE (x))
3629 {
3630 case REG:
01a3a324 3631 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
3632 break;
3633
3634 case MEM:
3635 aarch64_memory_reference_mode = GET_MODE (x);
3636 output_address (XEXP (x, 0));
3637 break;
3638
3639 case LABEL_REF:
3640 case SYMBOL_REF:
3641 output_addr_const (asm_out_file, x);
3642 break;
3643
3644 case CONST_INT:
3645 asm_fprintf (f, "%wd", INTVAL (x));
3646 break;
3647
3648 case CONST_VECTOR:
3520f7cc
JG
3649 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3650 {
3651 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3652 HOST_WIDE_INT_MIN,
3653 HOST_WIDE_INT_MAX));
3654 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3655 }
3656 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3657 {
3658 fputc ('0', f);
3659 }
3660 else
3661 gcc_unreachable ();
43e9d192
IB
3662 break;
3663
3520f7cc
JG
3664 case CONST_DOUBLE:
3665 /* CONST_DOUBLE can represent a double-width integer.
3666 In this case, the mode of x is VOIDmode. */
3667 if (GET_MODE (x) == VOIDmode)
3668 ; /* Do Nothing. */
3669 else if (aarch64_float_const_zero_rtx_p (x))
3670 {
3671 fputc ('0', f);
3672 break;
3673 }
3674 else if (aarch64_float_const_representable_p (x))
3675 {
3676#define buf_size 20
3677 char float_buf[buf_size] = {'\0'};
3678 REAL_VALUE_TYPE r;
3679 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3680 real_to_decimal_for_mode (float_buf, &r,
3681 buf_size, buf_size,
3682 1, GET_MODE (x));
3683 asm_fprintf (asm_out_file, "%s", float_buf);
3684 break;
3685#undef buf_size
3686 }
3687 output_operand_lossage ("invalid constant");
3688 return;
43e9d192
IB
3689 default:
3690 output_operand_lossage ("invalid operand");
3691 return;
3692 }
3693 break;
3694
3695 case 'A':
3696 if (GET_CODE (x) == HIGH)
3697 x = XEXP (x, 0);
3698
3699 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3700 {
3701 case SYMBOL_SMALL_GOT:
3702 asm_fprintf (asm_out_file, ":got:");
3703 break;
3704
3705 case SYMBOL_SMALL_TLSGD:
3706 asm_fprintf (asm_out_file, ":tlsgd:");
3707 break;
3708
3709 case SYMBOL_SMALL_TLSDESC:
3710 asm_fprintf (asm_out_file, ":tlsdesc:");
3711 break;
3712
3713 case SYMBOL_SMALL_GOTTPREL:
3714 asm_fprintf (asm_out_file, ":gottprel:");
3715 break;
3716
3717 case SYMBOL_SMALL_TPREL:
3718 asm_fprintf (asm_out_file, ":tprel:");
3719 break;
3720
87dd8ab0
MS
3721 case SYMBOL_TINY_GOT:
3722 gcc_unreachable ();
3723 break;
3724
43e9d192
IB
3725 default:
3726 break;
3727 }
3728 output_addr_const (asm_out_file, x);
3729 break;
3730
3731 case 'L':
3732 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3733 {
3734 case SYMBOL_SMALL_GOT:
3735 asm_fprintf (asm_out_file, ":lo12:");
3736 break;
3737
3738 case SYMBOL_SMALL_TLSGD:
3739 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3740 break;
3741
3742 case SYMBOL_SMALL_TLSDESC:
3743 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3744 break;
3745
3746 case SYMBOL_SMALL_GOTTPREL:
3747 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3748 break;
3749
3750 case SYMBOL_SMALL_TPREL:
3751 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3752 break;
3753
87dd8ab0
MS
3754 case SYMBOL_TINY_GOT:
3755 asm_fprintf (asm_out_file, ":got:");
3756 break;
3757
43e9d192
IB
3758 default:
3759 break;
3760 }
3761 output_addr_const (asm_out_file, x);
3762 break;
3763
3764 case 'G':
3765
3766 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3767 {
3768 case SYMBOL_SMALL_TPREL:
3769 asm_fprintf (asm_out_file, ":tprel_hi12:");
3770 break;
3771 default:
3772 break;
3773 }
3774 output_addr_const (asm_out_file, x);
3775 break;
3776
3777 default:
3778 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3779 return;
3780 }
3781}
3782
3783void
3784aarch64_print_operand_address (FILE *f, rtx x)
3785{
3786 struct aarch64_address_info addr;
3787
3788 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3789 MEM, true))
3790 switch (addr.type)
3791 {
3792 case ADDRESS_REG_IMM:
3793 if (addr.offset == const0_rtx)
01a3a324 3794 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43e9d192 3795 else
01a3a324 3796 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
43e9d192
IB
3797 INTVAL (addr.offset));
3798 return;
3799
3800 case ADDRESS_REG_REG:
3801 if (addr.shift == 0)
01a3a324
N
3802 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3803 reg_names [REGNO (addr.offset)]);
43e9d192 3804 else
01a3a324
N
3805 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3806 reg_names [REGNO (addr.offset)], addr.shift);
43e9d192
IB
3807 return;
3808
3809 case ADDRESS_REG_UXTW:
3810 if (addr.shift == 0)
01a3a324 3811 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
3812 REGNO (addr.offset) - R0_REGNUM);
3813 else
01a3a324 3814 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
3815 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3816 return;
3817
3818 case ADDRESS_REG_SXTW:
3819 if (addr.shift == 0)
01a3a324 3820 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
3821 REGNO (addr.offset) - R0_REGNUM);
3822 else
01a3a324 3823 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
3824 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3825 return;
3826
3827 case ADDRESS_REG_WB:
3828 switch (GET_CODE (x))
3829 {
3830 case PRE_INC:
01a3a324 3831 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
3832 GET_MODE_SIZE (aarch64_memory_reference_mode));
3833 return;
3834 case POST_INC:
01a3a324 3835 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
43e9d192
IB
3836 GET_MODE_SIZE (aarch64_memory_reference_mode));
3837 return;
3838 case PRE_DEC:
01a3a324 3839 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3840 GET_MODE_SIZE (aarch64_memory_reference_mode));
3841 return;
3842 case POST_DEC:
01a3a324 3843 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3844 GET_MODE_SIZE (aarch64_memory_reference_mode));
3845 return;
3846 case PRE_MODIFY:
01a3a324 3847 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3848 INTVAL (addr.offset));
3849 return;
3850 case POST_MODIFY:
01a3a324 3851 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3852 INTVAL (addr.offset));
3853 return;
3854 default:
3855 break;
3856 }
3857 break;
3858
3859 case ADDRESS_LO_SUM:
01a3a324 3860 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3861 output_addr_const (f, addr.offset);
3862 asm_fprintf (f, "]");
3863 return;
3864
3865 case ADDRESS_SYMBOLIC:
3866 break;
3867 }
3868
3869 output_addr_const (f, x);
3870}
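
/* Illustrative note (an editorial addition, not from the original sources):
   given the formats used above, typical output looks like

     (reg x0)                            ->  [x0]
     (plus (reg x0) (const_int 16))      ->  [x0,16]
     (plus (reg x0) (reg x1))            ->  [x0,x1]
     (lo_sum (reg x0) (symbol_ref foo))  ->  [x0,#:lo12:foo]
     (pre_inc (reg x0)), 8-byte access   ->  [x0,8]!

   The register names and offsets here are only examples; the actual text
   depends entirely on the RTL being printed.  */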
3871
3872bool
3873aarch64_label_mentioned_p (rtx x)
3874{
3875 const char *fmt;
3876 int i;
3877
3878 if (GET_CODE (x) == LABEL_REF)
3879 return true;
3880
3881 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3882 referencing instruction, but they are constant offsets, not
3883 symbols. */
3884 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3885 return false;
3886
3887 fmt = GET_RTX_FORMAT (GET_CODE (x));
3888 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3889 {
3890 if (fmt[i] == 'E')
3891 {
3892 int j;
3893
3894 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3895 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3896 return 1;
3897 }
3898 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3899 return 1;
3900 }
3901
3902 return 0;
3903}
3904
3905/* Implement REGNO_REG_CLASS. */
3906
3907enum reg_class
3908aarch64_regno_regclass (unsigned regno)
3909{
3910 if (GP_REGNUM_P (regno))
3911 return CORE_REGS;
3912
3913 if (regno == SP_REGNUM)
3914 return STACK_REG;
3915
3916 if (regno == FRAME_POINTER_REGNUM
3917 || regno == ARG_POINTER_REGNUM)
f24bb080 3918 return POINTER_REGS;
3919
3920 if (FP_REGNUM_P (regno))
3921 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3922
3923 return NO_REGS;
3924}
3925
3926/* Try a machine-dependent way of reloading an illegitimate address
3927 operand. If we find one, push the reload and return the new rtx. */
3928
3929rtx
3930aarch64_legitimize_reload_address (rtx *x_p,
3931 enum machine_mode mode,
3932 int opnum, int type,
3933 int ind_levels ATTRIBUTE_UNUSED)
3934{
3935 rtx x = *x_p;
3936
3937 /* Do not allow mem (plus (reg, const)) if vector mode. */
3938 if (aarch64_vector_mode_p (mode)
3939 && GET_CODE (x) == PLUS
3940 && REG_P (XEXP (x, 0))
3941 && CONST_INT_P (XEXP (x, 1)))
3942 {
3943 rtx orig_rtx = x;
3944 x = copy_rtx (x);
3945 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3946 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3947 opnum, (enum reload_type) type);
3948 return x;
3949 }
3950
3951 /* We must recognize output that we have already generated ourselves. */
3952 if (GET_CODE (x) == PLUS
3953 && GET_CODE (XEXP (x, 0)) == PLUS
3954 && REG_P (XEXP (XEXP (x, 0), 0))
3955 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3956 && CONST_INT_P (XEXP (x, 1)))
3957 {
3958 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3959 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3960 opnum, (enum reload_type) type);
3961 return x;
3962 }
3963
3964 /* We wish to handle large displacements off a base register by splitting
3965 the addend across an add and the mem insn. This can cut the number of
3966 extra insns needed from 3 to 1. It is only useful for load/store of a
3967 single register with 12 bit offset field. */
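  /* Worked example (an editorial addition): for a DImode access at
     base + 0x12340 the code below computes low = 0x340 and high = 0x12000.
     Because 0x12000 is a 12-bit immediate shifted left by 12, the reload
     can become roughly

       add  xN, xBASE, #0x12, lsl #12
       ldr  xD, [xN, #0x340]

     with the register numbers chosen by reload; high parts that do not fit
     a shifted 12-bit immediate are forced to memory via force_const_mem
     below.  */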
3968 if (GET_CODE (x) == PLUS
3969 && REG_P (XEXP (x, 0))
3970 && CONST_INT_P (XEXP (x, 1))
3971 && HARD_REGISTER_P (XEXP (x, 0))
3972 && mode != TImode
3973 && mode != TFmode
3974 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
3975 {
3976 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3977 HOST_WIDE_INT low = val & 0xfff;
3978 HOST_WIDE_INT high = val - low;
3979 HOST_WIDE_INT offs;
3980 rtx cst;
3981 enum machine_mode xmode = GET_MODE (x);
3982
3983 /* In ILP32, xmode can be either DImode or SImode. */
3984 gcc_assert (xmode == DImode || xmode == SImode);
3985
3986 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
3987 BLKmode alignment. */
3988 if (GET_MODE_SIZE (mode) == 0)
3989 return NULL_RTX;
3990
3991 offs = low % GET_MODE_SIZE (mode);
3992
3993 /* Align misaligned offset by adjusting high part to compensate. */
3994 if (offs != 0)
3995 {
3996 if (aarch64_uimm12_shift (high + offs))
3997 {
3998 /* Align down. */
3999 low = low - offs;
4000 high = high + offs;
4001 }
4002 else
4003 {
4004 /* Align up. */
4005 offs = GET_MODE_SIZE (mode) - offs;
4006 low = low + offs;
4007 high = high + (low & 0x1000) - offs;
4008 low &= 0xfff;
4009 }
4010 }
4011
4012 /* Check for overflow. */
4013 if (high + low != val)
4014 return NULL_RTX;
4015
4016 cst = GEN_INT (high);
4017 if (!aarch64_uimm12_shift (high))
28514dda 4018 cst = force_const_mem (xmode, cst);
4019
4020 /* Reload high part into base reg, leaving the low part
4021 in the mem instruction.
4022 Note that replacing this gen_rtx_PLUS with plus_constant is
4023 wrong in this case because we rely on the
4024 (plus (plus reg c1) c2) structure being preserved so that
4025 XEXP (*p, 0) in push_reload below uses the correct term. */
4026 x = gen_rtx_PLUS (xmode,
4027 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4028 GEN_INT (low));
4029
4030 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
28514dda 4031 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4032 opnum, (enum reload_type) type);
4033 return x;
4034 }
4035
4036 return NULL_RTX;
4037}
4038
4039
4040static reg_class_t
4041aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4042 reg_class_t rclass,
4043 enum machine_mode mode,
4044 secondary_reload_info *sri)
4045{
4046 /* Without the TARGET_SIMD instructions we cannot move a Q register
4047 to a Q register directly. We need a scratch. */
4048 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4049 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4050 && reg_class_subset_p (rclass, FP_REGS))
4051 {
4052 if (mode == TFmode)
4053 sri->icode = CODE_FOR_aarch64_reload_movtf;
4054 else if (mode == TImode)
4055 sri->icode = CODE_FOR_aarch64_reload_movti;
4056 return NO_REGS;
4057 }
4058
4059 /* A TFmode or TImode memory access should be handled via an FP_REG,
 4060 because AArch64 has richer addressing modes for LDR/STR instructions
 4061 than for LDP/STP instructions. */
4062 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4063 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4064 return FP_REGS;
4065
4066 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4067 return CORE_REGS;
4068
4069 return NO_REGS;
4070}
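
/* Illustrative note (an editorial addition): when the SIMD instructions are
   unavailable (!TARGET_SIMD), a TFmode or TImode copy between two FP/SIMD
   registers is routed through the reload_movtf/reload_movti patterns
   selected above, which use a general-purpose scratch register; see also
   aarch64_register_move_cost, which prices such a copy as three moves.  */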
4071
4072static bool
4073aarch64_can_eliminate (const int from, const int to)
4074{
4075 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4076 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4077
4078 if (frame_pointer_needed)
4079 {
4080 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4081 return true;
4082 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4083 return false;
4084 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4085 && !cfun->calls_alloca)
4086 return true;
4087 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4088 return true;
4089 return false;
4090 }
4091 else
4092 {
4093 /* If we decided that we didn't need a leaf frame pointer but then used
4094 LR in the function, then we'll want a frame pointer after all, so
4095 prevent this elimination to ensure a frame pointer is used.
4096
4097 NOTE: the original value of flag_omit_frame_pointer gets trashed
4098 IFF flag_omit_leaf_frame_pointer is true, so we check the value
4099 of faked_omit_frame_pointer here (which is true when we always
4100 wish to keep non-leaf frame pointers but only wish to keep leaf frame
4101 pointers when LR is clobbered). */
43e9d192 4102 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4103 && df_regs_ever_live_p (LR_REGNUM)
4104 && faked_omit_frame_pointer)
4105 return false;
4106 }
777e6976 4107
4108 return true;
4109}
4110
4111HOST_WIDE_INT
4112aarch64_initial_elimination_offset (unsigned from, unsigned to)
4113{
4114 HOST_WIDE_INT frame_size;
4115 HOST_WIDE_INT offset;
4116
4117 aarch64_layout_frame ();
4118 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4119 + crtl->outgoing_args_size
4120 + cfun->machine->saved_varargs_size);
4121
4122 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4123 offset = frame_size;
4124
4125 if (to == HARD_FRAME_POINTER_REGNUM)
4126 {
4127 if (from == ARG_POINTER_REGNUM)
4128 return offset - crtl->outgoing_args_size;
4129
4130 if (from == FRAME_POINTER_REGNUM)
6991c977 4131 return cfun->machine->frame.saved_regs_size + get_frame_size ();
4132 }
4133
4134 if (to == STACK_POINTER_REGNUM)
4135 {
4136 if (from == FRAME_POINTER_REGNUM)
4137 {
4138 HOST_WIDE_INT elim = crtl->outgoing_args_size
4139 + cfun->machine->frame.saved_regs_size
6991c977 4140 + get_frame_size ()
4141 - cfun->machine->frame.fp_lr_offset;
4142 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4143 return elim;
4144 }
4145 }
4146
4147 return offset;
4148}
4149
4150
4151/* Implement RETURN_ADDR_RTX. We do not support moving back to a
4152 previous frame. */
4153
4154rtx
4155aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4156{
4157 if (count != 0)
4158 return const0_rtx;
4159 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4160}
4161
4162
4163static void
4164aarch64_asm_trampoline_template (FILE *f)
4165{
4166 if (TARGET_ILP32)
4167 {
4168 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4169 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4170 }
4171 else
4172 {
4173 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4174 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4175 }
01a3a324 4176 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 4177 assemble_aligned_integer (4, const0_rtx);
4178 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4179 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4180}
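
/* Illustrative sketch (an editorial addition): for LP64 the template above
   expands to something like

     0:   ldr  x17, .+16     // IP1 <- target function address
     4:   ldr  x18, .+20     // static chain register <- chain value
     8:   br   x17
     12:  .word 0            // padding
     16:  <pointer slot>     // overwritten with fnaddr by aarch64_trampoline_init
     24:  <pointer slot>     // overwritten with the static chain value

   assuming IP1_REGNUM and STATIC_CHAIN_REGNUM map to x17 and x18 in this
   configuration (see aarch64.h); the exact names are whatever reg_names[]
   yields for those registers.  */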
4181
4182static void
4183aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4184{
4185 rtx fnaddr, mem, a_tramp;
28514dda 4186 const int tramp_code_sz = 16;
4187
4188 /* We don't need to copy the trailing D-words; we fill those in below. */
4189 emit_block_move (m_tramp, assemble_trampoline_template (),
4190 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4191 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 4192 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4193 if (GET_MODE (fnaddr) != ptr_mode)
4194 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4195 emit_move_insn (mem, fnaddr);
4196
28514dda 4197 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4198 emit_move_insn (mem, chain_value);
4199
4200 /* XXX We should really define a "clear_cache" pattern and use
4201 gen_clear_cache(). */
4202 a_tramp = XEXP (m_tramp, 0);
4203 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4204 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4205 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4206 ptr_mode);
4207}
4208
4209static unsigned char
4210aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4211{
4212 switch (regclass)
4213 {
4214 case CORE_REGS:
4215 case POINTER_REGS:
4216 case GENERAL_REGS:
4217 case ALL_REGS:
4218 case FP_REGS:
4219 case FP_LO_REGS:
4220 return
4221 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4222 (GET_MODE_SIZE (mode) + 7) / 8;
4223 case STACK_REG:
4224 return 1;
4225
4226 case NO_REGS:
4227 return 0;
4228
4229 default:
4230 break;
4231 }
4232 gcc_unreachable ();
4233}
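
/* Illustrative examples (an editorial addition): under the rules above,
   DImode yields (8 + 7) / 8 = 1 and TImode yields (16 + 7) / 8 = 2, while a
   vector mode such as V4SImode (16 bytes) yields (16 + 15) / 16 = 1,
   whichever of the register classes listed above is asked about.  */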
4234
4235static reg_class_t
78d8b9f0 4236aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 4237{
51bb310d 4238 if (regclass == POINTER_REGS)
4239 return GENERAL_REGS;
4240
4241 if (regclass == STACK_REG)
4242 {
4243 if (REG_P(x)
4244 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4245 return regclass;
4246
4247 return NO_REGS;
4248 }
4249
4250 /* If it's an integer immediate that MOVI can't handle, then
4251 FP_REGS is not an option, so we return NO_REGS instead. */
4252 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4253 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4254 return NO_REGS;
4255
4256 /* Register elimination can result in a request for
 4257 SP+constant->FP_REGS. We cannot support such operations, which
 4258 use SP as the source and an FP_REG as the destination, so reject
 4259 them outright. */
4260 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4261 {
4262 rtx lhs = XEXP (x, 0);
4263
4264 /* Look through a possible SUBREG introduced by ILP32. */
4265 if (GET_CODE (lhs) == SUBREG)
4266 lhs = SUBREG_REG (lhs);
4267
4268 gcc_assert (REG_P (lhs));
4269 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4270 POINTER_REGS));
4271 return NO_REGS;
4272 }
4273
78d8b9f0 4274 return regclass;
4275}
4276
4277void
4278aarch64_asm_output_labelref (FILE* f, const char *name)
4279{
4280 asm_fprintf (f, "%U%s", name);
4281}
4282
4283static void
4284aarch64_elf_asm_constructor (rtx symbol, int priority)
4285{
4286 if (priority == DEFAULT_INIT_PRIORITY)
4287 default_ctor_section_asm_out_constructor (symbol, priority);
4288 else
4289 {
4290 section *s;
4291 char buf[18];
4292 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4293 s = get_section (buf, SECTION_WRITE, NULL);
4294 switch_to_section (s);
4295 assemble_align (POINTER_SIZE);
28514dda 4296 assemble_aligned_integer (POINTER_BYTES, symbol);
4297 }
4298}
4299
4300static void
4301aarch64_elf_asm_destructor (rtx symbol, int priority)
4302{
4303 if (priority == DEFAULT_INIT_PRIORITY)
4304 default_dtor_section_asm_out_destructor (symbol, priority);
4305 else
4306 {
4307 section *s;
4308 char buf[18];
4309 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4310 s = get_section (buf, SECTION_WRITE, NULL);
4311 switch_to_section (s);
4312 assemble_align (POINTER_SIZE);
28514dda 4313 assemble_aligned_integer (POINTER_BYTES, symbol);
4314 }
4315}
4316
4317const char*
4318aarch64_output_casesi (rtx *operands)
4319{
4320 char buf[100];
4321 char label[100];
592a16fc 4322 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4323 int index;
4324 static const char *const patterns[4][2] =
4325 {
4326 {
4327 "ldrb\t%w3, [%0,%w1,uxtw]",
4328 "add\t%3, %4, %w3, sxtb #2"
4329 },
4330 {
4331 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4332 "add\t%3, %4, %w3, sxth #2"
4333 },
4334 {
4335 "ldr\t%w3, [%0,%w1,uxtw #2]",
4336 "add\t%3, %4, %w3, sxtw #2"
4337 },
4338 /* We assume that DImode is only generated when not optimizing and
4339 that we don't really need 64-bit address offsets. That would
4340 imply an object file with 8GB of code in a single function! */
4341 {
4342 "ldr\t%w3, [%0,%w1,uxtw #2]",
4343 "add\t%3, %4, %w3, sxtw #2"
4344 }
4345 };
4346
4347 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4348
4349 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4350
4351 gcc_assert (index >= 0 && index <= 3);
4352
4353 /* Need to implement table size reduction by changing the code below. */
4354 output_asm_insn (patterns[index][0], operands);
4355 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4356 snprintf (buf, sizeof (buf),
4357 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4358 output_asm_insn (buf, operands);
4359 output_asm_insn (patterns[index][1], operands);
4360 output_asm_insn ("br\t%3", operands);
4361 assemble_label (asm_out_file, label);
4362 return "";
4363}
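
/* Illustrative note (an editorial addition): for a HImode dispatch table the
   sequence emitted above is roughly

     ldrh  w3, [x0, w1, uxtw #1]   // load the 16-bit table entry
     adr   x4, .Lrtx<N>            // base address of the table label
     add   x3, x4, w3, sxth #2     // scale the entry and add the base
     br    x3

   where x0/w1/w3/x4 stand for whatever registers the casesi expansion in
   aarch64.md assigned to operands 0, 1, 3 and 4, and <N> is the internal
   label number.  */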
4364
4365
4366/* Return size in bits of an arithmetic operand which is shifted/scaled and
4367 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4368 operator. */
4369
4370int
4371aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4372{
4373 if (shift >= 0 && shift <= 3)
4374 {
4375 int size;
4376 for (size = 8; size <= 32; size *= 2)
4377 {
4378 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4379 if (mask == bits << shift)
4380 return size;
4381 }
4382 }
4383 return 0;
4384}
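
/* Illustrative examples (an editorial addition):
   aarch64_uxt_size (0, 0xff) == 8 and aarch64_uxt_size (1, 0x1fe) == 8
   (a UXTB operand, optionally shifted left by one),
   aarch64_uxt_size (0, 0xffff) == 16 (UXTH), while
   aarch64_uxt_size (0, 0xff00) == 0 because that mask is not one of the
   recognised low masks shifted by 0..3.  */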
4385
4386static bool
4387aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4388 const_rtx x ATTRIBUTE_UNUSED)
4389{
4390 /* We can't use blocks for constants when we're using a per-function
4391 constant pool. */
4392 return false;
4393}
4394
4395static section *
4396aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4397 rtx x ATTRIBUTE_UNUSED,
4398 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4399{
4400 /* Force all constant pool entries into the current function section. */
4401 return function_section (current_function_decl);
4402}
4403
4404
4405/* Costs. */
4406
4407/* Helper function for rtx cost calculation. Strip a shift expression
4408 from X. Returns the inner operand if successful, or the original
4409 expression on failure. */
4410static rtx
4411aarch64_strip_shift (rtx x)
4412{
4413 rtx op = x;
4414
4415 if ((GET_CODE (op) == ASHIFT
4416 || GET_CODE (op) == ASHIFTRT
4417 || GET_CODE (op) == LSHIFTRT)
4418 && CONST_INT_P (XEXP (op, 1)))
4419 return XEXP (op, 0);
4420
4421 if (GET_CODE (op) == MULT
4422 && CONST_INT_P (XEXP (op, 1))
4423 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4424 return XEXP (op, 0);
4425
4426 return x;
4427}
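
/* Illustrative examples (an editorial addition): for costing purposes both
   (ashift (reg X) (const_int 3)) and (mult (reg X) (const_int 8)) strip to
   (reg X), since multiplication by a power of two is the canonical form of
   a left shift in these contexts; any other expression is returned
   unchanged.  */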
4428
4429/* Helper function for rtx cost calculation. Strip a shift or extend
4430 expression from X. Returns the inner operand if successful, or the
4431 original expression on failure. We deal with a number of possible
4432 canonicalization variations here. */
4433static rtx
4434aarch64_strip_shift_or_extend (rtx x)
4435{
4436 rtx op = x;
4437
4438 /* Zero and sign extraction of a widened value. */
4439 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4440 && XEXP (op, 2) == const0_rtx
4441 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4442 XEXP (op, 1)))
4443 return XEXP (XEXP (op, 0), 0);
4444
4445 /* It can also be represented (for zero-extend) as an AND with an
4446 immediate. */
4447 if (GET_CODE (op) == AND
4448 && GET_CODE (XEXP (op, 0)) == MULT
4449 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4450 && CONST_INT_P (XEXP (op, 1))
4451 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4452 INTVAL (XEXP (op, 1))) != 0)
4453 return XEXP (XEXP (op, 0), 0);
4454
4455 /* Now handle extended register, as this may also have an optional
4456 left shift by 1..4. */
4457 if (GET_CODE (op) == ASHIFT
4458 && CONST_INT_P (XEXP (op, 1))
4459 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4460 op = XEXP (op, 0);
4461
4462 if (GET_CODE (op) == ZERO_EXTEND
4463 || GET_CODE (op) == SIGN_EXTEND)
4464 op = XEXP (op, 0);
4465
4466 if (op != x)
4467 return op;
4468
4469 return aarch64_strip_shift (x);
4470}
4471
4472/* Calculate the cost of calculating X, storing it in *COST. Result
4473 is true if the total cost of the operation has now been calculated. */
4474static bool
4475aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4476 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4477{
4478 rtx op0, op1;
73250c4c 4479 const struct cpu_cost_table *extra_cost
4480 = aarch64_tune_params->insn_extra_cost;
4481
4482 switch (code)
4483 {
4484 case SET:
4485 op0 = SET_DEST (x);
4486 op1 = SET_SRC (x);
4487
4488 switch (GET_CODE (op0))
4489 {
4490 case MEM:
4491 if (speed)
73250c4c 4492 *cost += extra_cost->ldst.store;
4493
4494 if (op1 != const0_rtx)
4495 *cost += rtx_cost (op1, SET, 1, speed);
4496 return true;
4497
4498 case SUBREG:
4499 if (! REG_P (SUBREG_REG (op0)))
4500 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4501 /* Fall through. */
4502 case REG:
4503 /* Cost is just the cost of the RHS of the set. */
4504 *cost += rtx_cost (op1, SET, 1, true);
4505 return true;
4506
4507 case ZERO_EXTRACT: /* Bit-field insertion. */
4508 case SIGN_EXTRACT:
4509 /* Strip any redundant widening of the RHS to meet the width of
4510 the target. */
4511 if (GET_CODE (op1) == SUBREG)
4512 op1 = SUBREG_REG (op1);
4513 if ((GET_CODE (op1) == ZERO_EXTEND
4514 || GET_CODE (op1) == SIGN_EXTEND)
4515 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4516 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4517 >= INTVAL (XEXP (op0, 1))))
4518 op1 = XEXP (op1, 0);
4519 *cost += rtx_cost (op1, SET, 1, speed);
4520 return true;
4521
4522 default:
4523 break;
4524 }
4525 return false;
4526
4527 case MEM:
4528 if (speed)
73250c4c 4529 *cost += extra_cost->ldst.load;
4530
4531 return true;
4532
4533 case NEG:
4534 op0 = CONST0_RTX (GET_MODE (x));
4535 op1 = XEXP (x, 0);
4536 goto cost_minus;
4537
4538 case COMPARE:
4539 op0 = XEXP (x, 0);
4540 op1 = XEXP (x, 1);
4541
4542 if (op1 == const0_rtx
4543 && GET_CODE (op0) == AND)
4544 {
4545 x = op0;
4546 goto cost_logic;
4547 }
4548
4549 /* Comparisons can work if the order is swapped.
4550 Canonicalization puts the more complex operation first, but
4551 we want it in op1. */
4552 if (! (REG_P (op0)
4553 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4554 {
4555 op0 = XEXP (x, 1);
4556 op1 = XEXP (x, 0);
4557 }
4558 goto cost_minus;
4559
4560 case MINUS:
4561 op0 = XEXP (x, 0);
4562 op1 = XEXP (x, 1);
4563
4564 cost_minus:
4565 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4566 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4567 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4568 {
4569 if (op0 != const0_rtx)
4570 *cost += rtx_cost (op0, MINUS, 0, speed);
4571
4572 if (CONST_INT_P (op1))
4573 {
4574 if (!aarch64_uimm12_shift (INTVAL (op1)))
4575 *cost += rtx_cost (op1, MINUS, 1, speed);
4576 }
4577 else
4578 {
4579 op1 = aarch64_strip_shift_or_extend (op1);
4580 *cost += rtx_cost (op1, MINUS, 1, speed);
4581 }
4582 return true;
4583 }
4584
4585 return false;
4586
4587 case PLUS:
4588 op0 = XEXP (x, 0);
4589 op1 = XEXP (x, 1);
4590
4591 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4592 {
4593 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4594 {
4595 *cost += rtx_cost (op0, PLUS, 0, speed);
4596 }
4597 else
4598 {
4599 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4600
4601 if (new_op0 == op0
4602 && GET_CODE (op0) == MULT)
4603 {
4604 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4605 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4606 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4607 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4608 {
4609 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4610 speed)
4611 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4612 speed)
4613 + rtx_cost (op1, PLUS, 1, speed));
4614 if (speed)
4615 *cost +=
4616 extra_cost->mult[GET_MODE (x) == DImode].extend_add;
4617 return true;
4618 }
4619 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4620 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4621 + rtx_cost (op1, PLUS, 1, speed));
4622
4623 if (speed)
73250c4c 4624 *cost += extra_cost->mult[GET_MODE (x) == DImode].add;
4625 }
4626
4627 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4628 + rtx_cost (op1, PLUS, 1, speed));
4629 }
4630 return true;
4631 }
4632
4633 return false;
4634
4635 case IOR:
4636 case XOR:
4637 case AND:
4638 cost_logic:
4639 op0 = XEXP (x, 0);
4640 op1 = XEXP (x, 1);
4641
4642 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4643 {
4644 if (CONST_INT_P (op1)
4645 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4646 {
4647 *cost += rtx_cost (op0, AND, 0, speed);
4648 }
4649 else
4650 {
4651 if (GET_CODE (op0) == NOT)
4652 op0 = XEXP (op0, 0);
4653 op0 = aarch64_strip_shift (op0);
4654 *cost += (rtx_cost (op0, AND, 0, speed)
4655 + rtx_cost (op1, AND, 1, speed));
4656 }
4657 return true;
4658 }
4659 return false;
4660
4661 case ZERO_EXTEND:
4662 if ((GET_MODE (x) == DImode
4663 && GET_MODE (XEXP (x, 0)) == SImode)
4664 || GET_CODE (XEXP (x, 0)) == MEM)
4665 {
4666 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4667 return true;
4668 }
4669 return false;
4670
4671 case SIGN_EXTEND:
4672 if (GET_CODE (XEXP (x, 0)) == MEM)
4673 {
4674 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4675 return true;
4676 }
4677 return false;
4678
4679 case ROTATE:
4680 if (!CONST_INT_P (XEXP (x, 1)))
4681 *cost += COSTS_N_INSNS (2);
4682 /* Fall through. */
4683 case ROTATERT:
4684 case LSHIFTRT:
4685 case ASHIFT:
4686 case ASHIFTRT:
4687
4688 /* Shifting by a register often takes an extra cycle. */
4689 if (speed && !CONST_INT_P (XEXP (x, 1)))
73250c4c 4690 *cost += extra_cost->alu.arith_shift_reg;
4691
4692 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4693 return true;
4694
4695 case HIGH:
4696 if (!CONSTANT_P (XEXP (x, 0)))
4697 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4698 return true;
4699
4700 case LO_SUM:
4701 if (!CONSTANT_P (XEXP (x, 1)))
4702 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4703 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4704 return true;
4705
4706 case ZERO_EXTRACT:
4707 case SIGN_EXTRACT:
4708 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4709 return true;
4710
4711 case MULT:
4712 op0 = XEXP (x, 0);
4713 op1 = XEXP (x, 1);
4714
4715 *cost = COSTS_N_INSNS (1);
4716 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4717 {
4718 if (CONST_INT_P (op1)
4719 && exact_log2 (INTVAL (op1)) > 0)
4720 {
4721 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4722 return true;
4723 }
4724
4725 if ((GET_CODE (op0) == ZERO_EXTEND
4726 && GET_CODE (op1) == ZERO_EXTEND)
4727 || (GET_CODE (op0) == SIGN_EXTEND
4728 && GET_CODE (op1) == SIGN_EXTEND))
4729 {
4730 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4731 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4732 if (speed)
73250c4c 4733 *cost += extra_cost->mult[GET_MODE (x) == DImode].extend;
4734 return true;
4735 }
4736
4737 if (speed)
73250c4c 4738 *cost += extra_cost->mult[GET_MODE (x) == DImode].simple;
4739 }
4740 else if (speed)
4741 {
4742 if (GET_MODE (x) == DFmode)
73250c4c 4743 *cost += extra_cost->fp[1].mult;
43e9d192 4744 else if (GET_MODE (x) == SFmode)
73250c4c 4745 *cost += extra_cost->fp[0].mult;
4746 }
4747
4748 return false; /* All arguments need to be in registers. */
4749
4750 case MOD:
4751 case UMOD:
4752 *cost = COSTS_N_INSNS (2);
4753 if (speed)
4754 {
4755 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4756 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
4757 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
43e9d192 4758 else if (GET_MODE (x) == DFmode)
4759 *cost += (extra_cost->fp[1].mult
4760 + extra_cost->fp[1].div);
43e9d192 4761 else if (GET_MODE (x) == SFmode)
4762 *cost += (extra_cost->fp[0].mult
4763 + extra_cost->fp[0].div);
4764 }
4765 return false; /* All arguments need to be in registers. */
4766
4767 case DIV:
4768 case UDIV:
4769 *cost = COSTS_N_INSNS (1);
4770 if (speed)
4771 {
4772 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
73250c4c 4773 *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
43e9d192 4774 else if (GET_MODE (x) == DFmode)
73250c4c 4775 *cost += extra_cost->fp[1].div;
43e9d192 4776 else if (GET_MODE (x) == SFmode)
73250c4c 4777 *cost += extra_cost->fp[0].div;
4778 }
4779 return false; /* All arguments need to be in registers. */
4780
4781 default:
4782 break;
4783 }
4784 return false;
4785}
4786
4787static int
4788aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4789 enum machine_mode mode ATTRIBUTE_UNUSED,
4790 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4791{
4792 enum rtx_code c = GET_CODE (x);
4793 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4794
4795 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4796 return addr_cost->pre_modify;
4797
4798 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4799 return addr_cost->post_modify;
4800
4801 if (c == PLUS)
4802 {
4803 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4804 return addr_cost->imm_offset;
4805 else if (GET_CODE (XEXP (x, 0)) == MULT
4806 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4807 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4808 return addr_cost->register_extend;
4809
4810 return addr_cost->register_offset;
4811 }
4812 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4813 return addr_cost->imm_offset;
4814
4815 return 0;
4816}
4817
4818static int
4819aarch64_register_move_cost (enum machine_mode mode,
4820 reg_class_t from, reg_class_t to)
4821{
4822 const struct cpu_regmove_cost *regmove_cost
4823 = aarch64_tune_params->regmove_cost;
4824
4825 if (from == GENERAL_REGS && to == GENERAL_REGS)
4826 return regmove_cost->GP2GP;
4827 else if (from == GENERAL_REGS)
4828 return regmove_cost->GP2FP;
4829 else if (to == GENERAL_REGS)
4830 return regmove_cost->FP2GP;
4831
4832 /* When AdvSIMD instructions are disabled it is not possible to move
4833 a 128-bit value directly between Q registers. This is handled in
4834 secondary reload. A general register is used as a scratch to move
4835 the upper DI value and the lower DI value is moved directly,
4836 hence the cost is the sum of three moves. */
4837
4838 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
4839 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4840
4841 return regmove_cost->FP2FP;
4842}
4843
4844static int
4845aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4846 reg_class_t rclass ATTRIBUTE_UNUSED,
4847 bool in ATTRIBUTE_UNUSED)
4848{
4849 return aarch64_tune_params->memmov_cost;
4850}
4851
4852/* Vectorizer cost model target hooks. */
4853
4854/* Implement targetm.vectorize.builtin_vectorization_cost. */
4855static int
4856aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4857 tree vectype,
4858 int misalign ATTRIBUTE_UNUSED)
4859{
4860 unsigned elements;
4861
4862 switch (type_of_cost)
4863 {
4864 case scalar_stmt:
4865 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
4866
4867 case scalar_load:
4868 return aarch64_tune_params->vec_costs->scalar_load_cost;
4869
4870 case scalar_store:
4871 return aarch64_tune_params->vec_costs->scalar_store_cost;
4872
4873 case vector_stmt:
4874 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4875
4876 case vector_load:
4877 return aarch64_tune_params->vec_costs->vec_align_load_cost;
4878
4879 case vector_store:
4880 return aarch64_tune_params->vec_costs->vec_store_cost;
4881
4882 case vec_to_scalar:
4883 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
4884
4885 case scalar_to_vec:
4886 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
4887
4888 case unaligned_load:
4889 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
4890
4891 case unaligned_store:
4892 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
4893
4894 case cond_branch_taken:
4895 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
4896
4897 case cond_branch_not_taken:
4898 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
4899
4900 case vec_perm:
4901 case vec_promote_demote:
4902 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4903
4904 case vec_construct:
4905 elements = TYPE_VECTOR_SUBPARTS (vectype);
4906 return elements / 2 + 1;
4907
4908 default:
4909 gcc_unreachable ();
4910 }
4911}
4912
4913/* Implement targetm.vectorize.add_stmt_cost. */
4914static unsigned
4915aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4916 struct _stmt_vec_info *stmt_info, int misalign,
4917 enum vect_cost_model_location where)
4918{
4919 unsigned *cost = (unsigned *) data;
4920 unsigned retval = 0;
4921
4922 if (flag_vect_cost_model)
4923 {
4924 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4925 int stmt_cost =
4926 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
4927
4928 /* Statements in an inner loop relative to the loop being
4929 vectorized are weighted more heavily. The value here is
4930 a function (linear for now) of the loop nest level. */
4931 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4932 {
4933 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4934 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
4935 unsigned nest_level = loop_depth (loop);
4936
4937 count *= nest_level;
4938 }
4939
4940 retval = (unsigned) (count * stmt_cost);
4941 cost[where] += retval;
4942 }
4943
4944 return retval;
4945}
4946
4947static void initialize_aarch64_code_model (void);
4948
4949/* Parse the architecture extension string. */
4950
4951static void
4952aarch64_parse_extension (char *str)
4953{
4954 /* The extension string is parsed left to right. */
4955 const struct aarch64_option_extension *opt = NULL;
4956
4957 /* Flag to say whether we are adding or removing an extension. */
4958 int adding_ext = -1;
4959
4960 while (str != NULL && *str != 0)
4961 {
4962 char *ext;
4963 size_t len;
4964
4965 str++;
4966 ext = strchr (str, '+');
4967
4968 if (ext != NULL)
4969 len = ext - str;
4970 else
4971 len = strlen (str);
4972
4973 if (len >= 2 && strncmp (str, "no", 2) == 0)
4974 {
4975 adding_ext = 0;
4976 len -= 2;
4977 str += 2;
4978 }
4979 else if (len > 0)
4980 adding_ext = 1;
4981
4982 if (len == 0)
4983 {
4984 error ("missing feature modifier after %qs", "+no");
4985 return;
4986 }
4987
4988 /* Scan over the extensions table trying to find an exact match. */
4989 for (opt = all_extensions; opt->name != NULL; opt++)
4990 {
4991 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
4992 {
4993 /* Add or remove the extension. */
4994 if (adding_ext)
4995 aarch64_isa_flags |= opt->flags_on;
4996 else
4997 aarch64_isa_flags &= ~(opt->flags_off);
4998 break;
4999 }
5000 }
5001
5002 if (opt->name == NULL)
5003 {
5004 /* Extension not found in list. */
5005 error ("unknown feature modifier %qs", str);
5006 return;
5007 }
5008
5009 str = ext;
5010 };
5011
5012 return;
5013}
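
/* Illustrative example (an editorial addition): for an option such as
   -mcpu=<name>+fp+nosimd, aarch64_parse_cpu below passes the suffix
   "+fp+nosimd" to this routine, which then sets the flags_on bits of the
   "fp" entry and clears the flags_off bits of the "simd" entry, assuming
   both names appear in the all_extensions table for this configuration.  */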
5014
5015/* Parse the ARCH string. */
5016
5017static void
5018aarch64_parse_arch (void)
5019{
5020 char *ext;
5021 const struct processor *arch;
5022 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
5023 size_t len;
5024
5025 strcpy (str, aarch64_arch_string);
5026
5027 ext = strchr (str, '+');
5028
5029 if (ext != NULL)
5030 len = ext - str;
5031 else
5032 len = strlen (str);
5033
5034 if (len == 0)
5035 {
5036 error ("missing arch name in -march=%qs", str);
5037 return;
5038 }
5039
5040 /* Loop through the list of supported ARCHs to find a match. */
5041 for (arch = all_architectures; arch->name != NULL; arch++)
5042 {
5043 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5044 {
5045 selected_arch = arch;
5046 aarch64_isa_flags = selected_arch->flags;
5047 selected_cpu = &all_cores[selected_arch->core];
5048
5049 if (ext != NULL)
5050 {
5051 /* ARCH string contains at least one extension. */
5052 aarch64_parse_extension (ext);
5053 }
5054
5055 return;
5056 }
5057 }
5058
5059 /* ARCH name not found in list. */
5060 error ("unknown value %qs for -march", str);
5061 return;
5062}
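
/* Illustrative example (an editorial addition): with -march=armv8-a+nosimd
   the loop above matches the "armv8-a" entry in all_architectures, takes
   that architecture's ISA flags and default core, and then hands "+nosimd"
   to aarch64_parse_extension to clear the SIMD bits again.  */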
5063
5064/* Parse the CPU string. */
5065
5066static void
5067aarch64_parse_cpu (void)
5068{
5069 char *ext;
5070 const struct processor *cpu;
5071 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5072 size_t len;
5073
5074 strcpy (str, aarch64_cpu_string);
5075
5076 ext = strchr (str, '+');
5077
5078 if (ext != NULL)
5079 len = ext - str;
5080 else
5081 len = strlen (str);
5082
5083 if (len == 0)
5084 {
5085 error ("missing cpu name in -mcpu=%qs", str);
5086 return;
5087 }
5088
5089 /* Loop through the list of supported CPUs to find a match. */
5090 for (cpu = all_cores; cpu->name != NULL; cpu++)
5091 {
5092 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5093 {
5094 selected_cpu = cpu;
5095 aarch64_isa_flags = selected_cpu->flags;
5096
5097 if (ext != NULL)
5098 {
5099 /* CPU string contains at least one extension. */
5100 aarch64_parse_extension (ext);
5101 }
5102
5103 return;
5104 }
5105 }
5106
5107 /* CPU name not found in list. */
5108 error ("unknown value %qs for -mcpu", str);
5109 return;
5110}
5111
5112/* Parse the TUNE string. */
5113
5114static void
5115aarch64_parse_tune (void)
5116{
5117 const struct processor *cpu;
5118 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5119 strcpy (str, aarch64_tune_string);
5120
5121 /* Loop through the list of supported CPUs to find a match. */
5122 for (cpu = all_cores; cpu->name != NULL; cpu++)
5123 {
5124 if (strcmp (cpu->name, str) == 0)
5125 {
5126 selected_tune = cpu;
5127 return;
5128 }
5129 }
5130
5131 /* CPU name not found in list. */
5132 error ("unknown value %qs for -mtune", str);
5133 return;
5134}
5135
5136
5137/* Implement TARGET_OPTION_OVERRIDE. */
5138
5139static void
5140aarch64_override_options (void)
5141{
5142 /* -march wins over -mcpu, so when -march is defined, -mcpu takes the same
 5143 value; otherwise -march remains undefined. -mtune can be used with either
 5144 -march or -mcpu. */
5145
5146 if (aarch64_arch_string)
5147 {
5148 aarch64_parse_arch ();
5149 aarch64_cpu_string = NULL;
5150 }
5151
5152 if (aarch64_cpu_string)
5153 {
5154 aarch64_parse_cpu ();
5155 selected_arch = NULL;
5156 }
5157
5158 if (aarch64_tune_string)
5159 {
5160 aarch64_parse_tune ();
5161 }
5162
5163 initialize_aarch64_code_model ();
5164
5165 aarch64_build_bitmask_table ();
5166
5167 /* This target defaults to strict volatile bitfields. */
5168 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5169 flag_strict_volatile_bitfields = 1;
5170
5171 /* If the user did not specify a processor, choose the default
5172 one for them. This will be the CPU set during configuration using
5173 --with-cpu, otherwise it is "generic". */
5174 if (!selected_cpu)
5175 {
5176 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5177 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5178 }
5179
5180 gcc_assert (selected_cpu);
5181
5182 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
5183 if (!selected_tune)
5184 selected_tune = &all_cores[selected_cpu->core];
5185
5186 aarch64_tune_flags = selected_tune->flags;
5187 aarch64_tune = selected_tune->core;
5188 aarch64_tune_params = selected_tune->tune;
5189
5190 aarch64_override_options_after_change ();
5191}
5192
5193/* Implement targetm.override_options_after_change. */
5194
5195static void
5196aarch64_override_options_after_change (void)
5197{
5198 faked_omit_frame_pointer = false;
5199
5200 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
5201 that aarch64_frame_pointer_required will be called. We need to remember
5202 whether flag_omit_frame_pointer was turned on normally or just faked. */
5203
5204 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
5205 {
5206 flag_omit_frame_pointer = true;
5207 faked_omit_frame_pointer = true;
5208 }
5209}
5210
5211static struct machine_function *
5212aarch64_init_machine_status (void)
5213{
5214 struct machine_function *machine;
5215 machine = ggc_alloc_cleared_machine_function ();
5216 return machine;
5217}
5218
5219void
5220aarch64_init_expanders (void)
5221{
5222 init_machine_status = aarch64_init_machine_status;
5223}
5224
5225/* Choose and validate the code model (aarch64_cmodel), checking that any PIC request is supported by the selected model. */
5226static void
5227initialize_aarch64_code_model (void)
5228{
5229 if (flag_pic)
5230 {
5231 switch (aarch64_cmodel_var)
5232 {
5233 case AARCH64_CMODEL_TINY:
5234 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5235 break;
5236 case AARCH64_CMODEL_SMALL:
5237 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5238 break;
5239 case AARCH64_CMODEL_LARGE:
5240 sorry ("code model %qs with -f%s", "large",
5241 flag_pic > 1 ? "PIC" : "pic");
5242 default:
5243 gcc_unreachable ();
5244 }
5245 }
5246 else
5247 aarch64_cmodel = aarch64_cmodel_var;
5248}
5249
5250/* Return true if SYMBOL_REF X binds locally. */
5251
5252static bool
5253aarch64_symbol_binds_local_p (const_rtx x)
5254{
5255 return (SYMBOL_REF_DECL (x)
5256 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5257 : SYMBOL_REF_LOCAL_P (x));
5258}
5259
5260/* Return true if SYMBOL_REF X is thread-local. */
5261static bool
5262aarch64_tls_symbol_p (rtx x)
5263{
5264 if (! TARGET_HAVE_TLS)
5265 return false;
5266
5267 if (GET_CODE (x) != SYMBOL_REF)
5268 return false;
5269
5270 return SYMBOL_REF_TLS_MODEL (x) != 0;
5271}
5272
5273/* Classify a TLS symbol into one of the TLS kinds. */
5274enum aarch64_symbol_type
5275aarch64_classify_tls_symbol (rtx x)
5276{
5277 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5278
5279 switch (tls_kind)
5280 {
5281 case TLS_MODEL_GLOBAL_DYNAMIC:
5282 case TLS_MODEL_LOCAL_DYNAMIC:
5283 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5284
5285 case TLS_MODEL_INITIAL_EXEC:
5286 return SYMBOL_SMALL_GOTTPREL;
5287
5288 case TLS_MODEL_LOCAL_EXEC:
5289 return SYMBOL_SMALL_TPREL;
5290
5291 case TLS_MODEL_EMULATED:
5292 case TLS_MODEL_NONE:
5293 return SYMBOL_FORCE_TO_MEM;
5294
5295 default:
5296 gcc_unreachable ();
5297 }
5298}
5299
5300/* Return the method that should be used to access SYMBOL_REF or
5301 LABEL_REF X in context CONTEXT. */
17f4d4bf 5302
5303enum aarch64_symbol_type
5304aarch64_classify_symbol (rtx x,
5305 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5306{
5307 if (GET_CODE (x) == LABEL_REF)
5308 {
5309 switch (aarch64_cmodel)
5310 {
5311 case AARCH64_CMODEL_LARGE:
5312 return SYMBOL_FORCE_TO_MEM;
5313
5314 case AARCH64_CMODEL_TINY_PIC:
5315 case AARCH64_CMODEL_TINY:
5316 return SYMBOL_TINY_ABSOLUTE;
5317
5318 case AARCH64_CMODEL_SMALL_PIC:
5319 case AARCH64_CMODEL_SMALL:
5320 return SYMBOL_SMALL_ABSOLUTE;
5321
5322 default:
5323 gcc_unreachable ();
5324 }
5325 }
5326
17f4d4bf 5327 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 5328 {
5329 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
5330 || CONSTANT_POOL_ADDRESS_P (x))
5331 return SYMBOL_FORCE_TO_MEM;
5332
5333 if (aarch64_tls_symbol_p (x))
5334 return aarch64_classify_tls_symbol (x);
5335
5336 switch (aarch64_cmodel)
5337 {
5338 case AARCH64_CMODEL_TINY:
5339 if (SYMBOL_REF_WEAK (x))
5340 return SYMBOL_FORCE_TO_MEM;
5341 return SYMBOL_TINY_ABSOLUTE;
5342
5343 case AARCH64_CMODEL_SMALL:
5344 if (SYMBOL_REF_WEAK (x))
5345 return SYMBOL_FORCE_TO_MEM;
5346 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 5347
17f4d4bf 5348 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 5349 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 5350 return SYMBOL_TINY_GOT;
5351 return SYMBOL_TINY_ABSOLUTE;
5352
5353 case AARCH64_CMODEL_SMALL_PIC:
5354 if (!aarch64_symbol_binds_local_p (x))
5355 return SYMBOL_SMALL_GOT;
5356 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 5357
5358 default:
5359 gcc_unreachable ();
5360 }
43e9d192 5361 }
17f4d4bf 5362
5363 /* By default push everything into the constant pool. */
5364 return SYMBOL_FORCE_TO_MEM;
5365}
5366
5367bool
5368aarch64_constant_address_p (rtx x)
5369{
5370 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5371}
5372
5373bool
5374aarch64_legitimate_pic_operand_p (rtx x)
5375{
5376 if (GET_CODE (x) == SYMBOL_REF
5377 || (GET_CODE (x) == CONST
5378 && GET_CODE (XEXP (x, 0)) == PLUS
5379 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5380 return false;
5381
5382 return true;
5383}
5384
5385/* Return true if X holds either a quarter-precision (FMOV-immediate)
 5386 constant or floating-point +0.0. */
5387static bool
5388aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5389{
5390 if (!CONST_DOUBLE_P (x))
5391 return false;
5392
5393 /* TODO: We could handle moving 0.0 to a TFmode register,
 5394 but first we would like to refactor the movtf_aarch64 pattern
 5395 to be more amenable to splitting moves properly and to
 5396 gating correctly on TARGET_SIMD. For now, reject all
 5397 constants that are not destined for SFmode or DFmode registers. */
5398 if (!(mode == SFmode || mode == DFmode))
5399 return false;
5400
5401 if (aarch64_float_const_zero_rtx_p (x))
5402 return true;
5403 return aarch64_float_const_representable_p (x);
5404}
5405
5406static bool
5407aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5408{
5409 /* Do not allow vector struct mode constants. We could support
5410 0 and -1 easily, but they need support in aarch64-simd.md. */
5411 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5412 return false;
5413
5414 /* This could probably go away because
5415 we now decompose CONST_INTs according to expand_mov_immediate. */
5416 if ((GET_CODE (x) == CONST_VECTOR
48063b9d 5417 && aarch64_simd_valid_immediate (x, mode, false, NULL))
5418 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5419 return !targetm.cannot_force_const_mem (mode, x);
5420
5421 if (GET_CODE (x) == HIGH
5422 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5423 return true;
5424
5425 return aarch64_constant_address_p (x);
5426}
5427
a5bc806c 5428rtx
5429aarch64_load_tp (rtx target)
5430{
5431 if (!target
5432 || GET_MODE (target) != Pmode
5433 || !register_operand (target, Pmode))
5434 target = gen_reg_rtx (Pmode);
5435
5436 /* Can return in any reg. */
5437 emit_insn (gen_aarch64_load_tp_hard (target));
5438 return target;
5439}
5440
5441/* On AAPCS systems, this is the "struct __va_list". */
5442static GTY(()) tree va_list_type;
5443
5444/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5445 Return the type to use as __builtin_va_list.
5446
5447 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5448
5449 struct __va_list
5450 {
5451 void *__stack;
5452 void *__gr_top;
5453 void *__vr_top;
5454 int __gr_offs;
5455 int __vr_offs;
5456 }; */
5457
5458static tree
5459aarch64_build_builtin_va_list (void)
5460{
5461 tree va_list_name;
5462 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5463
5464 /* Create the type. */
5465 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5466 /* Give it the required name. */
5467 va_list_name = build_decl (BUILTINS_LOCATION,
5468 TYPE_DECL,
5469 get_identifier ("__va_list"),
5470 va_list_type);
5471 DECL_ARTIFICIAL (va_list_name) = 1;
5472 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 5473 TYPE_STUB_DECL (va_list_type) = va_list_name;
5474
5475 /* Create the fields. */
5476 f_stack = build_decl (BUILTINS_LOCATION,
5477 FIELD_DECL, get_identifier ("__stack"),
5478 ptr_type_node);
5479 f_grtop = build_decl (BUILTINS_LOCATION,
5480 FIELD_DECL, get_identifier ("__gr_top"),
5481 ptr_type_node);
5482 f_vrtop = build_decl (BUILTINS_LOCATION,
5483 FIELD_DECL, get_identifier ("__vr_top"),
5484 ptr_type_node);
5485 f_groff = build_decl (BUILTINS_LOCATION,
5486 FIELD_DECL, get_identifier ("__gr_offs"),
5487 integer_type_node);
5488 f_vroff = build_decl (BUILTINS_LOCATION,
5489 FIELD_DECL, get_identifier ("__vr_offs"),
5490 integer_type_node);
5491
5492 DECL_ARTIFICIAL (f_stack) = 1;
5493 DECL_ARTIFICIAL (f_grtop) = 1;
5494 DECL_ARTIFICIAL (f_vrtop) = 1;
5495 DECL_ARTIFICIAL (f_groff) = 1;
5496 DECL_ARTIFICIAL (f_vroff) = 1;
5497
5498 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5499 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5500 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5501 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5502 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5503
5504 TYPE_FIELDS (va_list_type) = f_stack;
5505 DECL_CHAIN (f_stack) = f_grtop;
5506 DECL_CHAIN (f_grtop) = f_vrtop;
5507 DECL_CHAIN (f_vrtop) = f_groff;
5508 DECL_CHAIN (f_groff) = f_vroff;
5509
5510 /* Compute its layout. */
5511 layout_type (va_list_type);
5512
5513 return va_list_type;
5514}
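
/* Illustrative note (an editorial addition): for LP64 the record built
   above occupies 32 bytes: three 8-byte pointers (__stack, __gr_top,
   __vr_top) followed by two 4-byte ints (__gr_offs, __vr_offs), matching
   the AAPCS64 layout quoted before this function.  */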
5515
5516/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5517static void
5518aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5519{
5520 const CUMULATIVE_ARGS *cum;
5521 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5522 tree stack, grtop, vrtop, groff, vroff;
5523 tree t;
5524 int gr_save_area_size;
5525 int vr_save_area_size;
5526 int vr_offset;
5527
5528 cum = &crtl->args.info;
5529 gr_save_area_size
5530 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5531 vr_save_area_size
5532 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5533
5534 if (TARGET_GENERAL_REGS_ONLY)
5535 {
5536 if (cum->aapcs_nvrn > 0)
5537 sorry ("%qs and floating point or vector arguments",
5538 "-mgeneral-regs-only");
5539 vr_save_area_size = 0;
5540 }
5541
5542 f_stack = TYPE_FIELDS (va_list_type_node);
5543 f_grtop = DECL_CHAIN (f_stack);
5544 f_vrtop = DECL_CHAIN (f_grtop);
5545 f_groff = DECL_CHAIN (f_vrtop);
5546 f_vroff = DECL_CHAIN (f_groff);
5547
5548 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5549 NULL_TREE);
5550 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5551 NULL_TREE);
5552 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5553 NULL_TREE);
5554 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5555 NULL_TREE);
5556 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5557 NULL_TREE);
5558
5559 /* Emit code to initialize STACK, which points to the next varargs stack
5560 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5561 by named arguments. STACK is 8-byte aligned. */
5562 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5563 if (cum->aapcs_stack_size > 0)
5564 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5565 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5566 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5567
5568 /* Emit code to initialize GRTOP, the top of the GR save area.
5569 virtual_incoming_args_rtx should have been 16 byte aligned. */
5570 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5571 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5572 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5573
5574 /* Emit code to initialize VRTOP, the top of the VR save area.
5575 This address is gr_save_area_bytes below GRTOP, rounded
5576 down to the next 16-byte boundary. */
5577 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5578 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5579 STACK_BOUNDARY / BITS_PER_UNIT);
5580
5581 if (vr_offset)
5582 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5583 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5584 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5585
5586 /* Emit code to initialize GROFF, the offset from GRTOP of the
5587 next GPR argument. */
5588 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5589 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5590 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5591
5592 /* Likewise emit code to initialize VROFF, the offset from FTOP
5593 of the next VR argument. */
5594 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5595 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5596 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5597}
5598
5599/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5600
5601static tree
5602aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5603 gimple_seq *post_p ATTRIBUTE_UNUSED)
5604{
5605 tree addr;
5606 bool indirect_p;
5607 bool is_ha; /* is HFA or HVA. */
5608 bool dw_align; /* double-word align. */
5609 enum machine_mode ag_mode = VOIDmode;
5610 int nregs;
5611 enum machine_mode mode;
5612
5613 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5614 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5615 HOST_WIDE_INT size, rsize, adjust, align;
5616 tree t, u, cond1, cond2;
5617
5618 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5619 if (indirect_p)
5620 type = build_pointer_type (type);
5621
5622 mode = TYPE_MODE (type);
5623
5624 f_stack = TYPE_FIELDS (va_list_type_node);
5625 f_grtop = DECL_CHAIN (f_stack);
5626 f_vrtop = DECL_CHAIN (f_grtop);
5627 f_groff = DECL_CHAIN (f_vrtop);
5628 f_vroff = DECL_CHAIN (f_groff);
5629
5630 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5631 f_stack, NULL_TREE);
5632 size = int_size_in_bytes (type);
5633 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5634
5635 dw_align = false;
5636 adjust = 0;
5637 if (aarch64_vfp_is_call_or_return_candidate (mode,
5638 type,
5639 &ag_mode,
5640 &nregs,
5641 &is_ha))
5642 {
5643 /* TYPE passed in fp/simd registers. */
5644 if (TARGET_GENERAL_REGS_ONLY)
5645 sorry ("%qs and floating point or vector arguments",
5646 "-mgeneral-regs-only");
5647
5648 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5649 unshare_expr (valist), f_vrtop, NULL_TREE);
5650 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5651 unshare_expr (valist), f_vroff, NULL_TREE);
5652
5653 rsize = nregs * UNITS_PER_VREG;
5654
5655 if (is_ha)
5656 {
5657 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5658 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5659 }
5660 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5661 && size < UNITS_PER_VREG)
5662 {
5663 adjust = UNITS_PER_VREG - size;
5664 }
5665 }
5666 else
5667 {
5668 /* TYPE passed in general registers. */
5669 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5670 unshare_expr (valist), f_grtop, NULL_TREE);
5671 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5672 unshare_expr (valist), f_groff, NULL_TREE);
5673 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5674 nregs = rsize / UNITS_PER_WORD;
5675
5676 if (align > 8)
5677 dw_align = true;
5678
5679 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5680 && size < UNITS_PER_WORD)
5681 {
5682 adjust = UNITS_PER_WORD - size;
5683 }
5684 }
5685
5686 /* Get a local temporary for the field value. */
5687 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5688
5689 /* Emit code to branch if off >= 0. */
5690 t = build2 (GE_EXPR, boolean_type_node, off,
5691 build_int_cst (TREE_TYPE (off), 0));
5692 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5693
5694 if (dw_align)
5695 {
5696 /* Emit: offs = (offs + 15) & -16. */
5697 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5698 build_int_cst (TREE_TYPE (off), 15));
5699 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5700 build_int_cst (TREE_TYPE (off), -16));
5701 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5702 }
5703 else
5704 roundup = NULL;
5705
5706 /* Update ap.__[g|v]r_offs */
5707 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5708 build_int_cst (TREE_TYPE (off), rsize));
5709 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5710
5711 /* String up. */
5712 if (roundup)
5713 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5714
5715 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5716 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5717 build_int_cst (TREE_TYPE (f_off), 0));
5718 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5719
5720 /* String up: make sure the assignment happens before the use. */
5721 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5722 COND_EXPR_ELSE (cond1) = t;
5723
5724 /* Prepare the trees handling the argument that is passed on the stack;
 5725 the top-level node will be stored in ON_STACK. */
5726 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5727 if (align > 8)
5728 {
5729 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5730 t = fold_convert (intDI_type_node, arg);
5731 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5732 build_int_cst (TREE_TYPE (t), 15));
5733 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5734 build_int_cst (TREE_TYPE (t), -16));
5735 t = fold_convert (TREE_TYPE (arg), t);
5736 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5737 }
5738 else
5739 roundup = NULL;
5740 /* Advance ap.__stack */
5741 t = fold_convert (intDI_type_node, arg);
5742 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5743 build_int_cst (TREE_TYPE (t), size + 7));
5744 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5745 build_int_cst (TREE_TYPE (t), -8));
5746 t = fold_convert (TREE_TYPE (arg), t);
5747 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5748 /* String up roundup and advance. */
5749 if (roundup)
5750 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5751 /* String up with arg */
5752 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5753 /* Big-endianness related address adjustment. */
5754 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5755 && size < UNITS_PER_WORD)
5756 {
5757 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5758 size_int (UNITS_PER_WORD - size));
5759 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5760 }
5761
5762 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5763 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5764
5765 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5766 t = off;
5767 if (adjust)
5768 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5769 build_int_cst (TREE_TYPE (off), adjust));
5770
5771 t = fold_convert (sizetype, t);
5772 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5773
5774 if (is_ha)
5775 {
5776 /* type ha; // treat as "struct {ftype field[n];}"
5777 ... [computing offs]
5778 for (i = 0; i < nregs; ++i, offs += 16)
5779 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5780 return ha; */
5781 int i;
5782 tree tmp_ha, field_t, field_ptr_t;
5783
5784 /* Declare a local variable. */
5785 tmp_ha = create_tmp_var_raw (type, "ha");
5786 gimple_add_tmp_var (tmp_ha);
5787
5788 /* Establish the base type. */
5789 switch (ag_mode)
5790 {
5791 case SFmode:
5792 field_t = float_type_node;
5793 field_ptr_t = float_ptr_type_node;
5794 break;
5795 case DFmode:
5796 field_t = double_type_node;
5797 field_ptr_t = double_ptr_type_node;
5798 break;
5799 case TFmode:
5800 field_t = long_double_type_node;
5801 field_ptr_t = long_double_ptr_type_node;
5802 break;
5803/* Half precision and quad precision are not fully supported yet. Enable
5804 the following code once the support is complete; the correct type node
5805 for __fp16 * still needs to be found. */
5806#if 0
5807 case HFmode:
5808 field_t = float_type_node;
5809 field_ptr_t = float_ptr_type_node;
5810 break;
5811#endif
5812 case V2SImode:
5813 case V4SImode:
5814 {
5815 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5816 field_t = build_vector_type_for_mode (innertype, ag_mode);
5817 field_ptr_t = build_pointer_type (field_t);
5818 }
5819 break;
5820 default:
5821 gcc_assert (0);
5822 }
5823
5824 /* *(field_ptr_t) &ha = *((field_ptr_t) vr_saved_area) */
5825 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5826 addr = t;
5827 t = fold_convert (field_ptr_t, addr);
5828 t = build2 (MODIFY_EXPR, field_t,
5829 build1 (INDIRECT_REF, field_t, tmp_ha),
5830 build1 (INDIRECT_REF, field_t, t));
5831
5832 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5833 for (i = 1; i < nregs; ++i)
5834 {
5835 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5836 u = fold_convert (field_ptr_t, addr);
5837 u = build2 (MODIFY_EXPR, field_t,
5838 build2 (MEM_REF, field_t, tmp_ha,
5839 build_int_cst (field_ptr_t,
5840 (i *
5841 int_size_in_bytes (field_t)))),
5842 build1 (INDIRECT_REF, field_t, u));
5843 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5844 }
5845
5846 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5847 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5848 }
5849
5850 COND_EXPR_ELSE (cond2) = t;
5851 addr = fold_convert (build_pointer_type (type), cond1);
5852 addr = build_va_arg_indirect_ref (addr);
5853
5854 if (indirect_p)
5855 addr = build_va_arg_indirect_ref (addr);
5856
5857 return addr;
5858}
5859
5860/* Implement TARGET_SETUP_INCOMING_VARARGS. */
5861
5862static void
5863aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5864 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5865 int no_rtl)
5866{
5867 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5868 CUMULATIVE_ARGS local_cum;
5869 int gr_saved, vr_saved;
5870
5871 /* The caller has advanced CUM up to, but not beyond, the last named
5872 argument. Advance a local copy of CUM past the last "real" named
5873 argument, to find out how many registers are left over. */
5874 local_cum = *cum;
5875 aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
5876
5877 /* Find out how many registers we need to save. */
5878 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5879 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5880
5881 if (TARGET_GENERAL_REGS_ONLY)
5882 {
5883 if (local_cum.aapcs_nvrn > 0)
5884 sorry ("%qs and floating point or vector arguments",
5885 "-mgeneral-regs-only");
5886 vr_saved = 0;
5887 }
5888
5889 if (!no_rtl)
5890 {
5891 if (gr_saved > 0)
5892 {
5893 rtx ptr, mem;
5894
5895 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5896 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5897 - gr_saved * UNITS_PER_WORD);
5898 mem = gen_frame_mem (BLKmode, ptr);
5899 set_mem_alias_set (mem, get_varargs_alias_set ());
5900
5901 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5902 mem, gr_saved);
5903 }
5904 if (vr_saved > 0)
5905 {
5906 /* We can't use move_block_from_reg, because it will use
5907 the wrong mode, storing D regs only. */
5908 enum machine_mode mode = TImode;
5909 int off, i;
5910
5911 /* Set OFF to the offset from virtual_incoming_args_rtx of
5912 the first vector register. The VR save area lies below
5913 the GR one, and is aligned to 16 bytes. */
5914 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5915 STACK_BOUNDARY / BITS_PER_UNIT);
5916 off -= vr_saved * UNITS_PER_VREG;
5917
5918 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5919 {
5920 rtx ptr, mem;
5921
5922 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5923 mem = gen_frame_mem (mode, ptr);
5924 set_mem_alias_set (mem, get_varargs_alias_set ());
5925 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5926 off += UNITS_PER_VREG;
5927 }
5928 }
5929 }
5930
5931 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5932 any complication of having crtl->args.pretend_args_size changed. */
5933 cfun->machine->saved_varargs_size
5934 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5935 STACK_BOUNDARY / BITS_PER_UNIT)
5936 + vr_saved * UNITS_PER_VREG);
5937}
5938
5939static void
5940aarch64_conditional_register_usage (void)
5941{
5942 int i;
5943 if (!TARGET_FLOAT)
5944 {
5945 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5946 {
5947 fixed_regs[i] = 1;
5948 call_used_regs[i] = 1;
5949 }
5950 }
5951}
5952
5953/* Walk down the type tree of TYPE counting consecutive base elements.
5954 If *MODEP is VOIDmode, then set it to the first valid floating point
5955 type. If a non-floating point type is found, or if a floating point
5956 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5957 otherwise return the count in the sub-tree. */
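/* For example, under these rules:
     struct { float x, y, z; }        -> 3, *MODEP == SFmode
     struct { double d[2]; }          -> 2, *MODEP == DFmode
     struct { float f; double d; }    -> -1 (mixed element modes)
   The caller decides whether the resulting count qualifies as a
   homogeneous aggregate (at most HA_MAX_NUM_FLDS elements). */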
5958static int
5959aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5960{
5961 enum machine_mode mode;
5962 HOST_WIDE_INT size;
5963
5964 switch (TREE_CODE (type))
5965 {
5966 case REAL_TYPE:
5967 mode = TYPE_MODE (type);
5968 if (mode != DFmode && mode != SFmode && mode != TFmode)
5969 return -1;
5970
5971 if (*modep == VOIDmode)
5972 *modep = mode;
5973
5974 if (*modep == mode)
5975 return 1;
5976
5977 break;
5978
5979 case COMPLEX_TYPE:
5980 mode = TYPE_MODE (TREE_TYPE (type));
5981 if (mode != DFmode && mode != SFmode && mode != TFmode)
5982 return -1;
5983
5984 if (*modep == VOIDmode)
5985 *modep = mode;
5986
5987 if (*modep == mode)
5988 return 2;
5989
5990 break;
5991
5992 case VECTOR_TYPE:
5993 /* Use V2SImode and V4SImode as representatives of all 64-bit
5994 and 128-bit vector types. */
5995 size = int_size_in_bytes (type);
5996 switch (size)
5997 {
5998 case 8:
5999 mode = V2SImode;
6000 break;
6001 case 16:
6002 mode = V4SImode;
6003 break;
6004 default:
6005 return -1;
6006 }
6007
6008 if (*modep == VOIDmode)
6009 *modep = mode;
6010
6011 /* Vector modes are considered to be opaque: two vectors are
6012 equivalent for the purposes of being homogeneous aggregates
6013 if they are the same size. */
6014 if (*modep == mode)
6015 return 1;
6016
6017 break;
6018
6019 case ARRAY_TYPE:
6020 {
6021 int count;
6022 tree index = TYPE_DOMAIN (type);
6023
6024 /* Can't handle incomplete types. */
6025 if (!COMPLETE_TYPE_P (type))
6026 return -1;
6027
6028 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6029 if (count == -1
6030 || !index
6031 || !TYPE_MAX_VALUE (index)
6032 || !host_integerp (TYPE_MAX_VALUE (index), 1)
6033 || !TYPE_MIN_VALUE (index)
6034 || !host_integerp (TYPE_MIN_VALUE (index), 1)
6035 || count < 0)
6036 return -1;
6037
6038 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
6039 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
6040
6041 /* There must be no padding. */
6042 if (!host_integerp (TYPE_SIZE (type), 1)
6043 || (tree_low_cst (TYPE_SIZE (type), 1)
6044 != count * GET_MODE_BITSIZE (*modep)))
6045 return -1;
6046
6047 return count;
6048 }
6049
6050 case RECORD_TYPE:
6051 {
6052 int count = 0;
6053 int sub_count;
6054 tree field;
6055
6056 /* Can't handle incomplete types. */
6057 if (!COMPLETE_TYPE_P (type))
6058 return -1;
6059
6060 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6061 {
6062 if (TREE_CODE (field) != FIELD_DECL)
6063 continue;
6064
6065 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6066 if (sub_count < 0)
6067 return -1;
6068 count += sub_count;
6069 }
6070
6071 /* There must be no padding. */
6072 if (!host_integerp (TYPE_SIZE (type), 1)
6073 || (tree_low_cst (TYPE_SIZE (type), 1)
6074 != count * GET_MODE_BITSIZE (*modep)))
6075 return -1;
6076
6077 return count;
6078 }
6079
6080 case UNION_TYPE:
6081 case QUAL_UNION_TYPE:
6082 {
6083 /* These aren't very interesting except in a degenerate case. */
6084 int count = 0;
6085 int sub_count;
6086 tree field;
6087
6088 /* Can't handle incomplete types. */
6089 if (!COMPLETE_TYPE_P (type))
6090 return -1;
6091
6092 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6093 {
6094 if (TREE_CODE (field) != FIELD_DECL)
6095 continue;
6096
6097 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6098 if (sub_count < 0)
6099 return -1;
6100 count = count > sub_count ? count : sub_count;
6101 }
6102
6103 /* There must be no padding. */
6104 if (!host_integerp (TYPE_SIZE (type), 1)
6105 || (tree_low_cst (TYPE_SIZE (type), 1)
6106 != count * GET_MODE_BITSIZE (*modep)))
6107 return -1;
6108
6109 return count;
6110 }
6111
6112 default:
6113 break;
6114 }
6115
6116 return -1;
6117}
6118
38e8f663
YR
6119/* Return true if we use LRA instead of the reload pass. */
6120static bool
6121aarch64_lra_p (void)
6122{
6123 return aarch64_lra_flag;
6124}
6125
43e9d192
IB
6126/* Return TRUE if the type, as described by TYPE and MODE, is a composite
6127 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6128 array types. The C99 floating-point complex types are also considered
6129 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6130 types, which are GCC extensions and out of the scope of AAPCS64, are
6131 treated as composite types here as well.
6132
6133 Note that MODE itself is not sufficient in determining whether a type
6134 is such a composite type or not. This is because
6135 stor-layout.c:compute_record_mode may have already changed the MODE
6136 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6137 structure with only one field may have its MODE set to the mode of the
6138 field. Also an integer mode whose size matches the size of the
6139 RECORD_TYPE type may be used to substitute the original mode
6140 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6141 solely relied on. */
6142
6143static bool
6144aarch64_composite_type_p (const_tree type,
6145 enum machine_mode mode)
6146{
6147 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6148 return true;
6149
6150 if (mode == BLKmode
6151 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6152 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6153 return true;
6154
6155 return false;
6156}
6157
6158/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6159 type as described in AAPCS64 \S 4.1.2.
6160
6161 See the comment above aarch64_composite_type_p for the notes on MODE. */
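/* For example, int32x2_t and float32x4_t (8 and 16 bytes respectively) are
   short vectors, while larger generic vector_size types are not. */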
6162
6163static bool
6164aarch64_short_vector_p (const_tree type,
6165 enum machine_mode mode)
6166{
6167 HOST_WIDE_INT size = -1;
6168
6169 if (type && TREE_CODE (type) == VECTOR_TYPE)
6170 size = int_size_in_bytes (type);
6171 else if (!aarch64_composite_type_p (type, mode)
6172 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6173 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6174 size = GET_MODE_SIZE (mode);
6175
6176 return size == 8 || size == 16;
6177}
6178
6179/* Return TRUE if an argument, whose type is described by TYPE and MODE,
6180 shall be passed or returned in simd/fp register(s) (providing these
6181 parameter passing registers are available).
6182
6183 Upon successful return, *COUNT returns the number of needed registers,
6184 *BASE_MODE returns the mode of the individual register and, when IS_HA
6185 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6186 floating-point aggregate or a homogeneous short-vector aggregate. */
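/* In outline: a non-composite floating-point scalar or a short vector uses
   one register of its own mode; a complex float uses two registers of the
   component mode; a composite type qualifies only if
   aapcs_vfp_sub_candidate counts between 1 and HA_MAX_NUM_FLDS elements
   of a single mode. */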
6187
6188static bool
6189aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6190 const_tree type,
6191 enum machine_mode *base_mode,
6192 int *count,
6193 bool *is_ha)
6194{
6195 enum machine_mode new_mode = VOIDmode;
6196 bool composite_p = aarch64_composite_type_p (type, mode);
6197
6198 if (is_ha != NULL) *is_ha = false;
6199
6200 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6201 || aarch64_short_vector_p (type, mode))
6202 {
6203 *count = 1;
6204 new_mode = mode;
6205 }
6206 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6207 {
6208 if (is_ha != NULL) *is_ha = true;
6209 *count = 2;
6210 new_mode = GET_MODE_INNER (mode);
6211 }
6212 else if (type && composite_p)
6213 {
6214 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6215
6216 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6217 {
6218 if (is_ha != NULL) *is_ha = true;
6219 *count = ag_count;
6220 }
6221 else
6222 return false;
6223 }
6224 else
6225 return false;
6226
6227 *base_mode = new_mode;
6228 return true;
6229}
6230
6231/* Implement TARGET_STRUCT_VALUE_RTX. */
6232
6233static rtx
6234aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6235 int incoming ATTRIBUTE_UNUSED)
6236{
6237 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6238}
6239
6240/* Implements target hook vector_mode_supported_p. */
6241static bool
6242aarch64_vector_mode_supported_p (enum machine_mode mode)
6243{
6244 if (TARGET_SIMD
6245 && (mode == V4SImode || mode == V8HImode
6246 || mode == V16QImode || mode == V2DImode
6247 || mode == V2SImode || mode == V4HImode
6248 || mode == V8QImode || mode == V2SFmode
6249 || mode == V4SFmode || mode == V2DFmode))
6250 return true;
6251
6252 return false;
6253}
6254
b7342d25
IB
6255/* Return appropriate SIMD container
6256 for MODE within a vector of WIDTH bits. */
43e9d192 6257static enum machine_mode
b7342d25 6258aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
43e9d192 6259{
b7342d25 6260 gcc_assert (width == 64 || width == 128);
43e9d192 6261 if (TARGET_SIMD)
b7342d25
IB
6262 {
6263 if (width == 128)
6264 switch (mode)
6265 {
6266 case DFmode:
6267 return V2DFmode;
6268 case SFmode:
6269 return V4SFmode;
6270 case SImode:
6271 return V4SImode;
6272 case HImode:
6273 return V8HImode;
6274 case QImode:
6275 return V16QImode;
6276 case DImode:
6277 return V2DImode;
6278 default:
6279 break;
6280 }
6281 else
6282 switch (mode)
6283 {
6284 case SFmode:
6285 return V2SFmode;
6286 case SImode:
6287 return V2SImode;
6288 case HImode:
6289 return V4HImode;
6290 case QImode:
6291 return V8QImode;
6292 default:
6293 break;
6294 }
6295 }
43e9d192
IB
6296 return word_mode;
6297}
6298
b7342d25
IB
6299/* Return 128-bit container as the preferred SIMD mode for MODE. */
6300static enum machine_mode
6301aarch64_preferred_simd_mode (enum machine_mode mode)
6302{
6303 return aarch64_simd_container_mode (mode, 128);
6304}
6305
3b357264
JG
6306/* Return the bitmask of possible vector sizes for the vectorizer
6307 to iterate over. */
6308static unsigned int
6309aarch64_autovectorize_vector_sizes (void)
6310{
6311 return (16 | 8);
6312}
6313
c6fc9e43
YZ
6314/* A table to help perform AArch64-specific name mangling for AdvSIMD
6315 vector types in order to conform to the AAPCS64 (see "Procedure
6316 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6317 qualify for emission with the mangled names defined in that document,
6318 a vector type must not only be of the correct mode but also be
6319 composed of AdvSIMD vector element types (e.g.
6320 __builtin_aarch64_simd_qi); these types are registered by
6321 aarch64_init_simd_builtins (). In other words, vector types defined
6322 in other ways e.g. via vector_size attribute will get default
6323 mangled names. */
6324typedef struct
6325{
6326 enum machine_mode mode;
6327 const char *element_type_name;
6328 const char *mangled_name;
6329} aarch64_simd_mangle_map_entry;
6330
6331static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6332 /* 64-bit containerized types. */
6333 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6334 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6335 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6336 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6337 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6338 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6339 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6340 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6341 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6342 /* 128-bit containerized types. */
6343 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6344 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6345 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6346 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6347 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6348 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6349 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6350 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6351 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6352 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6353 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6354 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6355 { VOIDmode, NULL, NULL }
6356};
6357
ac2b960f
YZ
6358/* Implement TARGET_MANGLE_TYPE. */
6359
6f549691 6360static const char *
ac2b960f
YZ
6361aarch64_mangle_type (const_tree type)
6362{
6363 /* The AArch64 ABI documents say that "__va_list" has to be
6364 mangled as if it is in the "std" namespace. */
6365 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6366 return "St9__va_list";
6367
c6fc9e43
YZ
6368 /* Check the mode of the vector type, and the name of the vector
6369 element type, against the table. */
6370 if (TREE_CODE (type) == VECTOR_TYPE)
6371 {
6372 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6373
6374 while (pos->mode != VOIDmode)
6375 {
6376 tree elt_type = TREE_TYPE (type);
6377
6378 if (pos->mode == TYPE_MODE (type)
6379 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6380 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6381 pos->element_type_name))
6382 return pos->mangled_name;
6383
6384 pos++;
6385 }
6386 }
6387
ac2b960f
YZ
6388 /* Use the default mangling. */
6389 return NULL;
6390}
6391
43e9d192 6392/* Return the equivalent letter for size. */
81c2dfb9 6393static char
43e9d192
IB
6394sizetochar (int size)
6395{
6396 switch (size)
6397 {
6398 case 64: return 'd';
6399 case 32: return 's';
6400 case 16: return 'h';
6401 case 8 : return 'b';
6402 default: gcc_unreachable ();
6403 }
6404}
6405
3520f7cc
JG
6406/* Return true iff x is a uniform vector of floating-point
6407 constants, and the constant can be represented in
6408 quarter-precision form. Note, as aarch64_float_const_representable_p
6409 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
6410static bool
6411aarch64_vect_float_const_representable_p (rtx x)
6412{
6413 int i = 0;
6414 REAL_VALUE_TYPE r0, ri;
6415 rtx x0, xi;
6416
6417 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6418 return false;
6419
6420 x0 = CONST_VECTOR_ELT (x, 0);
6421 if (!CONST_DOUBLE_P (x0))
6422 return false;
6423
6424 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6425
6426 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6427 {
6428 xi = CONST_VECTOR_ELT (x, i);
6429 if (!CONST_DOUBLE_P (xi))
6430 return false;
6431
6432 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6433 if (!REAL_VALUES_EQUAL (r0, ri))
6434 return false;
6435 }
6436
6437 return aarch64_float_const_representable_p (x0);
6438}
6439
d8edd899 6440/* Return true for valid and false for invalid. */
3ea63f60 6441bool
48063b9d
IB
6442aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
6443 struct simd_immediate_info *info)
43e9d192
IB
6444{
6445#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6446 matches = 1; \
6447 for (i = 0; i < idx; i += (STRIDE)) \
6448 if (!(TEST)) \
6449 matches = 0; \
6450 if (matches) \
6451 { \
6452 immtype = (CLASS); \
6453 elsize = (ELSIZE); \
43e9d192
IB
6454 eshift = (SHIFT); \
6455 emvn = (NEG); \
6456 break; \
6457 }
6458
6459 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6460 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6461 unsigned char bytes[16];
43e9d192
IB
6462 int immtype = -1, matches;
6463 unsigned int invmask = inverse ? 0xff : 0;
6464 int eshift, emvn;
6465
43e9d192 6466 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3520f7cc 6467 {
81c2dfb9
IB
6468 if (! (aarch64_simd_imm_zero_p (op, mode)
6469 || aarch64_vect_float_const_representable_p (op)))
d8edd899 6470 return false;
3520f7cc 6471
48063b9d
IB
6472 if (info)
6473 {
6474 info->value = CONST_VECTOR_ELT (op, 0);
81c2dfb9 6475 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
48063b9d
IB
6476 info->mvn = false;
6477 info->shift = 0;
6478 }
3520f7cc 6479
d8edd899 6480 return true;
3520f7cc 6481 }
43e9d192
IB
6482
6483 /* Splat vector constant out into a byte vector. */
6484 for (i = 0; i < n_elts; i++)
6485 {
6486 rtx el = CONST_VECTOR_ELT (op, i);
6487 unsigned HOST_WIDE_INT elpart;
6488 unsigned int part, parts;
6489
6490 if (GET_CODE (el) == CONST_INT)
6491 {
6492 elpart = INTVAL (el);
6493 parts = 1;
6494 }
6495 else if (GET_CODE (el) == CONST_DOUBLE)
6496 {
6497 elpart = CONST_DOUBLE_LOW (el);
6498 parts = 2;
6499 }
6500 else
6501 gcc_unreachable ();
6502
6503 for (part = 0; part < parts; part++)
6504 {
6505 unsigned int byte;
6506 for (byte = 0; byte < innersize; byte++)
6507 {
6508 bytes[idx++] = (elpart & 0xff) ^ invmask;
6509 elpart >>= BITS_PER_UNIT;
6510 }
6511 if (GET_CODE (el) == CONST_DOUBLE)
6512 elpart = CONST_DOUBLE_HIGH (el);
6513 }
6514 }
6515
6516 /* Sanity check. */
6517 gcc_assert (idx == GET_MODE_SIZE (mode));
6518
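 /* Try each AdvSIMD immediate encoding in turn: immtypes 0-3 are 32-bit
    MOVI with LSL #0/8/16/24, 4-5 are 16-bit MOVI with LSL #0/8, 6-11 are
    the corresponding inverted (MVNI) forms, 12-15 are the MSL
    ("shifting ones") forms, 16 is a per-byte splat and 17 is the 64-bit
    immediate whose bytes are each 0x00 or 0xff. */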
6519 do
6520 {
6521 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6522 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6523
6524 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6525 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6526
6527 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6528 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6529
6530 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6531 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6532
6533 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6534
6535 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6536
6537 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6538 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6539
6540 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6541 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6542
6543 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6544 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6545
6546 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6547 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6548
6549 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6550
6551 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6552
6553 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
e4f0f84d 6554 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
43e9d192
IB
6555
6556 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
e4f0f84d 6557 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
43e9d192
IB
6558
6559 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
e4f0f84d 6560 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
43e9d192
IB
6561
6562 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
e4f0f84d 6563 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
43e9d192
IB
6564
6565 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6566
6567 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6568 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6569 }
6570 while (0);
6571
e4f0f84d 6572 if (immtype == -1)
d8edd899 6573 return false;
43e9d192 6574
48063b9d 6575 if (info)
43e9d192 6576 {
48063b9d 6577 info->element_width = elsize;
48063b9d
IB
6578 info->mvn = emvn != 0;
6579 info->shift = eshift;
6580
43e9d192
IB
6581 unsigned HOST_WIDE_INT imm = 0;
6582
e4f0f84d
TB
6583 if (immtype >= 12 && immtype <= 15)
6584 info->msl = true;
6585
43e9d192
IB
6586 /* Un-invert bytes of recognized vector, if necessary. */
6587 if (invmask != 0)
6588 for (i = 0; i < idx; i++)
6589 bytes[i] ^= invmask;
6590
6591 if (immtype == 17)
6592 {
6593 /* FIXME: Broken on 32-bit H_W_I hosts. */
6594 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6595
6596 for (i = 0; i < 8; i++)
6597 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6598 << (i * BITS_PER_UNIT);
6599
43e9d192 6600
48063b9d
IB
6601 info->value = GEN_INT (imm);
6602 }
6603 else
6604 {
6605 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6606 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
43e9d192
IB
6607
6608 /* Construct 'abcdefgh' because the assembler cannot handle
48063b9d
IB
6609 generic constants. */
6610 if (info->mvn)
43e9d192 6611 imm = ~imm;
48063b9d
IB
6612 imm = (imm >> info->shift) & 0xff;
6613 info->value = GEN_INT (imm);
6614 }
43e9d192
IB
6615 }
6616
48063b9d 6617 return true;
43e9d192
IB
6618#undef CHECK
6619}
6620
43e9d192
IB
6621static bool
6622aarch64_const_vec_all_same_int_p (rtx x,
6623 HOST_WIDE_INT minval,
6624 HOST_WIDE_INT maxval)
6625{
6626 HOST_WIDE_INT firstval;
6627 int count, i;
6628
6629 if (GET_CODE (x) != CONST_VECTOR
6630 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6631 return false;
6632
6633 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6634 if (firstval < minval || firstval > maxval)
6635 return false;
6636
6637 count = CONST_VECTOR_NUNITS (x);
6638 for (i = 1; i < count; i++)
6639 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6640 return false;
6641
6642 return true;
6643}
6644
6645/* Check if immediate shift constants are within range. */
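/* For example, for V4HImode (16-bit elements) a left-shift immediate must
   lie in [0, 15] while a right-shift immediate must lie in [1, 16]. */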
6646bool
6647aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6648{
6649 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6650 if (left)
6651 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6652 else
6653 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6654}
6655
3520f7cc
JG
6656/* Return true if X is a uniform vector where all elements
6657 are either the floating-point constant 0.0 or the
6658 integer constant 0. */
43e9d192
IB
6659bool
6660aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6661{
3520f7cc 6662 return x == CONST0_RTX (mode);
43e9d192
IB
6663}
6664
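/* Return true if X is a 64-bit immediate in which each byte is either 0x00
   or 0xff, i.e. a value that the 64-bit form of the MOVI instruction can
   materialize directly. */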
6665bool
6666aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6667{
6668 HOST_WIDE_INT imm = INTVAL (x);
6669 int i;
6670
6671 for (i = 0; i < 8; i++)
6672 {
6673 unsigned int byte = imm & 0xff;
6674 if (byte != 0xff && byte != 0)
6675 return false;
6676 imm >>= 8;
6677 }
6678
6679 return true;
6680}
6681
83f8c414
CSS
6682bool
6683aarch64_mov_operand_p (rtx x,
a5350ddc 6684 enum aarch64_symbol_context context,
83f8c414
CSS
6685 enum machine_mode mode)
6686{
83f8c414
CSS
6687 if (GET_CODE (x) == HIGH
6688 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6689 return true;
6690
6691 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
6692 return true;
6693
6694 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
6695 return true;
6696
a5350ddc
CSS
6697 return aarch64_classify_symbolic_expression (x, context)
6698 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
6699}
6700
43e9d192
IB
6701/* Return a const_int vector of VAL. */
6702rtx
6703aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6704{
6705 int nunits = GET_MODE_NUNITS (mode);
6706 rtvec v = rtvec_alloc (nunits);
6707 int i;
6708
6709 for (i=0; i < nunits; i++)
6710 RTVEC_ELT (v, i) = GEN_INT (val);
6711
6712 return gen_rtx_CONST_VECTOR (mode, v);
6713}
6714
051d0e2f
SN
6715/* Check OP is a legal scalar immediate for the MOVI instruction. */
6716
6717bool
6718aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
6719{
6720 enum machine_mode vmode;
6721
6722 gcc_assert (!VECTOR_MODE_P (mode));
6723 vmode = aarch64_preferred_simd_mode (mode);
6724 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
48063b9d 6725 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
051d0e2f
SN
6726}
6727
43e9d192
IB
6728/* Construct and return a PARALLEL selecting the low or high half of MODE's lanes, according to HIGH. */
6729rtx
6730aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6731{
6732 int nunits = GET_MODE_NUNITS (mode);
6733 rtvec v = rtvec_alloc (nunits / 2);
6734 int base = high ? nunits / 2 : 0;
6735 rtx t1;
6736 int i;
6737
6738 for (i=0; i < nunits / 2; i++)
6739 RTVEC_ELT (v, i) = GEN_INT (base + i);
6740
6741 t1 = gen_rtx_PARALLEL (mode, v);
6742 return t1;
6743}
6744
6745/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6746 HIGH (exclusive). */
6747void
6748aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6749{
6750 HOST_WIDE_INT lane;
6751 gcc_assert (GET_CODE (operand) == CONST_INT);
6752 lane = INTVAL (operand);
6753
6754 if (lane < low || lane >= high)
6755 error ("lane out of range");
6756}
6757
6758void
6759aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6760{
6761 gcc_assert (GET_CODE (operand) == CONST_INT);
6762 HOST_WIDE_INT lane = INTVAL (operand);
6763
6764 if (lane < low || lane >= high)
6765 error ("constant out of range");
6766}
6767
6768/* Emit code to reinterpret one AdvSIMD type as another,
6769 without altering bits. */
6770void
6771aarch64_simd_reinterpret (rtx dest, rtx src)
6772{
6773 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6774}
6775
6776/* Emit code to place an AdvSIMD pair result in memory locations (with equal
6777 registers). */
6778void
6779aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6780 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6781 rtx op1)
6782{
6783 rtx mem = gen_rtx_MEM (mode, destaddr);
6784 rtx tmp1 = gen_reg_rtx (mode);
6785 rtx tmp2 = gen_reg_rtx (mode);
6786
6787 emit_insn (intfn (tmp1, op1, tmp2));
6788
6789 emit_move_insn (mem, tmp1);
6790 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6791 emit_move_insn (mem, tmp2);
6792}
6793
6794/* Return TRUE if OP is a valid vector addressing mode. */
6795bool
6796aarch64_simd_mem_operand_p (rtx op)
6797{
6798 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6799 || GET_CODE (XEXP (op, 0)) == REG);
6800}
6801
6802/* Set up OPERANDS for a register copy from SRC to DEST, taking care
6803 not to early-clobber SRC registers in the process.
6804
6805 We assume that the operands described by SRC and DEST represent a
6806 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6807 number of components into which the copy has been decomposed. */
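/* When the destination overlaps the source and does not start at a lower
   register number, the component moves are emitted in reverse order so
   that no source register is clobbered before it has been read. */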
6808void
6809aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6810 rtx *src, unsigned int count)
6811{
6812 unsigned int i;
6813
6814 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6815 || REGNO (operands[0]) < REGNO (operands[1]))
6816 {
6817 for (i = 0; i < count; i++)
6818 {
6819 operands[2 * i] = dest[i];
6820 operands[2 * i + 1] = src[i];
6821 }
6822 }
6823 else
6824 {
6825 for (i = 0; i < count; i++)
6826 {
6827 operands[2 * i] = dest[count - i - 1];
6828 operands[2 * i + 1] = src[count - i - 1];
6829 }
6830 }
6831}
6832
6833/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6834 one of VSTRUCT modes: OI, CI or XI. */
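/* A register-to-register OI, CI or XI move is split into 2, 3 or 4
   Q-register moves of 4 bytes each; any variant involving memory is a
   single 4-byte instruction. */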
6835int
6836aarch64_simd_attr_length_move (rtx insn)
6837{
43e9d192
IB
6838 enum machine_mode mode;
6839
6840 extract_insn_cached (insn);
6841
6842 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6843 {
6844 mode = GET_MODE (recog_data.operand[0]);
6845 switch (mode)
6846 {
6847 case OImode:
6848 return 8;
6849 case CImode:
6850 return 12;
6851 case XImode:
6852 return 16;
6853 default:
6854 gcc_unreachable ();
6855 }
6856 }
6857 return 4;
6858}
6859
db0253a4
TB
6860/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6861 alignment of a vector to 128 bits. */
6862static HOST_WIDE_INT
6863aarch64_simd_vector_alignment (const_tree type)
6864{
6865 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
6866 return MIN (align, 128);
6867}
6868
6869/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6870static bool
6871aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6872{
6873 if (is_packed)
6874 return false;
6875
6876 /* We guarantee alignment for vectors up to 128-bits. */
6877 if (tree_int_cst_compare (TYPE_SIZE (type),
6878 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6879 return false;
6880
6881 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6882 return true;
6883}
6884
4369c11e
TB
6885/* If VALS is a vector constant that can be loaded into a register
6886 using DUP, generate instructions to do so and return an RTX to
6887 assign to the register. Otherwise return NULL_RTX. */
6888static rtx
6889aarch64_simd_dup_constant (rtx vals)
6890{
6891 enum machine_mode mode = GET_MODE (vals);
6892 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6893 int n_elts = GET_MODE_NUNITS (mode);
6894 bool all_same = true;
6895 rtx x;
6896 int i;
6897
6898 if (GET_CODE (vals) != CONST_VECTOR)
6899 return NULL_RTX;
6900
6901 for (i = 1; i < n_elts; ++i)
6902 {
6903 x = CONST_VECTOR_ELT (vals, i);
6904 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6905 all_same = false;
6906 }
6907
6908 if (!all_same)
6909 return NULL_RTX;
6910
6911 /* We can load this constant by using DUP and a constant in a
6912 single ARM register. This will be cheaper than a vector
6913 load. */
6914 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6915 return gen_rtx_VEC_DUPLICATE (mode, x);
6916}
6917
6918
6919/* Generate code to load VALS, which is a PARALLEL containing only
6920 constants (for vec_init) or CONST_VECTOR, efficiently into a
6921 register. Returns an RTX to copy into the register, or NULL_RTX
6922 for a PARALLEL that can not be converted into a CONST_VECTOR. */
1df3f464 6923static rtx
4369c11e
TB
6924aarch64_simd_make_constant (rtx vals)
6925{
6926 enum machine_mode mode = GET_MODE (vals);
6927 rtx const_dup;
6928 rtx const_vec = NULL_RTX;
6929 int n_elts = GET_MODE_NUNITS (mode);
6930 int n_const = 0;
6931 int i;
6932
6933 if (GET_CODE (vals) == CONST_VECTOR)
6934 const_vec = vals;
6935 else if (GET_CODE (vals) == PARALLEL)
6936 {
6937 /* A CONST_VECTOR must contain only CONST_INTs and
6938 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6939 Only store valid constants in a CONST_VECTOR. */
6940 for (i = 0; i < n_elts; ++i)
6941 {
6942 rtx x = XVECEXP (vals, 0, i);
6943 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
6944 n_const++;
6945 }
6946 if (n_const == n_elts)
6947 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6948 }
6949 else
6950 gcc_unreachable ();
6951
6952 if (const_vec != NULL_RTX
48063b9d 6953 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
4369c11e
TB
6954 /* Load using MOVI/MVNI. */
6955 return const_vec;
6956 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
6957 /* Loaded using DUP. */
6958 return const_dup;
6959 else if (const_vec != NULL_RTX)
6960 /* Load from constant pool. We can not take advantage of single-cycle
6961 LD1 because we need a PC-relative addressing mode. */
6962 return const_vec;
6963 else
6964 /* A PARALLEL containing something not valid inside CONST_VECTOR.
6965 We can not construct an initializer. */
6966 return NULL_RTX;
6967}
6968
6969void
6970aarch64_expand_vector_init (rtx target, rtx vals)
6971{
6972 enum machine_mode mode = GET_MODE (target);
6973 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6974 int n_elts = GET_MODE_NUNITS (mode);
6975 int n_var = 0, one_var = -1;
6976 bool all_same = true;
6977 rtx x, mem;
6978 int i;
6979
6980 x = XVECEXP (vals, 0, 0);
6981 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6982 n_var = 1, one_var = 0;
6983
6984 for (i = 1; i < n_elts; ++i)
6985 {
6986 x = XVECEXP (vals, 0, i);
6987 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6988 ++n_var, one_var = i;
6989
6990 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6991 all_same = false;
6992 }
6993
6994 if (n_var == 0)
6995 {
6996 rtx constant = aarch64_simd_make_constant (vals);
6997 if (constant != NULL_RTX)
6998 {
6999 emit_move_insn (target, constant);
7000 return;
7001 }
7002 }
7003
7004 /* Splat a single non-constant element if we can. */
7005 if (all_same)
7006 {
7007 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
7008 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
7009 return;
7010 }
7011
7012 /* One field is non-constant. Load constant then overwrite varying
7013 field. This is more efficient than using the stack. */
7014 if (n_var == 1)
7015 {
7016 rtx copy = copy_rtx (vals);
7017 rtx index = GEN_INT (one_var);
7018 enum insn_code icode;
7019
7020 /* Load constant part of vector, substitute neighboring value for
7021 varying element. */
7022 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
7023 aarch64_expand_vector_init (target, copy);
7024
7025 /* Insert variable. */
7026 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
7027 icode = optab_handler (vec_set_optab, mode);
7028 gcc_assert (icode != CODE_FOR_nothing);
7029 emit_insn (GEN_FCN (icode) (target, x, index));
7030 return;
7031 }
7032
7033 /* Construct the vector in memory one field at a time
7034 and load the whole vector. */
7035 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7036 for (i = 0; i < n_elts; i++)
7037 emit_move_insn (adjust_address_nv (mem, inner_mode,
7038 i * GET_MODE_SIZE (inner_mode)),
7039 XVECEXP (vals, 0, i));
7040 emit_move_insn (target, mem);
7041
7042}
7043
43e9d192
IB
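/* Return the mask implicitly applied to a shift count: scalar shifts
   truncate the count modulo the mode's width (mask of bit width - 1),
   whereas AdvSIMD vector and vector-structure shifts do not, so return 0
   for those modes. */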
7044static unsigned HOST_WIDE_INT
7045aarch64_shift_truncation_mask (enum machine_mode mode)
7046{
7047 return
7048 (aarch64_vector_mode_supported_p (mode)
7049 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
7050}
7051
7052#ifndef TLS_SECTION_ASM_FLAG
7053#define TLS_SECTION_ASM_FLAG 'T'
7054#endif
7055
7056void
7057aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7058 tree decl ATTRIBUTE_UNUSED)
7059{
7060 char flagchars[10], *f = flagchars;
7061
7062 /* If we have already declared this section, we can use an
7063 abbreviated form to switch back to it -- unless this section is
7064 part of a COMDAT group, in which case GAS requires the full
7065 declaration every time. */
7066 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7067 && (flags & SECTION_DECLARED))
7068 {
7069 fprintf (asm_out_file, "\t.section\t%s\n", name);
7070 return;
7071 }
7072
7073 if (!(flags & SECTION_DEBUG))
7074 *f++ = 'a';
7075 if (flags & SECTION_WRITE)
7076 *f++ = 'w';
7077 if (flags & SECTION_CODE)
7078 *f++ = 'x';
7079 if (flags & SECTION_SMALL)
7080 *f++ = 's';
7081 if (flags & SECTION_MERGE)
7082 *f++ = 'M';
7083 if (flags & SECTION_STRINGS)
7084 *f++ = 'S';
7085 if (flags & SECTION_TLS)
7086 *f++ = TLS_SECTION_ASM_FLAG;
7087 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7088 *f++ = 'G';
7089 *f = '\0';
7090
7091 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7092
7093 if (!(flags & SECTION_NOTYPE))
7094 {
7095 const char *type;
7096 const char *format;
7097
7098 if (flags & SECTION_BSS)
7099 type = "nobits";
7100 else
7101 type = "progbits";
7102
7103#ifdef TYPE_OPERAND_FMT
7104 format = "," TYPE_OPERAND_FMT;
7105#else
7106 format = ",@%s";
7107#endif
7108
7109 fprintf (asm_out_file, format, type);
7110
7111 if (flags & SECTION_ENTSIZE)
7112 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7113 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7114 {
7115 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7116 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7117 else
7118 fprintf (asm_out_file, ",%s,comdat",
7119 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7120 }
7121 }
7122
7123 putc ('\n', asm_out_file);
7124}
7125
7126/* Select a format to encode pointers in exception handling data. */
7127int
7128aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7129{
7130 int type;
7131 switch (aarch64_cmodel)
7132 {
7133 case AARCH64_CMODEL_TINY:
7134 case AARCH64_CMODEL_TINY_PIC:
7135 case AARCH64_CMODEL_SMALL:
7136 case AARCH64_CMODEL_SMALL_PIC:
7137 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
7138 for everything. */
7139 type = DW_EH_PE_sdata4;
7140 break;
7141 default:
7142 /* No assumptions here. 8-byte relocs required. */
7143 type = DW_EH_PE_sdata8;
7144 break;
7145 }
7146 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7147}
7148
0462169c
SN
7149/* Emit load exclusive. */
7150
7151static void
7152aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7153 rtx mem, rtx model_rtx)
7154{
7155 rtx (*gen) (rtx, rtx, rtx);
7156
7157 switch (mode)
7158 {
7159 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7160 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7161 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7162 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7163 default:
7164 gcc_unreachable ();
7165 }
7166
7167 emit_insn (gen (rval, mem, model_rtx));
7168}
7169
7170/* Emit store exclusive. */
7171
7172static void
7173aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7174 rtx rval, rtx mem, rtx model_rtx)
7175{
7176 rtx (*gen) (rtx, rtx, rtx, rtx);
7177
7178 switch (mode)
7179 {
7180 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7181 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7182 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7183 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7184 default:
7185 gcc_unreachable ();
7186 }
7187
7188 emit_insn (gen (bval, rval, mem, model_rtx));
7189}
7190
7191/* Emit INSN as a jump and mark it as unlikely to be taken. */
7192
7193static void
7194aarch64_emit_unlikely_jump (rtx insn)
7195{
e5af9ddd 7196 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
0462169c
SN
7197
7198 insn = emit_jump_insn (insn);
e5af9ddd 7199 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
0462169c
SN
7200}
7201
7202/* Expand a compare and swap pattern. */
7203
7204void
7205aarch64_expand_compare_and_swap (rtx operands[])
7206{
7207 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7208 enum machine_mode mode, cmp_mode;
7209 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7210
7211 bval = operands[0];
7212 rval = operands[1];
7213 mem = operands[2];
7214 oldval = operands[3];
7215 newval = operands[4];
7216 is_weak = operands[5];
7217 mod_s = operands[6];
7218 mod_f = operands[7];
7219 mode = GET_MODE (mem);
7220 cmp_mode = mode;
7221
7222 /* Normally the succ memory model must be stronger than fail, but in the
7223 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7224 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7225
7226 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7227 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7228 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7229
7230 switch (mode)
7231 {
7232 case QImode:
7233 case HImode:
7234 /* For short modes, we're going to perform the comparison in SImode,
7235 so do the zero-extension now. */
7236 cmp_mode = SImode;
7237 rval = gen_reg_rtx (SImode);
7238 oldval = convert_modes (SImode, mode, oldval, true);
7239 /* Fall through. */
7240
7241 case SImode:
7242 case DImode:
7243 /* Force the value into a register if needed. */
7244 if (!aarch64_plus_operand (oldval, mode))
7245 oldval = force_reg (cmp_mode, oldval);
7246 break;
7247
7248 default:
7249 gcc_unreachable ();
7250 }
7251
7252 switch (mode)
7253 {
7254 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7255 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7256 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7257 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7258 default:
7259 gcc_unreachable ();
7260 }
7261
7262 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7263
7264 if (mode == QImode || mode == HImode)
7265 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7266
7267 x = gen_rtx_REG (CCmode, CC_REGNUM);
7268 x = gen_rtx_EQ (SImode, x, const0_rtx);
7269 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7270}
7271
7272/* Split a compare and swap pattern. */
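/* The split form is an explicit load-exclusive/store-exclusive loop;
   roughly (for a strong compare-and-swap, with acquire/release variants
   of the exclusive accesses chosen according to the memory model):

       .Lretry:
         ldxr  rval, [mem]
         cmp   rval, oldval
         b.ne  .Ldone
         stxr  scratch, newval, [mem]
         cbnz  scratch, .Lretry
       .Ldone:

   For a weak compare-and-swap the backward branch is omitted and the
   store-exclusive result is simply tested. */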
7273
7274void
7275aarch64_split_compare_and_swap (rtx operands[])
7276{
7277 rtx rval, mem, oldval, newval, scratch;
7278 enum machine_mode mode;
0462169c
SN
7279 bool is_weak;
7280 rtx label1, label2, x, cond;
7281
7282 rval = operands[0];
7283 mem = operands[1];
7284 oldval = operands[2];
7285 newval = operands[3];
7286 is_weak = (operands[4] != const0_rtx);
0462169c
SN
7287 scratch = operands[7];
7288 mode = GET_MODE (mem);
7289
7290 label1 = NULL_RTX;
7291 if (!is_weak)
7292 {
7293 label1 = gen_label_rtx ();
7294 emit_label (label1);
7295 }
7296 label2 = gen_label_rtx ();
7297
7298 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7299
7300 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7301 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7302 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7303 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7304 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7305
7306 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7307
7308 if (!is_weak)
7309 {
7310 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7311 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7312 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7313 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7314 }
7315 else
7316 {
7317 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7318 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7319 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7320 }
7321
7322 emit_label (label2);
7323}
7324
7325/* Split an atomic operation. */
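/* The split form is the usual exclusive-access loop, roughly:

       .Lretry:
         ldxr  old_out, [mem]
         <op>  new_out, old_out, value
         stxr  cond, new_out, [mem]
         cbnz  cond, .Lretry

   NOT is expanded as AND followed by a bitwise inversion (a fetch-nand),
   and MINUS of a constant becomes PLUS of the negated constant. */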
7326
7327void
7328aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7329 rtx value, rtx model_rtx, rtx cond)
7330{
7331 enum machine_mode mode = GET_MODE (mem);
7332 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7333 rtx label, x;
7334
7335 label = gen_label_rtx ();
7336 emit_label (label);
7337
7338 if (new_out)
7339 new_out = gen_lowpart (wmode, new_out);
7340 if (old_out)
7341 old_out = gen_lowpart (wmode, old_out);
7342 else
7343 old_out = new_out;
7344 value = simplify_gen_subreg (wmode, value, mode, 0);
7345
7346 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7347
7348 switch (code)
7349 {
7350 case SET:
7351 new_out = value;
7352 break;
7353
7354 case NOT:
7355 x = gen_rtx_AND (wmode, old_out, value);
7356 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7357 x = gen_rtx_NOT (wmode, new_out);
7358 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7359 break;
7360
7361 case MINUS:
7362 if (CONST_INT_P (value))
7363 {
7364 value = GEN_INT (-INTVAL (value));
7365 code = PLUS;
7366 }
7367 /* Fall through. */
7368
7369 default:
7370 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7371 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7372 break;
7373 }
7374
7375 aarch64_emit_store_exclusive (mode, cond, mem,
7376 gen_lowpart (mode, new_out), model_rtx);
7377
7378 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7379 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7380 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7381 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7382}
7383
95ca411e
YZ
7384static void
7385aarch64_print_extension (void)
7386{
7387 const struct aarch64_option_extension *opt = NULL;
7388
7389 for (opt = all_extensions; opt->name != NULL; opt++)
7390 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7391 asm_fprintf (asm_out_file, "+%s", opt->name);
7392
7393 asm_fprintf (asm_out_file, "\n");
7394}
7395
43e9d192
IB
7396static void
7397aarch64_start_file (void)
7398{
7399 if (selected_arch)
95ca411e
YZ
7400 {
7401 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7402 aarch64_print_extension ();
7403 }
43e9d192 7404 else if (selected_cpu)
95ca411e
YZ
7405 {
7406 asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
7407 aarch64_print_extension ();
7408 }
43e9d192
IB
7409 default_file_start();
7410}
7411
7412/* Target hook for c_mode_for_suffix. */
7413static enum machine_mode
7414aarch64_c_mode_for_suffix (char suffix)
7415{
7416 if (suffix == 'q')
7417 return TFmode;
7418
7419 return VOIDmode;
7420}
7421
3520f7cc
JG
7422/* We can only represent floating point constants which will fit in
7423 "quarter-precision" values. These values are characterised by
7424 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
7425 by:
7426
7427 (-1)^s * (n/16) * 2^r
7428
7429 Where:
7430 's' is the sign bit.
7431 'n' is an integer in the range 16 <= n <= 31.
7432 'r' is an integer in the range -3 <= r <= 4. */
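/* For example, 0.25 = (16/16) * 2^-2, 1.0 = (16/16) * 2^0 and
   31.0 = (31/16) * 2^4 are representable, whereas 0.1 (not exactly
   representable in binary) and 64.0 (exponent out of range) are not. */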
7433
7434/* Return true iff X can be represented by a quarter-precision
7435 floating point immediate operand. Note, we cannot represent 0.0. */
7436bool
7437aarch64_float_const_representable_p (rtx x)
7438{
7439 /* This represents our current view of how many bits
7440 make up the mantissa. */
7441 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 7442 int exponent;
3520f7cc
JG
7443 unsigned HOST_WIDE_INT mantissa, mask;
7444 HOST_WIDE_INT m1, m2;
7445 REAL_VALUE_TYPE r, m;
7446
7447 if (!CONST_DOUBLE_P (x))
7448 return false;
7449
7450 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7451
7452 /* We cannot represent infinities, NaNs or +/-zero. We won't
7453 know if we have +zero until we analyse the mantissa, but we
7454 can reject the other invalid values. */
7455 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7456 || REAL_VALUE_MINUS_ZERO (r))
7457 return false;
7458
ba96cdfb 7459 /* Extract exponent. */
3520f7cc
JG
7460 r = real_value_abs (&r);
7461 exponent = REAL_EXP (&r);
7462
7463 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7464 highest (sign) bit, with a fixed binary point at bit point_pos.
7465 m1 holds the low part of the mantissa, m2 the high part.
7466 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7467 bits for the mantissa, this can fail (low bits will be lost). */
7468 real_ldexp (&m, &r, point_pos - exponent);
7469 REAL_VALUE_TO_INT (&m1, &m2, m);
7470
7471 /* If the low part of the mantissa has bits set we cannot represent
7472 the value. */
7473 if (m1 != 0)
7474 return false;
7475 /* We have rejected the lower HOST_WIDE_INT, so update our
7476 understanding of how many bits lie in the mantissa and
7477 look only at the high HOST_WIDE_INT. */
7478 mantissa = m2;
7479 point_pos -= HOST_BITS_PER_WIDE_INT;
7480
7481 /* We can only represent values with a mantissa of the form 1.xxxx. */
7482 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7483 if ((mantissa & mask) != 0)
7484 return false;
7485
7486 /* Having filtered unrepresentable values, we may now remove all
7487 but the highest 5 bits. */
7488 mantissa >>= point_pos - 5;
7489
7490 /* We cannot represent the value 0.0, so reject it. This is handled
7491 elsewhere. */
7492 if (mantissa == 0)
7493 return false;
7494
7495 /* Then, as bit 4 is always set, we can mask it off, leaving
7496 the mantissa in the range [0, 15]. */
7497 mantissa &= ~(1 << 4);
7498 gcc_assert (mantissa <= 15);
7499
7500 /* GCC internally does not use IEEE754-like encoding (where normalized
7501 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
7502 Our mantissa values are shifted 4 places to the left relative to
7503 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7504 by 5 places to correct for GCC's representation. */
7505 exponent = 5 - exponent;
7506
7507 return (exponent >= 0 && exponent <= 7);
7508}
7509
7510char*
81c2dfb9 7511aarch64_output_simd_mov_immediate (rtx const_vector,
3520f7cc
JG
7512 enum machine_mode mode,
7513 unsigned width)
7514{
3ea63f60 7515 bool is_valid;
3520f7cc 7516 static char templ[40];
3520f7cc 7517 const char *mnemonic;
e4f0f84d 7518 const char *shift_op;
3520f7cc 7519 unsigned int lane_count = 0;
81c2dfb9 7520 char element_char;
3520f7cc 7521
e4f0f84d 7522 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
48063b9d
IB
7523
7524 /* This will return true to show const_vector is legal for use as either
7525 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
7526 also update INFO to show how the immediate should be generated. */
81c2dfb9 7527 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
3520f7cc
JG
7528 gcc_assert (is_valid);
7529
81c2dfb9 7530 element_char = sizetochar (info.element_width);
48063b9d
IB
7531 lane_count = width / info.element_width;
7532
3520f7cc
JG
7533 mode = GET_MODE_INNER (mode);
7534 if (mode == SFmode || mode == DFmode)
7535 {
48063b9d
IB
7536 gcc_assert (info.shift == 0 && ! info.mvn);
7537 if (aarch64_float_const_zero_rtx_p (info.value))
7538 info.value = GEN_INT (0);
7539 else
7540 {
7541#define buf_size 20
7542 REAL_VALUE_TYPE r;
7543 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
7544 char float_buf[buf_size] = {'\0'};
7545 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
7546#undef buf_size
7547
7548 if (lane_count == 1)
7549 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
7550 else
7551 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 7552 lane_count, element_char, float_buf);
48063b9d
IB
7553 return templ;
7554 }
3520f7cc 7555 }
3520f7cc 7556
48063b9d 7557 mnemonic = info.mvn ? "mvni" : "movi";
e4f0f84d 7558 shift_op = info.msl ? "msl" : "lsl";
3520f7cc
JG
7559
7560 if (lane_count == 1)
48063b9d
IB
7561 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
7562 mnemonic, UINTVAL (info.value));
7563 else if (info.shift)
7564 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
e4f0f84d
TB
7565 ", %s %d", mnemonic, lane_count, element_char,
7566 UINTVAL (info.value), shift_op, info.shift);
3520f7cc 7567 else
48063b9d 7568 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
81c2dfb9 7569 mnemonic, lane_count, element_char, UINTVAL (info.value));
3520f7cc
JG
7570 return templ;
7571}
7572
b7342d25
IB
7573char*
7574aarch64_output_scalar_simd_mov_immediate (rtx immediate,
7575 enum machine_mode mode)
7576{
7577 enum machine_mode vmode;
7578
7579 gcc_assert (!VECTOR_MODE_P (mode));
7580 vmode = aarch64_simd_container_mode (mode, 64);
7581 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
7582 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
7583}
7584
/* Split a move of operands[1] and operands[2] into operands[0]:
   operands[1] goes into the low half of operands[0] and operands[2]
   into the high half.  */

void
aarch64_split_combinev16qi (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  enum machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  gcc_assert (halfmode == V16QImode);

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  Use the three-XOR trick to
     swap the contents of the two source registers without a temporary.  */
  if (reg_overlap_mentioned_p (operands[2], destlo)
      && reg_overlap_mentioned_p (operands[1], desthi))
    {
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
    }
  else if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}

/* vec_perm support.  */

#define MAX_VECT_LEN 16

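/* Describes a candidate constant permutation: the destination and the two
   input operands, the element selection PERM (NELT entries), the vector
   mode, whether both inputs are the same vector, and whether we are only
   testing that the permutation can be expanded (TESTING_P).  */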
struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};

/* Generate a variable permutation.  */

static void
aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_SIMD);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	{
	  /* Expand the argument to a V16QI mode by duplicating it.  */
	  rtx pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
	  emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
	}
      else
	{
	  emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
	}
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
	  emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
	  emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
	}
    }
}

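/* Expand a variable (non-constant) permutation of OP0 and OP1 into TARGET,
   using SEL as the selector.  TBL does not use modulo indexing, so SEL is
   masked down to the valid index range first.  */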
void
aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The TBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  aarch64_expand_vec_perm_1 (target, op0, op1, sel);
}

/* Recognize patterns suitable for the TRN instructions.  */
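/* For example, with V4SI inputs A and B, TRN1 produces
   { A[0], B[0], A[2], B[2] } (permute indices { 0, 4, 2, 6 }) and TRN2
   produces { A[1], B[1], A[3], B[3] } (indices { 1, 5, 3, 7 }).  */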
static bool
aarch64_evpc_trn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_trn2v16qi; break;
	case V8QImode: gen = gen_aarch64_trn2v8qi; break;
	case V8HImode: gen = gen_aarch64_trn2v8hi; break;
	case V4HImode: gen = gen_aarch64_trn2v4hi; break;
	case V4SImode: gen = gen_aarch64_trn2v4si; break;
	case V2SImode: gen = gen_aarch64_trn2v2si; break;
	case V2DImode: gen = gen_aarch64_trn2v2di; break;
	case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
	case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
	case V2DFmode: gen = gen_aarch64_trn2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_trn1v16qi; break;
	case V8QImode: gen = gen_aarch64_trn1v8qi; break;
	case V8HImode: gen = gen_aarch64_trn1v8hi; break;
	case V4HImode: gen = gen_aarch64_trn1v4hi; break;
	case V4SImode: gen = gen_aarch64_trn1v4si; break;
	case V2SImode: gen = gen_aarch64_trn1v2si; break;
	case V2DImode: gen = gen_aarch64_trn1v2di; break;
	case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
	case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
	case V2DFmode: gen = gen_aarch64_trn1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}

/* Recognize patterns suitable for the UZP instructions.  */
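/* For example, with V4SI inputs A and B, UZP1 produces
   { A[0], A[2], B[0], B[2] } (permute indices { 0, 2, 4, 6 }) and UZP2
   produces { A[1], A[3], B[1], B[3] } (indices { 1, 3, 5, 7 }).  */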
static bool
aarch64_evpc_uzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
	case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
	case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
	case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
	case V4SImode: gen = gen_aarch64_uzp2v4si; break;
	case V2SImode: gen = gen_aarch64_uzp2v2si; break;
	case V2DImode: gen = gen_aarch64_uzp2v2di; break;
	case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
	case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
	case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
	case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
	case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
	case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
	case V4SImode: gen = gen_aarch64_uzp1v4si; break;
	case V2SImode: gen = gen_aarch64_uzp1v2si; break;
	case V2DImode: gen = gen_aarch64_uzp1v2di; break;
	case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
	case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
	case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}

/* Recognize patterns suitable for the ZIP instructions.  */
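/* For example, with V4SI inputs A and B, ZIP1 produces
   { A[0], B[0], A[1], B[1] } (permute indices { 0, 4, 1, 5 }) and ZIP2
   produces { A[2], B[2], A[3], B[3] } (indices { 2, 6, 3, 7 }).  */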
static bool
aarch64_evpc_zip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    /* Do Nothing.  */
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
	return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }
  out = d->target;

  if (high)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_zip2v16qi; break;
	case V8QImode: gen = gen_aarch64_zip2v8qi; break;
	case V8HImode: gen = gen_aarch64_zip2v8hi; break;
	case V4HImode: gen = gen_aarch64_zip2v4hi; break;
	case V4SImode: gen = gen_aarch64_zip2v4si; break;
	case V2SImode: gen = gen_aarch64_zip2v2si; break;
	case V2DImode: gen = gen_aarch64_zip2v2di; break;
	case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
	case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
	case V2DFmode: gen = gen_aarch64_zip2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_zip1v16qi; break;
	case V8QImode: gen = gen_aarch64_zip1v8qi; break;
	case V8HImode: gen = gen_aarch64_zip1v8hi; break;
	case V4HImode: gen = gen_aarch64_zip1v4hi; break;
	case V4SImode: gen = gen_aarch64_zip1v4si; break;
	case V2SImode: gen = gen_aarch64_zip1v2si; break;
	case V2DImode: gen = gen_aarch64_zip1v2di; break;
	case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
	case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
	case V2DFmode: gen = gen_aarch64_zip1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}

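/* Recognize patterns suitable for the DUP instructions: every element of
   the result is the same lane of the first input vector.  */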
static bool
aarch64_evpc_dup (struct expand_vec_perm_d *d)
{
  rtx (*gen) (rtx, rtx, rtx);
  rtx out = d->target;
  rtx in0;
  enum machine_mode vmode = d->vmode;
  unsigned int i, elt, nelt = d->nelt;
  rtx lane;

  /* TODO: This may not be big-endian safe.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; i++)
    {
      if (elt != d->perm[i])
	return false;
    }

  /* The generic preparation in aarch64_expand_vec_perm_const_1
     swaps the operand order and the permute indices if it finds
     d->perm[0] to be in the second operand.  Thus, we can always
     use d->op0 and need not do any extra arithmetic to get the
     correct lane number.  */
  in0 = d->op0;
  lane = GEN_INT (elt);

  switch (vmode)
    {
    case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
    case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
    case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
    case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
    case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
    case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
    case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
    case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
    case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
    case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
    default:
      return false;
    }

  emit_insn (gen (out, in0, lane));
  return true;
}

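/* Fall back to a full table lookup: materialise the permute indices as a
   constant selector vector and expand with TBL.  */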
static bool
aarch64_evpc_tbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  enum machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's TBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}

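/* Try to expand the constant permutation described by D.  The specialised
   ZIP/UZP/TRN/DUP recognizers are tried first; TBL is the fallback.
   Return true on success.  */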
static bool
aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;
      rtx x;

      for (i = 0; i < nelt; ++i)
	d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;
    }

  if (TARGET_SIMD)
    {
      if (aarch64_evpc_zip (d))
	return true;
      else if (aarch64_evpc_uzp (d))
	return true;
      else if (aarch64_evpc_trn (d))
	return true;
      else if (aarch64_evpc_dup (d))
	return true;
      return aarch64_evpc_tbl (d);
    }
  return false;
}

/* Expand a vec_perm_const pattern.  */

bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* Fall Through.  */
    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return aarch64_expand_vec_perm_const_1 (&d);
}

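/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  Return true if the
   permutation SEL of VMODE vectors can be expanded, by trial-expanding it
   into a discarded instruction sequence.  */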
static bool
aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
				     const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Calculate whether all elements are in one vector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* If all elements are from the second vector, reindex as if from the
     first vector.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to a single vector.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = aarch64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost

/* This hook determines whether unnamed bitfields affect the alignment
   of the containing structure.  The hook returns true if the structure
   should inherit the alignment requirements of an unnamed bitfield's
   type.  */
#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START aarch64_start_file

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list

#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE aarch64_can_eliminate

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage

/* Only the least significant bit is used for initialization guard
   variables.  */
#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
#endif

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN aarch64_fold_builtin

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_LRA_P
#define TARGET_LRA_P aarch64_lra_p

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if these
   accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"