/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2014 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "insn-codes.h"
#include "rtl.h"
#include "insn-attr.h"
#include "tree.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "regs.h"
#include "df.h"
#include "hard-reg-set.h"
#include "output.h"
#include "expr.h"
#include "reload.h"
#include "toplev.h"
#include "target.h"
#include "target-def.h"
#include "targhooks.h"
#include "ggc.h"
#include "function.h"
#include "tm_p.h"
#include "recog.h"
#include "langhooks.h"
#include "diagnostic-core.h"
#include "pointer-set.h"
#include "hash-table.h"
#include "vec.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "optabs.h"
#include "dwarf2.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "config/arm/aarch-cost-tables.h"

/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)

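/* For illustration: with the usual AArch64 configurations this works out to
   POINTER_BYTES == 8 for LP64 and POINTER_BYTES == 4 for ILP32, since
   POINTER_SIZE is 64 or 32 bits respectively.  */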
70/* Classifies an address.
71
72 ADDRESS_REG_IMM
73 A simple base register plus immediate offset.
74
75 ADDRESS_REG_WB
76 A base register indexed by immediate offset with writeback.
77
78 ADDRESS_REG_REG
79 A base register indexed by (optionally scaled) register.
80
81 ADDRESS_REG_UXTW
82 A base register indexed by (optionally scaled) zero-extended register.
83
84 ADDRESS_REG_SXTW
85 A base register indexed by (optionally scaled) sign-extended register.
86
87 ADDRESS_LO_SUM
88 A LO_SUM rtx with a base register and "LO12" symbol relocation.
89
90 ADDRESS_SYMBOLIC:
91 A constant symbolic address, in pc-relative literal pool. */
92
93enum aarch64_address_type {
94 ADDRESS_REG_IMM,
95 ADDRESS_REG_WB,
96 ADDRESS_REG_REG,
97 ADDRESS_REG_UXTW,
98 ADDRESS_REG_SXTW,
99 ADDRESS_LO_SUM,
100 ADDRESS_SYMBOLIC
101};
102
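/* For illustration only, these classes correspond roughly to assembly
   addressing forms such as (register numbers are arbitrary):

     ADDRESS_REG_IMM	[x0, #16]
     ADDRESS_REG_WB	[x0, #16]! or [x0], #16
     ADDRESS_REG_REG	[x0, x1, lsl #3]
     ADDRESS_REG_UXTW	[x0, w1, uxtw #2]
     ADDRESS_REG_SXTW	[x0, w1, sxtw #2]
     ADDRESS_LO_SUM	an add/ldr using a :lo12: relocation off an adrp base
     ADDRESS_SYMBOLIC	a pc-relative literal-pool reference.  */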
103struct aarch64_address_info {
104 enum aarch64_address_type type;
105 rtx base;
106 rtx offset;
107 int shift;
108 enum aarch64_symbol_type symbol_type;
109};
110
111struct simd_immediate_info
112{
113 rtx value;
114 int shift;
115 int element_width;
48063b9d 116 bool mvn;
e4f0f84d 117 bool msl;
118};
119
120/* The current code model. */
121enum aarch64_code_model aarch64_cmodel;
122
123#ifdef HAVE_AS_TLS
124#undef TARGET_HAVE_TLS
125#define TARGET_HAVE_TLS 1
126#endif
127
38e8f663 128static bool aarch64_lra_p (void);
129static bool aarch64_composite_type_p (const_tree, enum machine_mode);
130static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
131 const_tree,
132 enum machine_mode *, int *,
133 bool *);
134static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
135static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
43e9d192 136static void aarch64_override_options_after_change (void);
137static bool aarch64_vector_mode_supported_p (enum machine_mode);
138static unsigned bit_count (unsigned HOST_WIDE_INT);
139static bool aarch64_const_vec_all_same_int_p (rtx,
140 HOST_WIDE_INT, HOST_WIDE_INT);
141
142static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
143 const unsigned char *sel);
144
43e9d192 145/* The processor for which instructions should be scheduled. */
02fdbd5b 146enum aarch64_processor aarch64_tune = cortexa53;
147
148/* The current tuning set. */
149const struct tune_params *aarch64_tune_params;
150
151/* Mask to specify which instructions we are allowed to generate. */
152unsigned long aarch64_isa_flags = 0;
153
154/* Mask to specify which instruction scheduling options should be used. */
155unsigned long aarch64_tune_flags = 0;
156
157/* Tuning parameters. */
158
159#if HAVE_DESIGNATED_INITIALIZERS
160#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
161#else
162#define NAMED_PARAM(NAME, VAL) (VAL)
163#endif
164
165#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
166__extension__
167#endif
168
169#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
170__extension__
171#endif
172static const struct cpu_addrcost_table generic_addrcost_table =
173{
174 NAMED_PARAM (pre_modify, 0),
175 NAMED_PARAM (post_modify, 0),
176 NAMED_PARAM (register_offset, 0),
177 NAMED_PARAM (register_extend, 0),
178 NAMED_PARAM (imm_offset, 0)
179};
180
181#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
182__extension__
183#endif
184static const struct cpu_regmove_cost generic_regmove_cost =
185{
186 NAMED_PARAM (GP2GP, 1),
187 NAMED_PARAM (GP2FP, 2),
188 NAMED_PARAM (FP2GP, 2),
189 /* We currently do not provide direct support for TFmode Q->Q move.
190 Therefore we need to raise the cost above 2 in order to have
191 reload handle the situation. */
192 NAMED_PARAM (FP2FP, 4)
193};
194
195/* Generic costs for vector insn classes. */
196#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
197__extension__
198#endif
199static const struct cpu_vector_cost generic_vector_cost =
200{
201 NAMED_PARAM (scalar_stmt_cost, 1),
202 NAMED_PARAM (scalar_load_cost, 1),
203 NAMED_PARAM (scalar_store_cost, 1),
204 NAMED_PARAM (vec_stmt_cost, 1),
205 NAMED_PARAM (vec_to_scalar_cost, 1),
206 NAMED_PARAM (scalar_to_vec_cost, 1),
207 NAMED_PARAM (vec_align_load_cost, 1),
208 NAMED_PARAM (vec_unalign_load_cost, 1),
209 NAMED_PARAM (vec_unalign_store_cost, 1),
210 NAMED_PARAM (vec_store_cost, 1),
211 NAMED_PARAM (cond_taken_branch_cost, 3),
212 NAMED_PARAM (cond_not_taken_branch_cost, 1)
213};
214
215#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
216__extension__
217#endif
218static const struct tune_params generic_tunings =
219{
73250c4c 220 &generic_extra_costs,
221 &generic_addrcost_table,
222 &generic_regmove_cost,
8990e73a 223 &generic_vector_cost,
224 NAMED_PARAM (memmov_cost, 4),
225 NAMED_PARAM (issue_rate, 2)
226};
227
228static const struct tune_params cortexa53_tunings =
229{
230 &cortexa53_extra_costs,
231 &generic_addrcost_table,
232 &generic_regmove_cost,
233 &generic_vector_cost,
234 NAMED_PARAM (memmov_cost, 4),
235 NAMED_PARAM (issue_rate, 2)
236};
237
238/* A processor implementing AArch64. */
239struct processor
240{
241 const char *const name;
242 enum aarch64_processor core;
243 const char *arch;
244 const unsigned long flags;
245 const struct tune_params *const tune;
246};
247
248/* Processor cores implementing AArch64. */
249static const struct processor all_cores[] =
250{
192ed1dd 251#define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
252 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
253#include "aarch64-cores.def"
254#undef AARCH64_CORE
02fdbd5b 255 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
256 {NULL, aarch64_none, NULL, 0, NULL}
257};
258
259/* Architectures implementing AArch64. */
260static const struct processor all_architectures[] =
261{
262#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
263 {NAME, CORE, #ARCH, FLAGS, NULL},
264#include "aarch64-arches.def"
265#undef AARCH64_ARCH
266 {NULL, aarch64_none, NULL, 0, NULL}
267};
268
269/* Target specification. These are populated as commandline arguments
270 are processed, or NULL if not specified. */
271static const struct processor *selected_arch;
272static const struct processor *selected_cpu;
273static const struct processor *selected_tune;
274
275#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
276
277/* An ISA extension in the co-processor and main instruction set space. */
278struct aarch64_option_extension
279{
280 const char *const name;
281 const unsigned long flags_on;
282 const unsigned long flags_off;
283};
284
285/* ISA extensions in AArch64. */
286static const struct aarch64_option_extension all_extensions[] =
287{
288#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
289 {NAME, FLAGS_ON, FLAGS_OFF},
290#include "aarch64-option-extensions.def"
291#undef AARCH64_OPT_EXTENSION
292 {NULL, 0, 0}
293};
294
295/* Used to track the size of an address when generating a pre/post
296 increment address. */
297static enum machine_mode aarch64_memory_reference_mode;
298
299/* Used to force GTY into this file. */
300static GTY(()) int gty_dummy;
301
302/* A table of valid AArch64 "bitmask immediate" values for
303 logical instructions. */
304
305#define AARCH64_NUM_BITMASKS 5334
306static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
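/* For illustration: a "bitmask immediate" is a rotated run of set bits
   replicated across the register in 2, 4, 8, 16, 32 or 64-bit elements,
   e.g. 0x00ff00ff00ff00ff, 0x0000ffff0000ffff or 0x5555555555555555;
   an arbitrary constant such as 0x1234 is not representable this way.  */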
307
308/* Did we set flag_omit_frame_pointer just so
309 aarch64_frame_pointer_required would be called? */
310static bool faked_omit_frame_pointer;
311
312typedef enum aarch64_cond_code
313{
314 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
315 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
316 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
317}
318aarch64_cc;
319
320#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
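/* The enum above lists each condition next to its inverse, so flipping the
   low bit maps a code to its inverse, e.g. AARCH64_EQ <-> AARCH64_NE,
   AARCH64_CS <-> AARCH64_CC, AARCH64_GE <-> AARCH64_LT.  */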
321
322/* The condition codes of the processor, and the inverse function. */
323static const char * const aarch64_condition_codes[] =
324{
325 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
326 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
327};
328
329/* Provide a mapping from gcc register numbers to dwarf register numbers. */
330unsigned
331aarch64_dbx_register_number (unsigned regno)
332{
333 if (GP_REGNUM_P (regno))
334 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
335 else if (regno == SP_REGNUM)
336 return AARCH64_DWARF_SP;
337 else if (FP_REGNUM_P (regno))
338 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
339
340 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
341 equivalent DWARF register. */
342 return DWARF_FRAME_REGISTERS;
343}
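/* For illustration, assuming the usual AArch64 DWARF numbering
   (AARCH64_DWARF_R0 == 0, AARCH64_DWARF_SP == 31, AARCH64_DWARF_V0 == 64):
   x0 maps to 0, x29 to 29, sp to 31 and v0 to 64.  */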
344
345/* Return TRUE if MODE is any of the large INT modes. */
346static bool
347aarch64_vect_struct_mode_p (enum machine_mode mode)
348{
349 return mode == OImode || mode == CImode || mode == XImode;
350}
351
352/* Return TRUE if MODE is any of the vector modes. */
353static bool
354aarch64_vector_mode_p (enum machine_mode mode)
355{
356 return aarch64_vector_mode_supported_p (mode)
357 || aarch64_vect_struct_mode_p (mode);
358}
359
360/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
361static bool
362aarch64_array_mode_supported_p (enum machine_mode mode,
363 unsigned HOST_WIDE_INT nelems)
364{
365 if (TARGET_SIMD
366 && AARCH64_VALID_SIMD_QREG_MODE (mode)
367 && (nelems >= 2 && nelems <= 4))
368 return true;
369
370 return false;
371}
372
373/* Implement HARD_REGNO_NREGS. */
374
375int
376aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
377{
378 switch (aarch64_regno_regclass (regno))
379 {
380 case FP_REGS:
381 case FP_LO_REGS:
382 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
383 default:
384 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
385 }
386 gcc_unreachable ();
387}
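/* For illustration: a 16-byte TImode value occupies two X registers
   (UNITS_PER_WORD == 8) but only one V register (UNITS_PER_VREG == 16).  */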
388
389/* Implement HARD_REGNO_MODE_OK. */
390
391int
392aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
393{
394 if (GET_MODE_CLASS (mode) == MODE_CC)
395 return regno == CC_REGNUM;
396
397 if (regno == SP_REGNUM)
398 /* The purpose of comparing with ptr_mode is to support the
399 global register variable associated with the stack pointer
400 register via the syntax of asm ("wsp") in ILP32. */
401 return mode == Pmode || mode == ptr_mode;
402
403 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
404 return mode == Pmode;
405
406 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
407 return 1;
408
409 if (FP_REGNUM_P (regno))
410 {
411 if (aarch64_vect_struct_mode_p (mode))
412 return
413 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
414 else
415 return 1;
416 }
417
418 return 0;
419}
420
/* Return true if calls to DECL should be treated as
   long-calls (i.e. called via a register).  */
423static bool
424aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
425{
426 return false;
427}
428
/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (i.e. called via a register).  */
431bool
432aarch64_is_long_call_p (rtx sym)
433{
434 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
435}
436
/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

     (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)).  */
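/* For example (illustrative only): mult_imm == 4 and extract_imm == 34
   describe a 32-bit value that has been extended and then shifted left by
   two, since extract_imm == 32 + shift and mult_imm == 1 << shift.  */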
442bool
443aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
444 rtx extract_imm)
445{
446 HOST_WIDE_INT mult_val, extract_val;
447
448 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
449 return false;
450
451 mult_val = INTVAL (mult_imm);
452 extract_val = INTVAL (extract_imm);
453
454 if (extract_val > 8
455 && extract_val < GET_MODE_BITSIZE (mode)
456 && exact_log2 (extract_val & ~7) > 0
457 && (extract_val & 7) <= 4
458 && mult_val == (1 << (extract_val & 7)))
459 return true;
460
461 return false;
462}
463
464/* Emit an insn that's a simple single-set. Both the operands must be
465 known to be valid. */
466inline static rtx
467emit_set_insn (rtx x, rtx y)
468{
469 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
470}
471
472/* X and Y are two things to compare using CODE. Emit the compare insn and
473 return the rtx for register 0 in the proper mode. */
474rtx
475aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
476{
477 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
478 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
479
480 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
481 return cc_reg;
482}
483
484/* Build the SYMBOL_REF for __tls_get_addr. */
485
486static GTY(()) rtx tls_get_addr_libfunc;
487
488rtx
489aarch64_tls_get_addr (void)
490{
491 if (!tls_get_addr_libfunc)
492 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
493 return tls_get_addr_libfunc;
494}
495
496/* Return the TLS model to use for ADDR. */
497
498static enum tls_model
499tls_symbolic_operand_type (rtx addr)
500{
501 enum tls_model tls_kind = TLS_MODEL_NONE;
502 rtx sym, addend;
503
504 if (GET_CODE (addr) == CONST)
505 {
506 split_const (addr, &sym, &addend);
507 if (GET_CODE (sym) == SYMBOL_REF)
508 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
509 }
510 else if (GET_CODE (addr) == SYMBOL_REF)
511 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
512
513 return tls_kind;
514}
515
/* We'll allow lo_sum's in our legitimate addresses so that combine
   can take care of combining addresses where necessary, but for
   generation purposes we'll generate the address as:
520 RTL Absolute
521 tmp = hi (symbol_ref); adrp x1, foo
522 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
523 nop
524
525 PIC TLS
526 adrp x1, :got:foo adrp tmp, :tlsgd:foo
527 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
528 bl __tls_get_addr
529 nop
530
531 Load TLS symbol, depending on TLS mechanism and TLS access model.
532
533 Global Dynamic - Traditional TLS:
534 adrp tmp, :tlsgd:imm
535 add dest, tmp, #:tlsgd_lo12:imm
536 bl __tls_get_addr
537
538 Global Dynamic - TLS Descriptors:
539 adrp dest, :tlsdesc:imm
540 ldr tmp, [dest, #:tlsdesc_lo12:imm]
541 add dest, dest, #:tlsdesc_lo12:imm
542 blr tmp
543 mrs tp, tpidr_el0
544 add dest, dest, tp
545
546 Initial Exec:
547 mrs tp, tpidr_el0
548 adrp tmp, :gottprel:imm
549 ldr dest, [tmp, #:gottprel_lo12:imm]
550 add dest, dest, tp
551
552 Local Exec:
553 mrs tp, tpidr_el0
554 add t0, tp, #:tprel_hi12:imm
555 add t0, #:tprel_lo12_nc:imm
556*/
557
558static void
559aarch64_load_symref_appropriately (rtx dest, rtx imm,
560 enum aarch64_symbol_type type)
561{
562 switch (type)
563 {
564 case SYMBOL_SMALL_ABSOLUTE:
565 {
28514dda 566 /* In ILP32, the mode of dest can be either SImode or DImode. */
43e9d192 567 rtx tmp_reg = dest;
568 enum machine_mode mode = GET_MODE (dest);
569
570 gcc_assert (mode == Pmode || mode == ptr_mode);
571
43e9d192 572 if (can_create_pseudo_p ())
28514dda 573 tmp_reg = gen_reg_rtx (mode);
43e9d192 574
28514dda 575 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
576 emit_insn (gen_add_losym (dest, tmp_reg, imm));
577 return;
578 }
579
580 case SYMBOL_TINY_ABSOLUTE:
581 emit_insn (gen_rtx_SET (Pmode, dest, imm));
582 return;
583
584 case SYMBOL_SMALL_GOT:
585 {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the GOT entry is always SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different ldr_got_small
	   patterns here (two patterns for ILP32).  */
43e9d192 593 rtx tmp_reg = dest;
594 enum machine_mode mode = GET_MODE (dest);
595
43e9d192 596 if (can_create_pseudo_p ())
597 tmp_reg = gen_reg_rtx (mode);
598
599 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
600 if (mode == ptr_mode)
601 {
602 if (mode == DImode)
603 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
604 else
605 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
606 }
607 else
608 {
609 gcc_assert (mode == Pmode);
610 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
611 }
612
613 return;
614 }
615
616 case SYMBOL_SMALL_TLSGD:
617 {
618 rtx insns;
619 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
620
621 start_sequence ();
622 emit_call_insn (gen_tlsgd_small (result, imm));
623 insns = get_insns ();
624 end_sequence ();
625
626 RTL_CONST_CALL_P (insns) = 1;
627 emit_libcall_block (insns, dest, result, imm);
628 return;
629 }
630
631 case SYMBOL_SMALL_TLSDESC:
632 {
633 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
634 rtx tp;
635
636 emit_insn (gen_tlsdesc_small (imm));
637 tp = aarch64_load_tp (NULL);
638 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
639 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
640 return;
641 }
642
643 case SYMBOL_SMALL_GOTTPREL:
644 {
645 rtx tmp_reg = gen_reg_rtx (Pmode);
646 rtx tp = aarch64_load_tp (NULL);
647 emit_insn (gen_tlsie_small (tmp_reg, imm));
648 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
649 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
650 return;
651 }
652
653 case SYMBOL_SMALL_TPREL:
654 {
655 rtx tp = aarch64_load_tp (NULL);
656 emit_insn (gen_tlsle_small (dest, tp, imm));
657 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
658 return;
659 }
660
661 case SYMBOL_TINY_GOT:
662 emit_insn (gen_ldr_got_tiny (dest, imm));
663 return;
664
665 default:
666 gcc_unreachable ();
667 }
668}
669
670/* Emit a move from SRC to DEST. Assume that the move expanders can
671 handle all moves if !can_create_pseudo_p (). The distinction is
672 important because, unlike emit_move_insn, the move expanders know
673 how to force Pmode objects into the constant pool even when the
674 constant pool address is not itself legitimate. */
675static rtx
676aarch64_emit_move (rtx dest, rtx src)
677{
678 return (can_create_pseudo_p ()
679 ? emit_move_insn (dest, src)
680 : emit_move_insn_1 (dest, src));
681}
682
683void
684aarch64_split_128bit_move (rtx dst, rtx src)
685{
686 rtx low_dst;
687
688 enum machine_mode src_mode = GET_MODE (src);
689 enum machine_mode dst_mode = GET_MODE (dst);
690 int src_regno = REGNO (src);
691 int dst_regno = REGNO (dst);
692
693 gcc_assert (dst_mode == TImode || dst_mode == TFmode);
694
695 if (REG_P (dst) && REG_P (src))
696 {
12dc6974 697 gcc_assert (src_mode == TImode || src_mode == TFmode);
698
699 /* Handle r -> w, w -> r. */
700 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
701 {
702 switch (src_mode) {
703 case TImode:
704 emit_insn
705 (gen_aarch64_movtilow_di (dst, gen_lowpart (word_mode, src)));
706 emit_insn
707 (gen_aarch64_movtihigh_di (dst, gen_highpart (word_mode, src)));
708 return;
709 case TFmode:
710 emit_insn
711 (gen_aarch64_movtflow_di (dst, gen_lowpart (word_mode, src)));
712 emit_insn
713 (gen_aarch64_movtfhigh_di (dst, gen_highpart (word_mode, src)));
714 return;
715 default:
716 gcc_unreachable ();
717 }
718 }
719 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
720 {
721 switch (src_mode) {
722 case TImode:
723 emit_insn
724 (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst), src));
725 emit_insn
726 (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst), src));
727 return;
728 case TFmode:
729 emit_insn
730 (gen_aarch64_movdi_tflow (gen_lowpart (word_mode, dst), src));
731 emit_insn
732 (gen_aarch64_movdi_tfhigh (gen_highpart (word_mode, dst), src));
733 return;
734 default:
735 gcc_unreachable ();
736 }
737 }
738 /* Fall through to r -> r cases. */
739 }
740
741 switch (dst_mode) {
742 case TImode:
743 low_dst = gen_lowpart (word_mode, dst);
744 if (REG_P (low_dst)
745 && reg_overlap_mentioned_p (low_dst, src))
746 {
747 aarch64_emit_move (gen_highpart (word_mode, dst),
748 gen_highpart_mode (word_mode, TImode, src));
749 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
750 }
751 else
752 {
753 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
754 aarch64_emit_move (gen_highpart (word_mode, dst),
755 gen_highpart_mode (word_mode, TImode, src));
756 }
757 return;
758 case TFmode:
759 emit_move_insn (gen_rtx_REG (DFmode, dst_regno),
760 gen_rtx_REG (DFmode, src_regno));
761 emit_move_insn (gen_rtx_REG (DFmode, dst_regno + 1),
762 gen_rtx_REG (DFmode, src_regno + 1));
763 return;
764 default:
765 gcc_unreachable ();
766 }
767}
768
769bool
770aarch64_split_128bit_move_p (rtx dst, rtx src)
771{
772 return (! REG_P (src)
773 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
774}
775
776/* Split a complex SIMD combine. */
777
778void
779aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
780{
781 enum machine_mode src_mode = GET_MODE (src1);
782 enum machine_mode dst_mode = GET_MODE (dst);
783
784 gcc_assert (VECTOR_MODE_P (dst_mode));
785
786 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
787 {
788 rtx (*gen) (rtx, rtx, rtx);
789
790 switch (src_mode)
791 {
792 case V8QImode:
793 gen = gen_aarch64_simd_combinev8qi;
794 break;
795 case V4HImode:
796 gen = gen_aarch64_simd_combinev4hi;
797 break;
798 case V2SImode:
799 gen = gen_aarch64_simd_combinev2si;
800 break;
801 case V2SFmode:
802 gen = gen_aarch64_simd_combinev2sf;
803 break;
804 case DImode:
805 gen = gen_aarch64_simd_combinedi;
806 break;
807 case DFmode:
808 gen = gen_aarch64_simd_combinedf;
809 break;
810 default:
811 gcc_unreachable ();
812 }
813
814 emit_insn (gen (dst, src1, src2));
815 return;
816 }
817}
818
819/* Split a complex SIMD move. */
820
821void
822aarch64_split_simd_move (rtx dst, rtx src)
823{
824 enum machine_mode src_mode = GET_MODE (src);
825 enum machine_mode dst_mode = GET_MODE (dst);
826
827 gcc_assert (VECTOR_MODE_P (dst_mode));
828
829 if (REG_P (dst) && REG_P (src))
830 {
831 rtx (*gen) (rtx, rtx);
832
833 gcc_assert (VECTOR_MODE_P (src_mode));
834
835 switch (src_mode)
836 {
837 case V16QImode:
c59b7e28 838 gen = gen_aarch64_split_simd_movv16qi;
839 break;
840 case V8HImode:
c59b7e28 841 gen = gen_aarch64_split_simd_movv8hi;
842 break;
843 case V4SImode:
c59b7e28 844 gen = gen_aarch64_split_simd_movv4si;
845 break;
846 case V2DImode:
c59b7e28 847 gen = gen_aarch64_split_simd_movv2di;
848 break;
849 case V4SFmode:
c59b7e28 850 gen = gen_aarch64_split_simd_movv4sf;
851 break;
852 case V2DFmode:
c59b7e28 853 gen = gen_aarch64_split_simd_movv2df;
854 break;
855 default:
856 gcc_unreachable ();
857 }
858
859 emit_insn (gen (dst, src));
860 return;
861 }
862}
863
43e9d192 864static rtx
e18b4a81 865aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
866{
867 if (can_create_pseudo_p ())
e18b4a81 868 return force_reg (mode, value);
869 else
870 {
871 x = aarch64_emit_move (x, value);
872 return x;
873 }
874}
875
876
877static rtx
878aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
879{
9c023bf0 880 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
881 {
882 rtx high;
883 /* Load the full offset into a register. This
884 might be improvable in the future. */
885 high = GEN_INT (offset);
886 offset = 0;
887 high = aarch64_force_temporary (mode, temp, high);
888 reg = aarch64_force_temporary (mode, temp,
889 gen_rtx_PLUS (mode, high, reg));
890 }
891 return plus_constant (mode, reg, offset);
892}
893
894void
895aarch64_expand_mov_immediate (rtx dest, rtx imm)
896{
897 enum machine_mode mode = GET_MODE (dest);
898 unsigned HOST_WIDE_INT mask;
899 int i;
900 bool first;
901 unsigned HOST_WIDE_INT val;
902 bool subtargets;
903 rtx subtarget;
904 int one_match, zero_match;
905
906 gcc_assert (mode == SImode || mode == DImode);
907
908 /* Check on what type of symbol it is. */
909 if (GET_CODE (imm) == SYMBOL_REF
910 || GET_CODE (imm) == LABEL_REF
911 || GET_CODE (imm) == CONST)
912 {
913 rtx mem, base, offset;
914 enum aarch64_symbol_type sty;
915
916 /* If we have (const (plus symbol offset)), separate out the offset
917 before we start classifying the symbol. */
918 split_const (imm, &base, &offset);
919
920 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
921 switch (sty)
922 {
923 case SYMBOL_FORCE_TO_MEM:
924 if (offset != const0_rtx
925 && targetm.cannot_force_const_mem (mode, imm))
926 {
927 gcc_assert(can_create_pseudo_p ());
e18b4a81 928 base = aarch64_force_temporary (mode, dest, base);
929 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
930 aarch64_emit_move (dest, base);
931 return;
932 }
28514dda 933 mem = force_const_mem (ptr_mode, imm);
43e9d192 934 gcc_assert (mem);
935 if (mode != ptr_mode)
936 mem = gen_rtx_ZERO_EXTEND (mode, mem);
937 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
938 return;
939
940 case SYMBOL_SMALL_TLSGD:
941 case SYMBOL_SMALL_TLSDESC:
942 case SYMBOL_SMALL_GOTTPREL:
943 case SYMBOL_SMALL_GOT:
87dd8ab0 944 case SYMBOL_TINY_GOT:
945 if (offset != const0_rtx)
946 {
947 gcc_assert(can_create_pseudo_p ());
e18b4a81 948 base = aarch64_force_temporary (mode, dest, base);
949 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
950 aarch64_emit_move (dest, base);
951 return;
952 }
953 /* FALLTHRU */
954
955 case SYMBOL_SMALL_TPREL:
956 case SYMBOL_SMALL_ABSOLUTE:
a5350ddc 957 case SYMBOL_TINY_ABSOLUTE:
958 aarch64_load_symref_appropriately (dest, imm, sty);
959 return;
960
961 default:
962 gcc_unreachable ();
963 }
964 }
965
966 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
967 {
968 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
969 return;
970 }
971
972 if (!CONST_INT_P (imm))
973 {
974 if (GET_CODE (imm) == HIGH)
975 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
976 else
977 {
978 rtx mem = force_const_mem (mode, imm);
979 gcc_assert (mem);
980 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
981 }
982
983 return;
984 }
985
986 if (mode == SImode)
987 {
988 /* We know we can't do this in 1 insn, and we must be able to do it
989 in two; so don't mess around looking for sequences that don't buy
990 us anything. */
991 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
992 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
993 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
994 return;
995 }
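  /* For illustration: the two-instruction SImode path above would emit
     roughly "mov w0, #0x5678" followed by "movk w0, #0x1234, lsl #16"
     for the constant 0x12345678.  */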
996
997 /* Remaining cases are all for DImode. */
998
999 val = INTVAL (imm);
1000 subtargets = optimize && can_create_pseudo_p ();
1001
1002 one_match = 0;
1003 zero_match = 0;
1004 mask = 0xffff;
1005
1006 for (i = 0; i < 64; i += 16, mask <<= 16)
1007 {
1008 if ((val & mask) == 0)
1009 zero_match++;
1010 else if ((val & mask) == mask)
1011 one_match++;
1012 }
1013
1014 if (one_match == 2)
1015 {
1016 mask = 0xffff;
1017 for (i = 0; i < 64; i += 16, mask <<= 16)
1018 {
1019 if ((val & mask) != mask)
1020 {
1021 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1022 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1023 GEN_INT ((val >> i) & 0xffff)));
1024 return;
1025 }
1026 }
1027 gcc_unreachable ();
1028 }
1029
1030 if (zero_match == 2)
1031 goto simple_sequence;
1032
1033 mask = 0x0ffff0000UL;
1034 for (i = 16; i < 64; i += 16, mask <<= 16)
1035 {
1036 HOST_WIDE_INT comp = mask & ~(mask - 1);
1037
1038 if (aarch64_uimm12_shift (val - (val & mask)))
1039 {
1040 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1041
1042 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1043 emit_insn (gen_adddi3 (dest, subtarget,
1044 GEN_INT (val - (val & mask))));
1045 return;
1046 }
1047 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1048 {
1049 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1050
1051 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1052 GEN_INT ((val + comp) & mask)));
1053 emit_insn (gen_adddi3 (dest, subtarget,
1054 GEN_INT (val - ((val + comp) & mask))));
1055 return;
1056 }
1057 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1058 {
1059 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1060
1061 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1062 GEN_INT ((val - comp) | ~mask)));
1063 emit_insn (gen_adddi3 (dest, subtarget,
1064 GEN_INT (val - ((val - comp) | ~mask))));
1065 return;
1066 }
1067 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1068 {
1069 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1070
1071 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1072 GEN_INT (val | ~mask)));
1073 emit_insn (gen_adddi3 (dest, subtarget,
1074 GEN_INT (val - (val | ~mask))));
1075 return;
1076 }
1077 }
1078
1079 /* See if we can do it by arithmetically combining two
1080 immediates. */
1081 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1082 {
1083 int j;
1084 mask = 0xffff;
1085
1086 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1087 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1088 {
1089 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1090 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1091 GEN_INT (aarch64_bitmasks[i])));
1092 emit_insn (gen_adddi3 (dest, subtarget,
1093 GEN_INT (val - aarch64_bitmasks[i])));
1094 return;
1095 }
1096
1097 for (j = 0; j < 64; j += 16, mask <<= 16)
1098 {
1099 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1100 {
1101 emit_insn (gen_rtx_SET (VOIDmode, dest,
1102 GEN_INT (aarch64_bitmasks[i])));
1103 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1104 GEN_INT ((val >> j) & 0xffff)));
1105 return;
1106 }
1107 }
1108 }
1109
1110 /* See if we can do it by logically combining two immediates. */
1111 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1112 {
1113 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1114 {
1115 int j;
1116
1117 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1118 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1119 {
1120 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1121 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1122 GEN_INT (aarch64_bitmasks[i])));
1123 emit_insn (gen_iordi3 (dest, subtarget,
1124 GEN_INT (aarch64_bitmasks[j])));
1125 return;
1126 }
1127 }
1128 else if ((val & aarch64_bitmasks[i]) == val)
1129 {
1130 int j;
1131
1132 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1133 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1134 {
1135
1136 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1137 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1138 GEN_INT (aarch64_bitmasks[j])));
1139 emit_insn (gen_anddi3 (dest, subtarget,
1140 GEN_INT (aarch64_bitmasks[i])));
1141 return;
1142 }
1143 }
1144 }
1145
1146 simple_sequence:
1147 first = true;
1148 mask = 0xffff;
1149 for (i = 0; i < 64; i += 16, mask <<= 16)
1150 {
1151 if ((val & mask) != 0)
1152 {
1153 if (first)
1154 {
1155 emit_insn (gen_rtx_SET (VOIDmode, dest,
1156 GEN_INT (val & mask)));
1157 first = false;
1158 }
1159 else
1160 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1161 GEN_INT ((val >> i) & 0xffff)));
1162 }
1163 }
1164}
1165
1166static bool
1167aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1168{
1169 /* Indirect calls are not currently supported. */
1170 if (decl == NULL)
1171 return false;
1172
  /* Cannot tail-call to long-calls, since these are outside of the
     range of a branch instruction (we could handle this if we added
     support for indirect tail-calls).  */
1176 if (aarch64_decl_is_long_call_p (decl))
1177 return false;
1178
1179 return true;
1180}
1181
1182/* Implement TARGET_PASS_BY_REFERENCE. */
1183
1184static bool
1185aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1186 enum machine_mode mode,
1187 const_tree type,
1188 bool named ATTRIBUTE_UNUSED)
1189{
1190 HOST_WIDE_INT size;
1191 enum machine_mode dummymode;
1192 int nregs;
1193
1194 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1195 size = (mode == BLKmode && type)
1196 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1197
1198 if (type)
1199 {
      /* Arrays are always passed by reference.  */
1201 if (TREE_CODE (type) == ARRAY_TYPE)
1202 return true;
1203 /* Other aggregates based on their size. */
1204 if (AGGREGATE_TYPE_P (type))
1205 size = int_size_in_bytes (type);
1206 }
1207
  /* Variable sized arguments are always passed by reference.  */
1209 if (size < 0)
1210 return true;
1211
1212 /* Can this be a candidate to be passed in fp/simd register(s)? */
1213 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1214 &dummymode, &nregs,
1215 NULL))
1216 return false;
1217
  /* Arguments which are variable sized or larger than 2 registers are
     passed by reference unless they are a homogeneous floating-point
     aggregate.  */
1221 return size > 2 * UNITS_PER_WORD;
1222}
1223
1224/* Return TRUE if VALTYPE is padded to its least significant bits. */
1225static bool
1226aarch64_return_in_msb (const_tree valtype)
1227{
1228 enum machine_mode dummy_mode;
1229 int dummy_int;
1230
1231 /* Never happens in little-endian mode. */
1232 if (!BYTES_BIG_ENDIAN)
1233 return false;
1234
1235 /* Only composite types smaller than or equal to 16 bytes can
1236 be potentially returned in registers. */
1237 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1238 || int_size_in_bytes (valtype) <= 0
1239 || int_size_in_bytes (valtype) > 16)
1240 return false;
1241
1242 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1243 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1244 is always passed/returned in the least significant bits of fp/simd
1245 register(s). */
1246 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1247 &dummy_mode, &dummy_int, NULL))
1248 return false;
1249
1250 return true;
1251}
1252
1253/* Implement TARGET_FUNCTION_VALUE.
1254 Define how to find the value returned by a function. */
1255
1256static rtx
1257aarch64_function_value (const_tree type, const_tree func,
1258 bool outgoing ATTRIBUTE_UNUSED)
1259{
1260 enum machine_mode mode;
1261 int unsignedp;
1262 int count;
1263 enum machine_mode ag_mode;
1264
1265 mode = TYPE_MODE (type);
1266 if (INTEGRAL_TYPE_P (type))
1267 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1268
1269 if (aarch64_return_in_msb (type))
1270 {
1271 HOST_WIDE_INT size = int_size_in_bytes (type);
1272
1273 if (size % UNITS_PER_WORD != 0)
1274 {
1275 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1276 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1277 }
1278 }
1279
1280 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1281 &ag_mode, &count, NULL))
1282 {
1283 if (!aarch64_composite_type_p (type, mode))
1284 {
1285 gcc_assert (count == 1 && mode == ag_mode);
1286 return gen_rtx_REG (mode, V0_REGNUM);
1287 }
1288 else
1289 {
1290 int i;
1291 rtx par;
1292
1293 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1294 for (i = 0; i < count; i++)
1295 {
1296 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1297 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1298 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1299 XVECEXP (par, 0, i) = tmp;
1300 }
1301 return par;
1302 }
1303 }
1304 else
1305 return gen_rtx_REG (mode, R0_REGNUM);
1306}
1307
1308/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1309 Return true if REGNO is the number of a hard register in which the values
1310 of called function may come back. */
1311
1312static bool
1313aarch64_function_value_regno_p (const unsigned int regno)
1314{
1315 /* Maximum of 16 bytes can be returned in the general registers. Examples
1316 of 16-byte return values are: 128-bit integers and 16-byte small
1317 structures (excluding homogeneous floating-point aggregates). */
1318 if (regno == R0_REGNUM || regno == R1_REGNUM)
1319 return true;
1320
1321 /* Up to four fp/simd registers can return a function value, e.g. a
1322 homogeneous floating-point aggregate having four members. */
1323 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1324 return !TARGET_GENERAL_REGS_ONLY;
1325
1326 return false;
1327}
1328
1329/* Implement TARGET_RETURN_IN_MEMORY.
1330
1331 If the type T of the result of a function is such that
1332 void func (T arg)
1333 would require that arg be passed as a value in a register (or set of
1334 registers) according to the parameter passing rules, then the result
1335 is returned in the same registers as would be used for such an
1336 argument. */
1337
1338static bool
1339aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1340{
1341 HOST_WIDE_INT size;
1342 enum machine_mode ag_mode;
1343 int count;
1344
1345 if (!AGGREGATE_TYPE_P (type)
1346 && TREE_CODE (type) != COMPLEX_TYPE
1347 && TREE_CODE (type) != VECTOR_TYPE)
1348 /* Simple scalar types always returned in registers. */
1349 return false;
1350
1351 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1352 type,
1353 &ag_mode,
1354 &count,
1355 NULL))
1356 return false;
1357
1358 /* Types larger than 2 registers returned in memory. */
1359 size = int_size_in_bytes (type);
1360 return (size < 0 || size > 2 * UNITS_PER_WORD);
1361}
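/* For illustration (per the AAPCS64 rules implemented above): a struct of
   three doubles is a homogeneous floating-point aggregate and is returned
   in registers, while a 24-byte struct of ints exceeds two GP registers
   and is returned in memory.  */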
1362
1363static bool
1364aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1365 const_tree type, int *nregs)
1366{
1367 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1368 return aarch64_vfp_is_call_or_return_candidate (mode,
1369 type,
1370 &pcum->aapcs_vfp_rmode,
1371 nregs,
1372 NULL);
1373}
1374
1375/* Given MODE and TYPE of a function argument, return the alignment in
1376 bits. The idea is to suppress any stronger alignment requested by
1377 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1378 This is a helper function for local use only. */
1379
1380static unsigned int
1381aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1382{
1383 unsigned int alignment;
1384
1385 if (type)
1386 {
1387 if (!integer_zerop (TYPE_SIZE (type)))
1388 {
1389 if (TYPE_MODE (type) == mode)
1390 alignment = TYPE_ALIGN (type);
1391 else
1392 alignment = GET_MODE_ALIGNMENT (mode);
1393 }
1394 else
1395 alignment = 0;
1396 }
1397 else
1398 alignment = GET_MODE_ALIGNMENT (mode);
1399
1400 return alignment;
1401}
1402
1403/* Layout a function argument according to the AAPCS64 rules. The rule
1404 numbers refer to the rule numbers in the AAPCS64. */
1405
1406static void
1407aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1408 const_tree type,
1409 bool named ATTRIBUTE_UNUSED)
1410{
1411 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1412 int ncrn, nvrn, nregs;
1413 bool allocate_ncrn, allocate_nvrn;
1414
1415 /* We need to do this once per argument. */
1416 if (pcum->aapcs_arg_processed)
1417 return;
1418
1419 pcum->aapcs_arg_processed = true;
1420
1421 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1422 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1423 mode,
1424 type,
1425 &nregs);
1426
  /* allocate_ncrn may be a false positive, but allocate_nvrn is quite
     reliable.  The following code thus handles passing by SIMD/FP
     registers first.  */
1429
1430 nvrn = pcum->aapcs_nvrn;
1431
  /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
     and homogeneous short-vector aggregates (HVA).  */
1434 if (allocate_nvrn)
1435 {
1436 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1437 {
1438 pcum->aapcs_nextnvrn = nvrn + nregs;
1439 if (!aarch64_composite_type_p (type, mode))
1440 {
1441 gcc_assert (nregs == 1);
1442 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1443 }
1444 else
1445 {
1446 rtx par;
1447 int i;
1448 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1449 for (i = 0; i < nregs; i++)
1450 {
1451 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1452 V0_REGNUM + nvrn + i);
1453 tmp = gen_rtx_EXPR_LIST
1454 (VOIDmode, tmp,
1455 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1456 XVECEXP (par, 0, i) = tmp;
1457 }
1458 pcum->aapcs_reg = par;
1459 }
1460 return;
1461 }
1462 else
1463 {
1464 /* C.3 NSRN is set to 8. */
1465 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1466 goto on_stack;
1467 }
1468 }
1469
1470 ncrn = pcum->aapcs_ncrn;
1471 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1472 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1473
1474
  /* C6 - C9, though the sign and zero extension semantics are
     handled elsewhere.  This is the case where the argument fits
     entirely in general registers.  */
1478 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1479 {
1480 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1481
1482 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1483
1484 /* C.8 if the argument has an alignment of 16 then the NGRN is
1485 rounded up to the next even number. */
1486 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1487 {
1488 ++ncrn;
1489 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1490 }
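      /* For example, a 16-byte aligned quad-word argument arriving when
	 NCRN is odd skips one core register and is passed in the next
	 even/odd pair, e.g. in x2/x3 rather than x1/x2.  */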
1491 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1492 A reg is still generated for it, but the caller should be smart
1493 enough not to use it. */
1494 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1495 {
1496 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1497 }
1498 else
1499 {
1500 rtx par;
1501 int i;
1502
1503 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1504 for (i = 0; i < nregs; i++)
1505 {
1506 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1507 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1508 GEN_INT (i * UNITS_PER_WORD));
1509 XVECEXP (par, 0, i) = tmp;
1510 }
1511 pcum->aapcs_reg = par;
1512 }
1513
1514 pcum->aapcs_nextncrn = ncrn + nregs;
1515 return;
1516 }
1517
1518 /* C.11 */
1519 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1520
  /* The argument is passed on the stack; record the needed number of words
     for this argument (we can re-use NREGS) and align the total size if
     necessary.  */
1524on_stack:
1525 pcum->aapcs_stack_words = nregs;
1526 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1527 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1528 16 / UNITS_PER_WORD) + 1;
1529 return;
1530}
1531
1532/* Implement TARGET_FUNCTION_ARG. */
1533
1534static rtx
1535aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1536 const_tree type, bool named)
1537{
1538 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1539 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1540
1541 if (mode == VOIDmode)
1542 return NULL_RTX;
1543
1544 aarch64_layout_arg (pcum_v, mode, type, named);
1545 return pcum->aapcs_reg;
1546}
1547
1548void
1549aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1550 const_tree fntype ATTRIBUTE_UNUSED,
1551 rtx libname ATTRIBUTE_UNUSED,
1552 const_tree fndecl ATTRIBUTE_UNUSED,
1553 unsigned n_named ATTRIBUTE_UNUSED)
1554{
1555 pcum->aapcs_ncrn = 0;
1556 pcum->aapcs_nvrn = 0;
1557 pcum->aapcs_nextncrn = 0;
1558 pcum->aapcs_nextnvrn = 0;
1559 pcum->pcs_variant = ARM_PCS_AAPCS64;
1560 pcum->aapcs_reg = NULL_RTX;
1561 pcum->aapcs_arg_processed = false;
1562 pcum->aapcs_stack_words = 0;
1563 pcum->aapcs_stack_size = 0;
1564
1565 return;
1566}
1567
1568static void
1569aarch64_function_arg_advance (cumulative_args_t pcum_v,
1570 enum machine_mode mode,
1571 const_tree type,
1572 bool named)
1573{
1574 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1575 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1576 {
1577 aarch64_layout_arg (pcum_v, mode, type, named);
1578 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1579 != (pcum->aapcs_stack_words != 0));
1580 pcum->aapcs_arg_processed = false;
1581 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1582 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1583 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1584 pcum->aapcs_stack_words = 0;
1585 pcum->aapcs_reg = NULL_RTX;
1586 }
1587}
1588
1589bool
1590aarch64_function_arg_regno_p (unsigned regno)
1591{
1592 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1593 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1594}
1595
1596/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1597 PARM_BOUNDARY bits of alignment, but will be given anything up
1598 to STACK_BOUNDARY bits if the type requires it. This makes sure
1599 that both before and after the layout of each argument, the Next
1600 Stacked Argument Address (NSAA) will have a minimum alignment of
1601 8 bytes. */
1602
1603static unsigned int
1604aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1605{
1606 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1607
1608 if (alignment < PARM_BOUNDARY)
1609 alignment = PARM_BOUNDARY;
1610 if (alignment > STACK_BOUNDARY)
1611 alignment = STACK_BOUNDARY;
1612 return alignment;
1613}
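/* For illustration, assuming the usual AArch64 values of PARM_BOUNDARY (64)
   and STACK_BOUNDARY (128): every stack argument slot is aligned to at
   least 8 bytes and at most 16 bytes, whatever the type requests.  */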
1614
1615/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1616
1617 Return true if an argument passed on the stack should be padded upwards,
1618 i.e. if the least-significant byte of the stack slot has useful data.
1619
1620 Small aggregate types are placed in the lowest memory address.
1621
1622 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1623
1624bool
1625aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1626{
1627 /* On little-endian targets, the least significant byte of every stack
1628 argument is passed at the lowest byte address of the stack slot. */
1629 if (!BYTES_BIG_ENDIAN)
1630 return true;
1631
00edcfbe 1632 /* Otherwise, integral, floating-point and pointer types are padded downward:
1633 the least significant byte of a stack argument is passed at the highest
1634 byte address of the stack slot. */
1635 if (type
1636 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1637 || POINTER_TYPE_P (type))
1638 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1639 return false;
1640
1641 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1642 return true;
1643}
1644
1645/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1646
   It specifies padding for the last (and possibly the only)
   element of a block move between registers and memory.  Assuming
   the block is in memory, padding upward means that the last
   element is padded after its most significant byte, while with
   downward padding the last element is padded at its least
   significant byte side.
1653
1654 Small aggregates and small complex types are always padded
1655 upwards.
1656
1657 We don't need to worry about homogeneous floating-point or
1658 short-vector aggregates; their move is not affected by the
1659 padding direction determined here. Regardless of endianness,
1660 each element of such an aggregate is put in the least
1661 significant bits of a fp/simd register.
1662
1663 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1664 register has useful data, and return the opposite if the most
1665 significant byte does. */
1666
1667bool
1668aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1669 bool first ATTRIBUTE_UNUSED)
1670{
1671
1672 /* Small composite types are always padded upward. */
1673 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1674 {
1675 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1676 : GET_MODE_SIZE (mode));
1677 if (size < 2 * UNITS_PER_WORD)
1678 return true;
1679 }
1680
1681 /* Otherwise, use the default padding. */
1682 return !BYTES_BIG_ENDIAN;
1683}
1684
1685static enum machine_mode
1686aarch64_libgcc_cmp_return_mode (void)
1687{
1688 return SImode;
1689}
1690
1691static bool
1692aarch64_frame_pointer_required (void)
1693{
1694 /* If the function contains dynamic stack allocations, we need to
1695 use the frame pointer to access the static parts of the frame. */
1696 if (cfun->calls_alloca)
1697 return true;
1698
1699 /* We may have turned flag_omit_frame_pointer on in order to have this
1700 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1701 and we'll check it here.
1702 If we really did set flag_omit_frame_pointer normally, then we return false
1703 (no frame pointer required) in all cases. */
1704
1705 if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1706 return false;
1707 else if (flag_omit_leaf_frame_pointer)
77436791 1708 return !crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM);
1709 return true;
1710}
1711
1712/* Mark the registers that need to be saved by the callee and calculate
1713 the size of the callee-saved registers area and frame record (both FP
1714 and LR may be omitted). */
1715static void
1716aarch64_layout_frame (void)
1717{
1718 HOST_WIDE_INT offset = 0;
1719 int regno;
1720
1721 if (reload_completed && cfun->machine->frame.laid_out)
1722 return;
1723
1724 cfun->machine->frame.fp_lr_offset = 0;
1725
1726 /* First mark all the registers that really need to be saved... */
1727 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1728 cfun->machine->frame.reg_offset[regno] = -1;
1729
1730 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1731 cfun->machine->frame.reg_offset[regno] = -1;
1732
1733 /* ... that includes the eh data registers (if needed)... */
1734 if (crtl->calls_eh_return)
1735 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1736 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1737
1738 /* ... and any callee saved register that dataflow says is live. */
1739 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1740 if (df_regs_ever_live_p (regno)
1741 && !call_used_regs[regno])
1742 cfun->machine->frame.reg_offset[regno] = 0;
1743
1744 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1745 if (df_regs_ever_live_p (regno)
1746 && !call_used_regs[regno])
1747 cfun->machine->frame.reg_offset[regno] = 0;
1748
1749 if (frame_pointer_needed)
1750 {
1751 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1752 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1753 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1754 }
1755
1756 /* Now assign stack slots for them. */
1757 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1758 if (cfun->machine->frame.reg_offset[regno] != -1)
1759 {
1760 cfun->machine->frame.reg_offset[regno] = offset;
1761 offset += UNITS_PER_WORD;
1762 }
1763
1764 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1765 if (cfun->machine->frame.reg_offset[regno] != -1)
1766 {
1767 cfun->machine->frame.reg_offset[regno] = offset;
1768 offset += UNITS_PER_WORD;
1769 }
1770
1771 if (frame_pointer_needed)
1772 {
1773 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1774 offset += UNITS_PER_WORD;
1775 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1776 }
1777
1778 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1779 {
1780 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1781 offset += UNITS_PER_WORD;
1782 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1783 }
1784
1785 cfun->machine->frame.padding0 =
1786 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1787 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1788
1789 cfun->machine->frame.saved_regs_size = offset;
1790 cfun->machine->frame.laid_out = true;
1791}
1792
1793/* Make the last instruction frame-related and note that it performs
1794 the operation described by FRAME_PATTERN. */
1795
1796static void
1797aarch64_set_frame_expr (rtx frame_pattern)
1798{
1799 rtx insn;
1800
1801 insn = get_last_insn ();
1802 RTX_FRAME_RELATED_P (insn) = 1;
1803 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1804 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1805 frame_pattern,
1806 REG_NOTES (insn));
1807}
1808
1809static bool
1810aarch64_register_saved_on_entry (int regno)
1811{
1812 return cfun->machine->frame.reg_offset[regno] != -1;
1813}
1814
1815
1816static void
1817aarch64_save_or_restore_fprs (int start_offset, int increment,
1818 bool restore, rtx base_rtx)
1819
1820{
1821 unsigned regno;
1822 unsigned regno2;
1823 rtx insn;
1824 rtx (*gen_mem_ref)(enum machine_mode, rtx)
1825 = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1826
1827
1828 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1829 {
1830 if (aarch64_register_saved_on_entry (regno))
1831 {
1832 rtx mem;
1833 mem = gen_mem_ref (DFmode,
1834 plus_constant (Pmode,
1835 base_rtx,
1836 start_offset));
1837
1838 for (regno2 = regno + 1;
1839 regno2 <= V31_REGNUM
1840 && !aarch64_register_saved_on_entry (regno2);
1841 regno2++)
1842 {
1843 /* Empty loop. */
1844 }
1845 if (regno2 <= V31_REGNUM &&
1846 aarch64_register_saved_on_entry (regno2))
1847 {
1848 rtx mem2;
1849 /* Next highest register to be saved. */
1850 mem2 = gen_mem_ref (DFmode,
1851 plus_constant
1852 (Pmode,
1853 base_rtx,
1854 start_offset + increment));
1855 if (restore == false)
1856 {
1857 insn = emit_insn
1858 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1859 mem2, gen_rtx_REG (DFmode, regno2)));
1860
1861 }
1862 else
1863 {
1864 insn = emit_insn
1865 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1866 gen_rtx_REG (DFmode, regno2), mem2));
1867
1868 add_reg_note (insn, REG_CFA_RESTORE,
1869 gen_rtx_REG (DFmode, regno));
1870 add_reg_note (insn, REG_CFA_RESTORE,
1871 gen_rtx_REG (DFmode, regno2));
1872 }
1873
	      /* The first part of a frame-related parallel insn
		 is always assumed to be relevant to the frame
		 calculations; subsequent parts are only
		 frame-related if explicitly marked.  */
e0f396bc 1878 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1879 regno = regno2;
1880 start_offset += increment * 2;
1881 }
1882 else
1883 {
1884 if (restore == false)
1885 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1886 else
1887 {
1888 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1889 add_reg_note (insn, REG_CFA_RESTORE,
1890 gen_rtx_REG (DImode, regno));
1891 }
1892 start_offset += increment;
1893 }
1894 RTX_FRAME_RELATED_P (insn) = 1;
1895 }
1896 }
1897
1898}
1899
1900
/* Offset from the stack pointer at which the saves and
   restores have to happen.  */
1903static void
1904aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1905 bool restore)
1906{
1907 rtx insn;
1908 rtx base_rtx = stack_pointer_rtx;
1909 HOST_WIDE_INT start_offset = offset;
1910 HOST_WIDE_INT increment = UNITS_PER_WORD;
1911 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1912 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1913 unsigned regno;
1914 unsigned regno2;
1915
1916 for (regno = R0_REGNUM; regno <= limit; regno++)
1917 {
1918 if (aarch64_register_saved_on_entry (regno))
1919 {
1920 rtx mem;
1921 mem = gen_mem_ref (Pmode,
1922 plus_constant (Pmode,
1923 base_rtx,
1924 start_offset));
1925
1926 for (regno2 = regno + 1;
1927 regno2 <= limit
1928 && !aarch64_register_saved_on_entry (regno2);
1929 regno2++)
1930 {
1931 /* Empty loop. */
1932 }
1933 if (regno2 <= limit &&
1934 aarch64_register_saved_on_entry (regno2))
1935 {
1936 rtx mem2;
1937 /* Next highest register to be saved. */
1938 mem2 = gen_mem_ref (Pmode,
1939 plus_constant
1940 (Pmode,
1941 base_rtx,
1942 start_offset + increment));
1943 if (restore == false)
1944 {
1945 insn = emit_insn
1946 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1947 mem2, gen_rtx_REG (DImode, regno2)));
1948
1949 }
1950 else
1951 {
1952 insn = emit_insn
1953 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1954 gen_rtx_REG (DImode, regno2), mem2));
1955
1956 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1957 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1958 }
1959
1960 /* The first part of a frame-related parallel insn
1961 is always assumed to be relevant to the frame
 1962	     calculations; subsequent parts are only
1963 frame-related if explicitly marked. */
1964 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1965 1)) = 1;
1966 regno = regno2;
1967 start_offset += increment * 2;
1968 }
1969 else
1970 {
1971 if (restore == false)
1972 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1973 else
1974 {
1975 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1976 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1977 }
1978 start_offset += increment;
1979 }
1980 RTX_FRAME_RELATED_P (insn) = 1;
1981 }
1982 }
1983
1984 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1985
1986}
1987
1988/* AArch64 stack frames generated by this compiler look like:
1989
1990 +-------------------------------+
1991 | |
1992 | incoming stack arguments |
1993 | |
1994 +-------------------------------+ <-- arg_pointer_rtx
1995 | |
1996 | callee-allocated save area |
1997 | for register varargs |
1998 | |
1999 +-------------------------------+
2000 | |
2001 | local variables |
2002 | |
2003 +-------------------------------+ <-- frame_pointer_rtx
2004 | |
2005 | callee-saved registers |
2006 | |
2007 +-------------------------------+
2008 | LR' |
2009 +-------------------------------+
2010 | FP' |
2011 P +-------------------------------+ <-- hard_frame_pointer_rtx
2012 | dynamic allocation |
2013 +-------------------------------+
2014 | |
2015 | outgoing stack arguments |
2016 | |
2017 +-------------------------------+ <-- stack_pointer_rtx
2018
2019 Dynamic stack allocations such as alloca insert data at point P.
2020 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2021 hard_frame_pointer_rtx unchanged. */
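/* Purely illustrative example (not used by the code below): for a
   non-leaf function with 16 bytes of locals, no varargs save area and
   no outgoing arguments, the picture above would give a frame in which
   FP' sits at [hard FP, #0], LR' at [hard FP, #8], the callee-saved
   registers immediately above the frame record and the locals above
   those.  The exact offsets come from aarch64_layout_frame and the
   STACK_BOUNDARY rounding, so treat any concrete numbers here as an
   assumption for illustration only.  */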
2022
2023/* Generate the prologue instructions for entry into a function.
2024 Establish the stack frame by decreasing the stack pointer with a
2025 properly calculated size and, if necessary, create a frame record
2026 filled with the values of LR and previous frame pointer. The
6991c977 2027 current FP is also set up if it is in use. */
2028
2029void
2030aarch64_expand_prologue (void)
2031{
2032 /* sub sp, sp, #<frame_size>
2033 stp {fp, lr}, [sp, #<frame_size> - 16]
2034 add fp, sp, #<frame_size> - hardfp_offset
2035 stp {cs_reg}, [fp, #-16] etc.
2036
2037 sub sp, sp, <final_adjustment_if_any>
2038 */
2039 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2040 HOST_WIDE_INT frame_size, offset;
2041 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2042 rtx insn;
2043
2044 aarch64_layout_frame ();
2045 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2046 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2047 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2048 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2049 + crtl->outgoing_args_size);
2050 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2051 STACK_BOUNDARY / BITS_PER_UNIT);
2052
2053 if (flag_stack_usage_info)
2054 current_function_static_stack_size = frame_size;
2055
2056 fp_offset = (offset
2057 - original_frame_size
2058 - cfun->machine->frame.saved_regs_size);
2059
44c0e7b9 2060  /* Store pairs and load pairs have an offset range of only -512 to 504.  */
2061 if (offset >= 512)
2062 {
2063 /* When the frame has a large size, an initial decrease is done on
2064 the stack pointer to jump over the callee-allocated save area for
2065 register varargs, the local variable area and/or the callee-saved
2066 register area. This will allow the pre-index write-back
2067 store pair instructions to be used for setting up the stack frame
2068 efficiently. */
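	 /* As a rough sketch (assumed numbers; the real sequence is
	    produced by the code below): with 4096 bytes of locals and
	    two callee-saved pairs, the prologue might begin
		sub	sp, sp, #4096	// skip the locals first
		sub	sp, sp, #48	// then drop to the save area
		stp	x29, x30, [sp]	// offsets now fit stp's range
	    rather than trying to address the saves at #4096 and above
	    from the original SP, which a store-pair instruction could
	    not encode.  */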
2069 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2070 if (offset >= 512)
2071 offset = cfun->machine->frame.saved_regs_size;
2072
2073 frame_size -= (offset + crtl->outgoing_args_size);
2074 fp_offset = 0;
2075
2076 if (frame_size >= 0x1000000)
2077 {
2078 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2079 emit_move_insn (op0, GEN_INT (-frame_size));
2080 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2081 aarch64_set_frame_expr (gen_rtx_SET
2082 (Pmode, stack_pointer_rtx,
2083 plus_constant (Pmode,
2084 stack_pointer_rtx,
2085 -frame_size)));
2086 }
2087 else if (frame_size > 0)
2088 {
2089 if ((frame_size & 0xfff) != frame_size)
2090 {
2091 insn = emit_insn (gen_add2_insn
2092 (stack_pointer_rtx,
2093 GEN_INT (-(frame_size
2094 & ~(HOST_WIDE_INT)0xfff))));
2095 RTX_FRAME_RELATED_P (insn) = 1;
2096 }
2097 if ((frame_size & 0xfff) != 0)
2098 {
2099 insn = emit_insn (gen_add2_insn
2100 (stack_pointer_rtx,
2101 GEN_INT (-(frame_size
2102 & (HOST_WIDE_INT)0xfff))));
2103 RTX_FRAME_RELATED_P (insn) = 1;
2104 }
2105 }
2106 }
2107 else
2108 frame_size = -1;
2109
2110 if (offset > 0)
2111 {
2112 /* Save the frame pointer and lr if the frame pointer is needed
2113 first. Make the frame pointer point to the location of the
2114 old frame pointer on the stack. */
2115 if (frame_pointer_needed)
2116 {
2117 rtx mem_fp, mem_lr;
2118
2119 if (fp_offset)
2120 {
2121 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2122 GEN_INT (-offset)));
2123 RTX_FRAME_RELATED_P (insn) = 1;
2124 aarch64_set_frame_expr (gen_rtx_SET
2125 (Pmode, stack_pointer_rtx,
2126 gen_rtx_MINUS (Pmode,
2127 stack_pointer_rtx,
2128 GEN_INT (offset))));
2129 mem_fp = gen_frame_mem (DImode,
2130 plus_constant (Pmode,
2131 stack_pointer_rtx,
2132 fp_offset));
2133 mem_lr = gen_frame_mem (DImode,
2134 plus_constant (Pmode,
2135 stack_pointer_rtx,
2136 fp_offset
2137 + UNITS_PER_WORD));
2138 insn = emit_insn (gen_store_pairdi (mem_fp,
2139 hard_frame_pointer_rtx,
2140 mem_lr,
2141 gen_rtx_REG (DImode,
2142 LR_REGNUM)));
2143 }
2144 else
2145 {
2146 insn = emit_insn (gen_storewb_pairdi_di
2147 (stack_pointer_rtx, stack_pointer_rtx,
2148 hard_frame_pointer_rtx,
2149 gen_rtx_REG (DImode, LR_REGNUM),
2150 GEN_INT (-offset),
2151 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2152 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2153 }
2154
2155 /* The first part of a frame-related parallel insn is always
2156 assumed to be relevant to the frame calculations;
 2157	     subsequent parts are only frame-related if explicitly
2158 marked. */
2159 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2160 RTX_FRAME_RELATED_P (insn) = 1;
2161
2162 /* Set up frame pointer to point to the location of the
2163 previous frame pointer on the stack. */
2164 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2165 stack_pointer_rtx,
2166 GEN_INT (fp_offset)));
2167 aarch64_set_frame_expr (gen_rtx_SET
2168 (Pmode, hard_frame_pointer_rtx,
2169 plus_constant (Pmode,
2170 stack_pointer_rtx,
2171 fp_offset)));
2172 RTX_FRAME_RELATED_P (insn) = 1;
2173 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2174 hard_frame_pointer_rtx));
2175 }
2176 else
2177 {
2178 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2179 GEN_INT (-offset)));
2180 RTX_FRAME_RELATED_P (insn) = 1;
2181 }
2182
2183 aarch64_save_or_restore_callee_save_registers
2184 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2185 }
2186
2187 /* when offset >= 512,
2188 sub sp, sp, #<outgoing_args_size> */
2189 if (frame_size > -1)
2190 {
2191 if (crtl->outgoing_args_size > 0)
2192 {
2193 insn = emit_insn (gen_add2_insn
2194 (stack_pointer_rtx,
2195 GEN_INT (- crtl->outgoing_args_size)));
2196 RTX_FRAME_RELATED_P (insn) = 1;
2197 }
2198 }
2199}
2200
2201/* Generate the epilogue instructions for returning from a function. */
2202void
2203aarch64_expand_epilogue (bool for_sibcall)
2204{
2205 HOST_WIDE_INT original_frame_size, frame_size, offset;
2206 HOST_WIDE_INT fp_offset;
2207 rtx insn;
44c0e7b9 2208 rtx cfa_reg;
2209
2210 aarch64_layout_frame ();
2211 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2212 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2213 + crtl->outgoing_args_size);
2214 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2215 STACK_BOUNDARY / BITS_PER_UNIT);
2216
2217 fp_offset = (offset
2218 - original_frame_size
2219 - cfun->machine->frame.saved_regs_size);
2220
2221 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2222
 2223  /* Store pairs and load pairs have an offset range of only -512 to 504.  */
2224 if (offset >= 512)
2225 {
2226 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2227 if (offset >= 512)
2228 offset = cfun->machine->frame.saved_regs_size;
2229
2230 frame_size -= (offset + crtl->outgoing_args_size);
2231 fp_offset = 0;
2232 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2233 {
2234 insn = emit_insn (gen_add2_insn
2235 (stack_pointer_rtx,
2236 GEN_INT (crtl->outgoing_args_size)));
2237 RTX_FRAME_RELATED_P (insn) = 1;
2238 }
2239 }
2240 else
2241 frame_size = -1;
2242
2243 /* If there were outgoing arguments or we've done dynamic stack
2244 allocation, then restore the stack pointer from the frame
2245 pointer. This is at most one insn and more efficient than using
2246 GCC's internal mechanism. */
2247 if (frame_pointer_needed
2248 && (crtl->outgoing_args_size || cfun->calls_alloca))
2249 {
2250 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2251 hard_frame_pointer_rtx,
2252 GEN_INT (- fp_offset)));
2253 RTX_FRAME_RELATED_P (insn) = 1;
2254 /* As SP is set to (FP - fp_offset), according to the rules in
2255 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2256 from the value of SP from now on. */
2257 cfa_reg = stack_pointer_rtx;
2258 }
2259
2260 aarch64_save_or_restore_callee_save_registers
2261 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2262
2263 /* Restore the frame pointer and lr if the frame pointer is needed. */
2264 if (offset > 0)
2265 {
2266 if (frame_pointer_needed)
2267 {
2268 rtx mem_fp, mem_lr;
2269
2270 if (fp_offset)
2271 {
2272 mem_fp = gen_frame_mem (DImode,
2273 plus_constant (Pmode,
2274 stack_pointer_rtx,
2275 fp_offset));
2276 mem_lr = gen_frame_mem (DImode,
2277 plus_constant (Pmode,
2278 stack_pointer_rtx,
2279 fp_offset
2280 + UNITS_PER_WORD));
2281 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2282 mem_fp,
2283 gen_rtx_REG (DImode,
2284 LR_REGNUM),
2285 mem_lr));
2286 }
2287 else
2288 {
2289 insn = emit_insn (gen_loadwb_pairdi_di
2290 (stack_pointer_rtx,
2291 stack_pointer_rtx,
2292 hard_frame_pointer_rtx,
2293 gen_rtx_REG (DImode, LR_REGNUM),
2294 GEN_INT (offset),
2295 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2296 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2297 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2298 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2299 plus_constant (Pmode, cfa_reg,
2300 offset))));
2301 }
2302
2303 /* The first part of a frame-related parallel insn
2304 is always assumed to be relevant to the frame
 2305	     calculations; subsequent parts are only
2306 frame-related if explicitly marked. */
2307 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2308 RTX_FRAME_RELATED_P (insn) = 1;
2309 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2310 add_reg_note (insn, REG_CFA_RESTORE,
2311 gen_rtx_REG (DImode, LR_REGNUM));
2312
2313 if (fp_offset)
2314 {
2315 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2316 GEN_INT (offset)));
2317 RTX_FRAME_RELATED_P (insn) = 1;
2318 }
2319 }
2320 else
2321 {
2322 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2323 GEN_INT (offset)));
2324 RTX_FRAME_RELATED_P (insn) = 1;
2325 }
2326 }
2327
2328 /* Stack adjustment for exception handler. */
2329 if (crtl->calls_eh_return)
2330 {
2331 /* We need to unwind the stack by the offset computed by
2332 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2333 based on SP. Ideally we would update the SP and define the
2334 CFA along the lines of:
2335
2336 SP = SP + EH_RETURN_STACKADJ_RTX
2337 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2338
2339 However the dwarf emitter only understands a constant
2340 register offset.
2341
631b20a7 2342 The solution chosen here is to use the otherwise unused IP0
2343 as a temporary register to hold the current SP value. The
2344 CFA is described using IP0 then SP is modified. */
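	 /* Illustratively, the emitted sequence is along the lines of
		mov	x16, sp		// IP0 := SP; CFA now via x16
		add	sp, sp, x<adj>	// <adj> = EH_RETURN_STACKADJ_RTX
	    so the unwinder keeps a constant offset from IP0 while SP
	    moves by a run-time amount.  The register used for the
	    adjustment is whatever EH_RETURN_STACKADJ_RTX names; the
	    mnemonics above are a sketch, not the literal output.  */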
2345
2346 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2347
2348 insn = emit_move_insn (ip0, stack_pointer_rtx);
2349 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2350 RTX_FRAME_RELATED_P (insn) = 1;
2351
2352 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2353
2354 /* Ensure the assignment to IP0 does not get optimized away. */
2355 emit_use (ip0);
2356 }
2357
2358 if (frame_size > -1)
2359 {
2360 if (frame_size >= 0x1000000)
2361 {
2362 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2363 emit_move_insn (op0, GEN_INT (frame_size));
2364 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2365 aarch64_set_frame_expr (gen_rtx_SET
2366 (Pmode, stack_pointer_rtx,
2367 plus_constant (Pmode,
2368 stack_pointer_rtx,
2369 frame_size)));
2370 }
2371 else if (frame_size > 0)
2372 {
2373 if ((frame_size & 0xfff) != 0)
2374 {
2375 insn = emit_insn (gen_add2_insn
2376 (stack_pointer_rtx,
2377 GEN_INT ((frame_size
2378 & (HOST_WIDE_INT) 0xfff))));
2379 RTX_FRAME_RELATED_P (insn) = 1;
2380 }
2381 if ((frame_size & 0xfff) != frame_size)
2382 {
2383 insn = emit_insn (gen_add2_insn
2384 (stack_pointer_rtx,
2385 GEN_INT ((frame_size
2386 & ~ (HOST_WIDE_INT) 0xfff))));
2387 RTX_FRAME_RELATED_P (insn) = 1;
2388 }
2389 }
2390
2391 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2392 plus_constant (Pmode,
2393 stack_pointer_rtx,
2394 offset)));
2395 }
2396
2397 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2398 if (!for_sibcall)
2399 emit_jump_insn (ret_rtx);
2400}
2401
2402/* Return the place to copy the exception unwinding return address to.
 2403   This will probably be a stack slot, but could (in theory) be the
 2404   return register.  */
2405rtx
2406aarch64_final_eh_return_addr (void)
2407{
2408 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2409 aarch64_layout_frame ();
2410 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2411 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2412 + crtl->outgoing_args_size);
2413 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2414 STACK_BOUNDARY / BITS_PER_UNIT);
2415 fp_offset = offset
2416 - original_frame_size
2417 - cfun->machine->frame.saved_regs_size;
2418
2419 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2420 return gen_rtx_REG (DImode, LR_REGNUM);
2421
2422 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2423 result in a store to save LR introduced by builtin_eh_return () being
2424 incorrectly deleted because the alias is not detected.
2425 So in the calculation of the address to copy the exception unwinding
2426 return address to, we note 2 cases.
2427 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2428 we return a SP-relative location since all the addresses are SP-relative
2429 in this case. This prevents the store from being optimized away.
2430 If the fp_offset is not 0, then the addresses will be FP-relative and
2431 therefore we return a FP-relative location. */
2432
2433 if (frame_pointer_needed)
2434 {
2435 if (fp_offset)
2436 return gen_frame_mem (DImode,
2437 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2438 else
2439 return gen_frame_mem (DImode,
2440 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2441 }
2442
2443 /* If FP is not needed, we calculate the location of LR, which would be
2444 at the top of the saved registers block. */
2445
2446 return gen_frame_mem (DImode,
2447 plus_constant (Pmode,
2448 stack_pointer_rtx,
2449 fp_offset
2450 + cfun->machine->frame.saved_regs_size
2451 - 2 * UNITS_PER_WORD));
2452}
2453
2454/* Output code to build up a constant in a register. */
2455static void
d9600ae5 2456aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2457{
2458 if (aarch64_bitmask_imm (val, DImode))
d9600ae5 2459 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2460 else
2461 {
2462 int i;
2463 int ncount = 0;
2464 int zcount = 0;
2465 HOST_WIDE_INT valp = val >> 16;
2466 HOST_WIDE_INT valm;
2467 HOST_WIDE_INT tval;
2468
2469 for (i = 16; i < 64; i += 16)
2470 {
2471 valm = (valp & 0xffff);
2472
2473 if (valm != 0)
2474 ++ zcount;
2475
2476 if (valm != 0xffff)
2477 ++ ncount;
2478
2479 valp >>= 16;
2480 }
2481
2482 /* zcount contains the number of additional MOVK instructions
2483 required if the constant is built up with an initial MOVZ instruction,
2484 while ncount is the number of MOVK instructions required if starting
 2485	 with a MOVN instruction.  Choose whichever sequence needs fewer
 2486	 instructions, preferring MOVZ instructions when the two counts
 2487	 are equal.  */
2488 if (ncount < zcount)
2489 {
d9600ae5 2490 emit_move_insn (gen_rtx_REG (Pmode, regnum),
d103f29b 2491 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2492 tval = 0xffff;
2493 }
2494 else
2495 {
2496 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2497 GEN_INT (val & 0xffff));
2498 tval = 0;
2499 }
2500
2501 val >>= 16;
2502
2503 for (i = 16; i < 64; i += 16)
2504 {
2505 if ((val & 0xffff) != tval)
2506 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2507 GEN_INT (i), GEN_INT (val & 0xffff)));
2508 val >>= 16;
2509 }
2510 }
2511}
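/* Worked example (illustrative values): val = 0x1234000000005678 is
   not a bitmask immediate; of its three upper 16-bit chunks, one is
   non-zero and all three differ from 0xffff, so the MOVZ path wins and
   the generated sequence is roughly
	movz	xN, #0x5678
	movk	xN, #0x1234, lsl #48
   Chunks equal to the initial fill value (zero here) need no MOVK.  */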
2512
2513static void
d9600ae5 2514aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2515{
2516 HOST_WIDE_INT mdelta = delta;
2517 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2518 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2519
2520 if (mdelta < 0)
2521 mdelta = -mdelta;
2522
2523 if (mdelta >= 4096 * 4096)
2524 {
2525 aarch64_build_constant (scratchreg, delta);
2526 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2527 }
2528 else if (mdelta > 0)
2529 {
43e9d192 2530 if (mdelta >= 4096)
2531 {
2532 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2533 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2534 if (delta < 0)
2535 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2536 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2537 else
2538 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2539 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2540 }
43e9d192 2541 if (mdelta % 4096 != 0)
2542 {
2543 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2544 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2545 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2546 }
2547 }
2548}
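/* Illustrative only: for delta = 0x201008 (so 4096 <= mdelta <
   4096 * 4096) the code above emits roughly
	mov	x<scratch>, #0x201		// mdelta / 4096
	add	x<reg>, x<reg>, x<scratch>, lsl #12
	add	x<reg>, x<reg>, #0x8		// mdelta % 4096
   while larger deltas go through aarch64_build_constant followed by a
   single register-register add.  */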
2549
2550/* Output code to add DELTA to the first argument, and then jump
2551 to FUNCTION. Used for C++ multiple inheritance. */
2552static void
2553aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2554 HOST_WIDE_INT delta,
2555 HOST_WIDE_INT vcall_offset,
2556 tree function)
2557{
2558 /* The this pointer is always in x0. Note that this differs from
 2559	   Arm where the this pointer may be bumped to r1 if r0 is required
2560 to return a pointer to an aggregate. On AArch64 a result value
2561 pointer will be in x8. */
2562 int this_regno = R0_REGNUM;
75f1d6fc 2563 rtx this_rtx, temp0, temp1, addr, insn, funexp;
43e9d192 2564
2565 reload_completed = 1;
2566 emit_note (NOTE_INSN_PROLOGUE_END);
2567
2568 if (vcall_offset == 0)
d9600ae5 2569 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2570 else
2571 {
28514dda 2572 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 2573
2574 this_rtx = gen_rtx_REG (Pmode, this_regno);
2575 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2576 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
43e9d192 2577
2578 addr = this_rtx;
2579 if (delta != 0)
2580 {
2581 if (delta >= -256 && delta < 256)
2582 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2583 plus_constant (Pmode, this_rtx, delta));
2584 else
d9600ae5 2585 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2586 }
2587
2588 if (Pmode == ptr_mode)
2589 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2590 else
2591 aarch64_emit_move (temp0,
2592 gen_rtx_ZERO_EXTEND (Pmode,
2593 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 2594
28514dda 2595 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 2596 addr = plus_constant (Pmode, temp0, vcall_offset);
2597 else
2598 {
d9600ae5 2599 aarch64_build_constant (IP1_REGNUM, vcall_offset);
75f1d6fc 2600 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2601 }
2602
2603 if (Pmode == ptr_mode)
2604 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2605 else
2606 aarch64_emit_move (temp1,
2607 gen_rtx_SIGN_EXTEND (Pmode,
2608 gen_rtx_MEM (ptr_mode, addr)));
2609
75f1d6fc 2610 emit_insn (gen_add2_insn (this_rtx, temp1));
2611 }
2612
2613 /* Generate a tail call to the target function. */
2614 if (!TREE_USED (function))
2615 {
2616 assemble_external (function);
2617 TREE_USED (function) = 1;
2618 }
2619 funexp = XEXP (DECL_RTL (function), 0);
2620 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2621 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2622 SIBLING_CALL_P (insn) = 1;
2623
2624 insn = get_insns ();
2625 shorten_branches (insn);
2626 final_start_function (insn, file, 1);
2627 final (insn, file, 1);
43e9d192 2628 final_end_function ();
2629
2630 /* Stop pretending to be a post-reload pass. */
2631 reload_completed = 0;
2632}
2633
2634static int
2635aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2636{
2637 if (GET_CODE (*x) == SYMBOL_REF)
2638 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2639
2640 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2641 TLS offsets, not real symbol references. */
2642 if (GET_CODE (*x) == UNSPEC
2643 && XINT (*x, 1) == UNSPEC_TLS)
2644 return -1;
2645
2646 return 0;
2647}
2648
2649static bool
2650aarch64_tls_referenced_p (rtx x)
2651{
2652 if (!TARGET_HAVE_TLS)
2653 return false;
2654
2655 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2656}
2657
2658
2659static int
2660aarch64_bitmasks_cmp (const void *i1, const void *i2)
2661{
2662 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2663 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2664
2665 if (*imm1 < *imm2)
2666 return -1;
2667 if (*imm1 > *imm2)
2668 return +1;
2669 return 0;
2670}
2671
2672
2673static void
2674aarch64_build_bitmask_table (void)
2675{
2676 unsigned HOST_WIDE_INT mask, imm;
2677 unsigned int log_e, e, s, r;
2678 unsigned int nimms = 0;
2679
2680 for (log_e = 1; log_e <= 6; log_e++)
2681 {
2682 e = 1 << log_e;
2683 if (e == 64)
2684 mask = ~(HOST_WIDE_INT) 0;
2685 else
2686 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2687 for (s = 1; s < e; s++)
2688 {
2689 for (r = 0; r < e; r++)
2690 {
2691 /* set s consecutive bits to 1 (s < 64) */
2692 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2693 /* rotate right by r */
2694 if (r != 0)
2695 imm = ((imm >> r) | (imm << (e - r))) & mask;
 2696		  /* Replicate the constant across the full 64 bits, depending
		     on the element size; each case below intentionally falls
		     through to the next.  */
2697 switch (log_e) {
2698 case 1: imm |= (imm << 2);
2699 case 2: imm |= (imm << 4);
2700 case 3: imm |= (imm << 8);
2701 case 4: imm |= (imm << 16);
2702 case 5: imm |= (imm << 32);
2703 case 6:
2704 break;
2705 default:
2706 gcc_unreachable ();
2707 }
2708 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2709 aarch64_bitmasks[nimms++] = imm;
2710 }
2711 }
2712 }
2713
2714 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2715 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2716 aarch64_bitmasks_cmp);
2717}
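/* Worked example (illustrative): with element size e = 16, s = 8
   consecutive set bits and rotation r = 0, the element is 0x00ff;
   replicated to 64 bits it becomes 0x00ff00ff00ff00ff, which therefore
   lands in the sorted table that aarch64_bitmask_imm searches below.
   A value such as 0x0000000000abcdef, which is not a replicated,
   rotated run of ones, never appears in the table.  */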
2718
2719
2720/* Return true if val can be encoded as a 12-bit unsigned immediate with
2721 a left shift of 0 or 12 bits. */
2722bool
2723aarch64_uimm12_shift (HOST_WIDE_INT val)
2724{
2725 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2726 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2727 );
2728}
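/* For instance (illustrative values), 0xabc and 0xabc000 both pass
   this test (shift 0 and shift 12 respectively), whereas 0x1001 does
   not, because its set bits straddle the two 12-bit windows.  */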
2729
2730
2731/* Return true if val is an immediate that can be loaded into a
2732 register by a MOVZ instruction. */
2733static bool
2734aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2735{
2736 if (GET_MODE_SIZE (mode) > 4)
2737 {
2738 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2739 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2740 return 1;
2741 }
2742 else
2743 {
2744 /* Ignore sign extension. */
2745 val &= (HOST_WIDE_INT) 0xffffffff;
2746 }
2747 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2748 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2749}
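/* For example (illustrative values), 0x5678 and 0x12340000 are single
   MOVZ immediates, while 0x12345678 is rejected here and is normally
   built from a MOVZ/MOVK pair elsewhere (e.g. by the move-immediate
   expansion).  */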
2750
2751
2752/* Return true if val is a valid bitmask immediate. */
2753bool
2754aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2755{
2756 if (GET_MODE_SIZE (mode) < 8)
2757 {
2758 /* Replicate bit pattern. */
2759 val &= (HOST_WIDE_INT) 0xffffffff;
2760 val |= val << 32;
2761 }
2762 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2763 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2764}
2765
2766
2767/* Return true if val is an immediate that can be loaded into a
2768 register in a single instruction. */
2769bool
2770aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2771{
2772 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2773 return 1;
2774 return aarch64_bitmask_imm (val, mode);
2775}
2776
2777static bool
2778aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2779{
2780 rtx base, offset;
7eda14e1 2781
2782 if (GET_CODE (x) == HIGH)
2783 return true;
2784
2785 split_const (x, &base, &offset);
2786 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2787 {
2788 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2789 != SYMBOL_FORCE_TO_MEM)
2790 return true;
2791 else
2792 /* Avoid generating a 64-bit relocation in ILP32; leave
2793 to aarch64_expand_mov_immediate to handle it properly. */
2794 return mode != ptr_mode;
2795 }
2796
2797 return aarch64_tls_referenced_p (x);
2798}
2799
2800/* Return true if register REGNO is a valid index register.
2801 STRICT_P is true if REG_OK_STRICT is in effect. */
2802
2803bool
2804aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2805{
2806 if (!HARD_REGISTER_NUM_P (regno))
2807 {
2808 if (!strict_p)
2809 return true;
2810
2811 if (!reg_renumber)
2812 return false;
2813
2814 regno = reg_renumber[regno];
2815 }
2816 return GP_REGNUM_P (regno);
2817}
2818
 2819/* Return true if register REGNO is a valid base register.
2820 STRICT_P is true if REG_OK_STRICT is in effect. */
2821
2822bool
2823aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2824{
2825 if (!HARD_REGISTER_NUM_P (regno))
2826 {
2827 if (!strict_p)
2828 return true;
2829
2830 if (!reg_renumber)
2831 return false;
2832
2833 regno = reg_renumber[regno];
2834 }
2835
2836 /* The fake registers will be eliminated to either the stack or
2837 hard frame pointer, both of which are usually valid base registers.
2838 Reload deals with the cases where the eliminated form isn't valid. */
2839 return (GP_REGNUM_P (regno)
2840 || regno == SP_REGNUM
2841 || regno == FRAME_POINTER_REGNUM
2842 || regno == ARG_POINTER_REGNUM);
2843}
2844
 2845/* Return true if X is a valid base register.
2846 STRICT_P is true if REG_OK_STRICT is in effect. */
2847
2848static bool
2849aarch64_base_register_rtx_p (rtx x, bool strict_p)
2850{
2851 if (!strict_p && GET_CODE (x) == SUBREG)
2852 x = SUBREG_REG (x);
2853
2854 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2855}
2856
2857/* Return true if address offset is a valid index. If it is, fill in INFO
2858 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2859
2860static bool
2861aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2862 enum machine_mode mode, bool strict_p)
2863{
2864 enum aarch64_address_type type;
2865 rtx index;
2866 int shift;
2867
2868 /* (reg:P) */
2869 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2870 && GET_MODE (x) == Pmode)
2871 {
2872 type = ADDRESS_REG_REG;
2873 index = x;
2874 shift = 0;
2875 }
2876 /* (sign_extend:DI (reg:SI)) */
2877 else if ((GET_CODE (x) == SIGN_EXTEND
2878 || GET_CODE (x) == ZERO_EXTEND)
2879 && GET_MODE (x) == DImode
2880 && GET_MODE (XEXP (x, 0)) == SImode)
2881 {
2882 type = (GET_CODE (x) == SIGN_EXTEND)
2883 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2884 index = XEXP (x, 0);
2885 shift = 0;
2886 }
2887 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2888 else if (GET_CODE (x) == MULT
2889 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2890 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2891 && GET_MODE (XEXP (x, 0)) == DImode
2892 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2893 && CONST_INT_P (XEXP (x, 1)))
2894 {
2895 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2896 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2897 index = XEXP (XEXP (x, 0), 0);
2898 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2899 }
2900 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2901 else if (GET_CODE (x) == ASHIFT
2902 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2903 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2904 && GET_MODE (XEXP (x, 0)) == DImode
2905 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2906 && CONST_INT_P (XEXP (x, 1)))
2907 {
2908 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2909 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2910 index = XEXP (XEXP (x, 0), 0);
2911 shift = INTVAL (XEXP (x, 1));
2912 }
2913 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2914 else if ((GET_CODE (x) == SIGN_EXTRACT
2915 || GET_CODE (x) == ZERO_EXTRACT)
2916 && GET_MODE (x) == DImode
2917 && GET_CODE (XEXP (x, 0)) == MULT
2918 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2919 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2920 {
2921 type = (GET_CODE (x) == SIGN_EXTRACT)
2922 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2923 index = XEXP (XEXP (x, 0), 0);
2924 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2925 if (INTVAL (XEXP (x, 1)) != 32 + shift
2926 || INTVAL (XEXP (x, 2)) != 0)
2927 shift = -1;
2928 }
2929 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2930 (const_int 0xffffffff<<shift)) */
2931 else if (GET_CODE (x) == AND
2932 && GET_MODE (x) == DImode
2933 && GET_CODE (XEXP (x, 0)) == MULT
2934 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2935 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2936 && CONST_INT_P (XEXP (x, 1)))
2937 {
2938 type = ADDRESS_REG_UXTW;
2939 index = XEXP (XEXP (x, 0), 0);
2940 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2941 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2942 shift = -1;
2943 }
2944 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2945 else if ((GET_CODE (x) == SIGN_EXTRACT
2946 || GET_CODE (x) == ZERO_EXTRACT)
2947 && GET_MODE (x) == DImode
2948 && GET_CODE (XEXP (x, 0)) == ASHIFT
2949 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2950 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2951 {
2952 type = (GET_CODE (x) == SIGN_EXTRACT)
2953 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2954 index = XEXP (XEXP (x, 0), 0);
2955 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2956 if (INTVAL (XEXP (x, 1)) != 32 + shift
2957 || INTVAL (XEXP (x, 2)) != 0)
2958 shift = -1;
2959 }
2960 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2961 (const_int 0xffffffff<<shift)) */
2962 else if (GET_CODE (x) == AND
2963 && GET_MODE (x) == DImode
2964 && GET_CODE (XEXP (x, 0)) == ASHIFT
2965 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2966 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2967 && CONST_INT_P (XEXP (x, 1)))
2968 {
2969 type = ADDRESS_REG_UXTW;
2970 index = XEXP (XEXP (x, 0), 0);
2971 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2972 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2973 shift = -1;
2974 }
2975 /* (mult:P (reg:P) (const_int scale)) */
2976 else if (GET_CODE (x) == MULT
2977 && GET_MODE (x) == Pmode
2978 && GET_MODE (XEXP (x, 0)) == Pmode
2979 && CONST_INT_P (XEXP (x, 1)))
2980 {
2981 type = ADDRESS_REG_REG;
2982 index = XEXP (x, 0);
2983 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2984 }
2985 /* (ashift:P (reg:P) (const_int shift)) */
2986 else if (GET_CODE (x) == ASHIFT
2987 && GET_MODE (x) == Pmode
2988 && GET_MODE (XEXP (x, 0)) == Pmode
2989 && CONST_INT_P (XEXP (x, 1)))
2990 {
2991 type = ADDRESS_REG_REG;
2992 index = XEXP (x, 0);
2993 shift = INTVAL (XEXP (x, 1));
2994 }
2995 else
2996 return false;
2997
2998 if (GET_CODE (index) == SUBREG)
2999 index = SUBREG_REG (index);
3000
3001 if ((shift == 0 ||
3002 (shift > 0 && shift <= 3
3003 && (1 << shift) == GET_MODE_SIZE (mode)))
3004 && REG_P (index)
3005 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3006 {
3007 info->type = type;
3008 info->offset = index;
3009 info->shift = shift;
3010 return true;
3011 }
3012
3013 return false;
3014}
3015
3016static inline bool
3017offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3018{
3019 return (offset >= -64 * GET_MODE_SIZE (mode)
3020 && offset < 64 * GET_MODE_SIZE (mode)
3021 && offset % GET_MODE_SIZE (mode) == 0);
3022}
3023
3024static inline bool
3025offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3026 HOST_WIDE_INT offset)
3027{
3028 return offset >= -256 && offset < 256;
3029}
3030
3031static inline bool
3032offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3033{
3034 return (offset >= 0
3035 && offset < 4096 * GET_MODE_SIZE (mode)
3036 && offset % GET_MODE_SIZE (mode) == 0);
3037}
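/* To make the ranges concrete for an 8-byte (DImode) access, the three
   helpers above accept scaled pair offsets in [-512, 504] that are
   multiples of 8, unscaled offsets in [-256, 255], and unsigned scaled
   offsets in [0, 32760] that are multiples of 8.  The sketch below is
   a non-authoritative illustration and is not part of the build.  */
#if 0
static bool
aarch64_example_offset_ok_for_ldr_x (HOST_WIDE_INT offset)
{
  /* An x-register LDR/STR accepts either the signed 9-bit unscaled
     form or the unsigned 12-bit scaled form; mirror the checks above
     for an 8-byte access.  */
  return ((offset >= -256 && offset < 256)
	  || (offset >= 0 && offset < 4096 * 8 && offset % 8 == 0));
}
#endif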
3038
3039/* Return true if X is a valid address for machine mode MODE. If it is,
3040 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3041 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3042
3043static bool
3044aarch64_classify_address (struct aarch64_address_info *info,
3045 rtx x, enum machine_mode mode,
3046 RTX_CODE outer_code, bool strict_p)
3047{
3048 enum rtx_code code = GET_CODE (x);
3049 rtx op0, op1;
3050 bool allow_reg_index_p =
3051 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
3052
3053 /* Don't support anything other than POST_INC or REG addressing for
3054 AdvSIMD. */
3055 if (aarch64_vector_mode_p (mode)
3056 && (code != POST_INC && code != REG))
3057 return false;
3058
3059 switch (code)
3060 {
3061 case REG:
3062 case SUBREG:
3063 info->type = ADDRESS_REG_IMM;
3064 info->base = x;
3065 info->offset = const0_rtx;
3066 return aarch64_base_register_rtx_p (x, strict_p);
3067
3068 case PLUS:
3069 op0 = XEXP (x, 0);
3070 op1 = XEXP (x, 1);
3071 if (GET_MODE_SIZE (mode) != 0
3072 && CONST_INT_P (op1)
3073 && aarch64_base_register_rtx_p (op0, strict_p))
3074 {
3075 HOST_WIDE_INT offset = INTVAL (op1);
3076
3077 info->type = ADDRESS_REG_IMM;
3078 info->base = op0;
3079 info->offset = op1;
3080
3081 /* TImode and TFmode values are allowed in both pairs of X
3082 registers and individual Q registers. The available
3083 address modes are:
3084 X,X: 7-bit signed scaled offset
3085 Q: 9-bit signed offset
3086 We conservatively require an offset representable in either mode.
3087 */
3088 if (mode == TImode || mode == TFmode)
3089 return (offset_7bit_signed_scaled_p (mode, offset)
3090 && offset_9bit_signed_unscaled_p (mode, offset));
3091
3092 if (outer_code == PARALLEL)
3093 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3094 && offset_7bit_signed_scaled_p (mode, offset));
3095 else
3096 return (offset_9bit_signed_unscaled_p (mode, offset)
3097 || offset_12bit_unsigned_scaled_p (mode, offset));
3098 }
3099
3100 if (allow_reg_index_p)
3101 {
3102 /* Look for base + (scaled/extended) index register. */
3103 if (aarch64_base_register_rtx_p (op0, strict_p)
3104 && aarch64_classify_index (info, op1, mode, strict_p))
3105 {
3106 info->base = op0;
3107 return true;
3108 }
3109 if (aarch64_base_register_rtx_p (op1, strict_p)
3110 && aarch64_classify_index (info, op0, mode, strict_p))
3111 {
3112 info->base = op1;
3113 return true;
3114 }
3115 }
3116
3117 return false;
3118
3119 case POST_INC:
3120 case POST_DEC:
3121 case PRE_INC:
3122 case PRE_DEC:
3123 info->type = ADDRESS_REG_WB;
3124 info->base = XEXP (x, 0);
3125 info->offset = NULL_RTX;
3126 return aarch64_base_register_rtx_p (info->base, strict_p);
3127
3128 case POST_MODIFY:
3129 case PRE_MODIFY:
3130 info->type = ADDRESS_REG_WB;
3131 info->base = XEXP (x, 0);
3132 if (GET_CODE (XEXP (x, 1)) == PLUS
3133 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3134 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3135 && aarch64_base_register_rtx_p (info->base, strict_p))
3136 {
3137 HOST_WIDE_INT offset;
3138 info->offset = XEXP (XEXP (x, 1), 1);
3139 offset = INTVAL (info->offset);
3140
3141 /* TImode and TFmode values are allowed in both pairs of X
3142 registers and individual Q registers. The available
3143 address modes are:
3144 X,X: 7-bit signed scaled offset
3145 Q: 9-bit signed offset
3146 We conservatively require an offset representable in either mode.
3147 */
3148 if (mode == TImode || mode == TFmode)
3149 return (offset_7bit_signed_scaled_p (mode, offset)
3150 && offset_9bit_signed_unscaled_p (mode, offset));
3151
3152 if (outer_code == PARALLEL)
3153 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3154 && offset_7bit_signed_scaled_p (mode, offset));
3155 else
3156 return offset_9bit_signed_unscaled_p (mode, offset);
3157 }
3158 return false;
3159
3160 case CONST:
3161 case SYMBOL_REF:
3162 case LABEL_REF:
3163 /* load literal: pc-relative constant pool entry. Only supported
3164 for SI mode or larger. */
43e9d192 3165 info->type = ADDRESS_SYMBOLIC;
79517551 3166 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3167 {
3168 rtx sym, addend;
3169
3170 split_const (x, &sym, &addend);
3171 return (GET_CODE (sym) == LABEL_REF
3172 || (GET_CODE (sym) == SYMBOL_REF
3173 && CONSTANT_POOL_ADDRESS_P (sym)));
3174 }
3175 return false;
3176
3177 case LO_SUM:
3178 info->type = ADDRESS_LO_SUM;
3179 info->base = XEXP (x, 0);
3180 info->offset = XEXP (x, 1);
3181 if (allow_reg_index_p
3182 && aarch64_base_register_rtx_p (info->base, strict_p))
3183 {
3184 rtx sym, offs;
3185 split_const (info->offset, &sym, &offs);
3186 if (GET_CODE (sym) == SYMBOL_REF
3187 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3188 == SYMBOL_SMALL_ABSOLUTE))
3189 {
3190 /* The symbol and offset must be aligned to the access size. */
3191 unsigned int align;
3192 unsigned int ref_size;
3193
3194 if (CONSTANT_POOL_ADDRESS_P (sym))
3195 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3196 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3197 {
3198 tree exp = SYMBOL_REF_DECL (sym);
3199 align = TYPE_ALIGN (TREE_TYPE (exp));
3200 align = CONSTANT_ALIGNMENT (exp, align);
3201 }
3202 else if (SYMBOL_REF_DECL (sym))
3203 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3204 else
3205 align = BITS_PER_UNIT;
3206
3207 ref_size = GET_MODE_SIZE (mode);
3208 if (ref_size == 0)
3209 ref_size = GET_MODE_SIZE (DImode);
3210
3211 return ((INTVAL (offs) & (ref_size - 1)) == 0
3212 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3213 }
3214 }
3215 return false;
3216
3217 default:
3218 return false;
3219 }
3220}
3221
3222bool
3223aarch64_symbolic_address_p (rtx x)
3224{
3225 rtx offset;
3226
3227 split_const (x, &x, &offset);
3228 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3229}
3230
3231/* Classify the base of symbolic expression X, given that X appears in
3232 context CONTEXT. */
3233
3234enum aarch64_symbol_type
3235aarch64_classify_symbolic_expression (rtx x,
3236 enum aarch64_symbol_context context)
3237{
3238 rtx offset;
da4f13a4 3239
3240 split_const (x, &x, &offset);
3241 return aarch64_classify_symbol (x, context);
3242}
3243
3244
3245/* Return TRUE if X is a legitimate address for accessing memory in
3246 mode MODE. */
3247static bool
3248aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3249{
3250 struct aarch64_address_info addr;
3251
3252 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3253}
3254
3255/* Return TRUE if X is a legitimate address for accessing memory in
3256 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3257 pair operation. */
3258bool
3259aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3260 RTX_CODE outer_code, bool strict_p)
3261{
3262 struct aarch64_address_info addr;
3263
3264 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3265}
3266
3267/* Return TRUE if rtx X is immediate constant 0.0 */
3268bool
3520f7cc 3269aarch64_float_const_zero_rtx_p (rtx x)
3270{
3271 REAL_VALUE_TYPE r;
3272
3273 if (GET_MODE (x) == VOIDmode)
3274 return false;
3275
3276 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3277 if (REAL_VALUE_MINUS_ZERO (r))
3278 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3279 return REAL_VALUES_EQUAL (r, dconst0);
3280}
3281
3282/* Return the fixed registers used for condition codes. */
3283
3284static bool
3285aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3286{
3287 *p1 = CC_REGNUM;
3288 *p2 = INVALID_REGNUM;
3289 return true;
3290}
3291
3292enum machine_mode
3293aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3294{
3295 /* All floating point compares return CCFP if it is an equality
3296 comparison, and CCFPE otherwise. */
3297 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3298 {
3299 switch (code)
3300 {
3301 case EQ:
3302 case NE:
3303 case UNORDERED:
3304 case ORDERED:
3305 case UNLT:
3306 case UNLE:
3307 case UNGT:
3308 case UNGE:
3309 case UNEQ:
3310 case LTGT:
3311 return CCFPmode;
3312
3313 case LT:
3314 case LE:
3315 case GT:
3316 case GE:
3317 return CCFPEmode;
3318
3319 default:
3320 gcc_unreachable ();
3321 }
3322 }
3323
3324 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3325 && y == const0_rtx
3326 && (code == EQ || code == NE || code == LT || code == GE)
3327 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3328 || GET_CODE (x) == NEG))
3329 return CC_NZmode;
3330
1c992d1e 3331 /* A compare with a shifted operand. Because of canonicalization,
3332 the comparison will have to be swapped when we emit the assembly
3333 code. */
3334 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3335 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3336 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3337 || GET_CODE (x) == LSHIFTRT
1c992d1e 3338 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3339 return CC_SWPmode;
3340
3341 /* Similarly for a negated operand, but we can only do this for
3342 equalities. */
3343 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3344 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3345 && (code == EQ || code == NE)
3346 && GET_CODE (x) == NEG)
3347 return CC_Zmode;
3348
3349 /* A compare of a mode narrower than SI mode against zero can be done
3350 by extending the value in the comparison. */
3351 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3352 && y == const0_rtx)
3353 /* Only use sign-extension if we really need it. */
3354 return ((code == GT || code == GE || code == LE || code == LT)
3355 ? CC_SESWPmode : CC_ZESWPmode);
3356
3357 /* For everything else, return CCmode. */
3358 return CCmode;
3359}
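/* Example (illustrative): for a comparison of (ashift x 2) against a
   register y, the function above returns CC_SWPmode, because the
   instruction actually emitted compares y with the shifted x; the
   swapped condition is then produced by aarch64_get_condition_code
   below (GE maps to AARCH64_LE, and so on).  */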
3360
3361static unsigned
3362aarch64_get_condition_code (rtx x)
3363{
3364 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3365 enum rtx_code comp_code = GET_CODE (x);
3366
3367 if (GET_MODE_CLASS (mode) != MODE_CC)
3368 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3369
3370 switch (mode)
3371 {
3372 case CCFPmode:
3373 case CCFPEmode:
3374 switch (comp_code)
3375 {
3376 case GE: return AARCH64_GE;
3377 case GT: return AARCH64_GT;
3378 case LE: return AARCH64_LS;
3379 case LT: return AARCH64_MI;
3380 case NE: return AARCH64_NE;
3381 case EQ: return AARCH64_EQ;
3382 case ORDERED: return AARCH64_VC;
3383 case UNORDERED: return AARCH64_VS;
3384 case UNLT: return AARCH64_LT;
3385 case UNLE: return AARCH64_LE;
3386 case UNGT: return AARCH64_HI;
3387 case UNGE: return AARCH64_PL;
3388 default: gcc_unreachable ();
3389 }
3390 break;
3391
3392 case CCmode:
3393 switch (comp_code)
3394 {
3395 case NE: return AARCH64_NE;
3396 case EQ: return AARCH64_EQ;
3397 case GE: return AARCH64_GE;
3398 case GT: return AARCH64_GT;
3399 case LE: return AARCH64_LE;
3400 case LT: return AARCH64_LT;
3401 case GEU: return AARCH64_CS;
3402 case GTU: return AARCH64_HI;
3403 case LEU: return AARCH64_LS;
3404 case LTU: return AARCH64_CC;
3405 default: gcc_unreachable ();
3406 }
3407 break;
3408
3409 case CC_SWPmode:
3410 case CC_ZESWPmode:
3411 case CC_SESWPmode:
3412 switch (comp_code)
3413 {
3414 case NE: return AARCH64_NE;
3415 case EQ: return AARCH64_EQ;
3416 case GE: return AARCH64_LE;
3417 case GT: return AARCH64_LT;
3418 case LE: return AARCH64_GE;
3419 case LT: return AARCH64_GT;
3420 case GEU: return AARCH64_LS;
3421 case GTU: return AARCH64_CC;
3422 case LEU: return AARCH64_CS;
3423 case LTU: return AARCH64_HI;
3424 default: gcc_unreachable ();
3425 }
3426 break;
3427
3428 case CC_NZmode:
3429 switch (comp_code)
3430 {
3431 case NE: return AARCH64_NE;
3432 case EQ: return AARCH64_EQ;
3433 case GE: return AARCH64_PL;
3434 case LT: return AARCH64_MI;
3435 default: gcc_unreachable ();
3436 }
3437 break;
3438
3439 case CC_Zmode:
3440 switch (comp_code)
3441 {
3442 case NE: return AARCH64_NE;
3443 case EQ: return AARCH64_EQ;
3444 default: gcc_unreachable ();
3445 }
3446 break;
3447
3448 default:
3449 gcc_unreachable ();
3450 break;
3451 }
3452}
3453
3454static unsigned
3455bit_count (unsigned HOST_WIDE_INT value)
3456{
3457 unsigned count = 0;
3458
3459 while (value)
3460 {
3461 count++;
3462 value &= value - 1;
3463 }
3464
3465 return count;
3466}
3467
3468void
3469aarch64_print_operand (FILE *f, rtx x, char code)
3470{
3471 switch (code)
3472 {
3473 /* An integer or symbol address without a preceding # sign. */
3474 case 'c':
3475 switch (GET_CODE (x))
3476 {
3477 case CONST_INT:
3478 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3479 break;
3480
3481 case SYMBOL_REF:
3482 output_addr_const (f, x);
3483 break;
3484
3485 case CONST:
3486 if (GET_CODE (XEXP (x, 0)) == PLUS
3487 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3488 {
3489 output_addr_const (f, x);
3490 break;
3491 }
3492 /* Fall through. */
3493
3494 default:
3495 output_operand_lossage ("Unsupported operand for code '%c'", code);
3496 }
3497 break;
3498
3499 case 'e':
3500 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3501 {
3502 int n;
3503
3504 if (GET_CODE (x) != CONST_INT
3505 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3506 {
3507 output_operand_lossage ("invalid operand for '%%%c'", code);
3508 return;
3509 }
3510
3511 switch (n)
3512 {
3513 case 3:
3514 fputc ('b', f);
3515 break;
3516 case 4:
3517 fputc ('h', f);
3518 break;
3519 case 5:
3520 fputc ('w', f);
3521 break;
3522 default:
3523 output_operand_lossage ("invalid operand for '%%%c'", code);
3524 return;
3525 }
3526 }
3527 break;
3528
3529 case 'p':
3530 {
3531 int n;
3532
3533 /* Print N such that 2^N == X. */
3534 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3535 {
3536 output_operand_lossage ("invalid operand for '%%%c'", code);
3537 return;
3538 }
3539
3540 asm_fprintf (f, "%d", n);
3541 }
3542 break;
3543
3544 case 'P':
3545 /* Print the number of non-zero bits in X (a const_int). */
3546 if (GET_CODE (x) != CONST_INT)
3547 {
3548 output_operand_lossage ("invalid operand for '%%%c'", code);
3549 return;
3550 }
3551
3552 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3553 break;
3554
3555 case 'H':
3556 /* Print the higher numbered register of a pair (TImode) of regs. */
3557 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3558 {
3559 output_operand_lossage ("invalid operand for '%%%c'", code);
3560 return;
3561 }
3562
01a3a324 3563 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3564 break;
3565
3566 case 'm':
3567 /* Print a condition (eq, ne, etc). */
3568
3569 /* CONST_TRUE_RTX means always -- that's the default. */
3570 if (x == const_true_rtx)
3571 return;
3572
3573 if (!COMPARISON_P (x))
3574 {
3575 output_operand_lossage ("invalid operand for '%%%c'", code);
3576 return;
3577 }
3578
3579 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3580 break;
3581
3582 case 'M':
3583 /* Print the inverse of a condition (eq <-> ne, etc). */
3584
3585 /* CONST_TRUE_RTX means never -- that's the default. */
3586 if (x == const_true_rtx)
3587 {
3588 fputs ("nv", f);
3589 return;
3590 }
3591
3592 if (!COMPARISON_P (x))
3593 {
3594 output_operand_lossage ("invalid operand for '%%%c'", code);
3595 return;
3596 }
3597
3598 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3599 (aarch64_get_condition_code (x))], f);
3600 break;
3601
3602 case 'b':
3603 case 'h':
3604 case 's':
3605 case 'd':
3606 case 'q':
3607 /* Print a scalar FP/SIMD register name. */
3608 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3609 {
3610 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3611 return;
3612 }
50ce6f88 3613 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3614 break;
3615
3616 case 'S':
3617 case 'T':
3618 case 'U':
3619 case 'V':
3620 /* Print the first FP/SIMD register name in a list. */
3621 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3622 {
3623 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3624 return;
3625 }
50ce6f88 3626 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3627 break;
3628
a05c0ddf 3629 case 'X':
50d38551 3630 /* Print bottom 16 bits of integer constant in hex. */
3631 if (GET_CODE (x) != CONST_INT)
3632 {
3633 output_operand_lossage ("invalid operand for '%%%c'", code);
3634 return;
3635 }
50d38551 3636 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3637 break;
3638
3639 case 'w':
3640 case 'x':
3641 /* Print a general register name or the zero register (32-bit or
3642 64-bit). */
3643 if (x == const0_rtx
3644 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 3645 {
50ce6f88 3646 asm_fprintf (f, "%czr", code);
3647 break;
3648 }
3649
3650 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3651 {
50ce6f88 3652 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3653 break;
3654 }
3655
3656 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3657 {
50ce6f88 3658 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3659 break;
3660 }
3661
3662 /* Fall through */
3663
3664 case 0:
 3665	 /* Print a normal operand; if it's a general register, then we
3666 assume DImode. */
3667 if (x == NULL)
3668 {
3669 output_operand_lossage ("missing operand");
3670 return;
3671 }
3672
3673 switch (GET_CODE (x))
3674 {
3675 case REG:
01a3a324 3676 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3677 break;
3678
3679 case MEM:
3680 aarch64_memory_reference_mode = GET_MODE (x);
3681 output_address (XEXP (x, 0));
3682 break;
3683
3684 case LABEL_REF:
3685 case SYMBOL_REF:
3686 output_addr_const (asm_out_file, x);
3687 break;
3688
3689 case CONST_INT:
3690 asm_fprintf (f, "%wd", INTVAL (x));
3691 break;
3692
3693 case CONST_VECTOR:
3694 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3695 {
3696 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3697 HOST_WIDE_INT_MIN,
3698 HOST_WIDE_INT_MAX));
3699 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3700 }
3701 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3702 {
3703 fputc ('0', f);
3704 }
3705 else
3706 gcc_unreachable ();
3707 break;
3708
3709 case CONST_DOUBLE:
3710 /* CONST_DOUBLE can represent a double-width integer.
3711 In this case, the mode of x is VOIDmode. */
3712 if (GET_MODE (x) == VOIDmode)
3713 ; /* Do Nothing. */
3714 else if (aarch64_float_const_zero_rtx_p (x))
3715 {
3716 fputc ('0', f);
3717 break;
3718 }
3719 else if (aarch64_float_const_representable_p (x))
3720 {
3721#define buf_size 20
3722 char float_buf[buf_size] = {'\0'};
3723 REAL_VALUE_TYPE r;
3724 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3725 real_to_decimal_for_mode (float_buf, &r,
3726 buf_size, buf_size,
3727 1, GET_MODE (x));
3728 asm_fprintf (asm_out_file, "%s", float_buf);
3729 break;
3730#undef buf_size
3731 }
3732 output_operand_lossage ("invalid constant");
3733 return;
3734 default:
3735 output_operand_lossage ("invalid operand");
3736 return;
3737 }
3738 break;
3739
3740 case 'A':
3741 if (GET_CODE (x) == HIGH)
3742 x = XEXP (x, 0);
3743
3744 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3745 {
3746 case SYMBOL_SMALL_GOT:
3747 asm_fprintf (asm_out_file, ":got:");
3748 break;
3749
3750 case SYMBOL_SMALL_TLSGD:
3751 asm_fprintf (asm_out_file, ":tlsgd:");
3752 break;
3753
3754 case SYMBOL_SMALL_TLSDESC:
3755 asm_fprintf (asm_out_file, ":tlsdesc:");
3756 break;
3757
3758 case SYMBOL_SMALL_GOTTPREL:
3759 asm_fprintf (asm_out_file, ":gottprel:");
3760 break;
3761
3762 case SYMBOL_SMALL_TPREL:
3763 asm_fprintf (asm_out_file, ":tprel:");
3764 break;
3765
3766 case SYMBOL_TINY_GOT:
3767 gcc_unreachable ();
3768 break;
3769
3770 default:
3771 break;
3772 }
3773 output_addr_const (asm_out_file, x);
3774 break;
3775
3776 case 'L':
3777 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3778 {
3779 case SYMBOL_SMALL_GOT:
3780 asm_fprintf (asm_out_file, ":lo12:");
3781 break;
3782
3783 case SYMBOL_SMALL_TLSGD:
3784 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3785 break;
3786
3787 case SYMBOL_SMALL_TLSDESC:
3788 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3789 break;
3790
3791 case SYMBOL_SMALL_GOTTPREL:
3792 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3793 break;
3794
3795 case SYMBOL_SMALL_TPREL:
3796 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3797 break;
3798
3799 case SYMBOL_TINY_GOT:
3800 asm_fprintf (asm_out_file, ":got:");
3801 break;
3802
3803 default:
3804 break;
3805 }
3806 output_addr_const (asm_out_file, x);
3807 break;
3808
3809 case 'G':
3810
3811 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3812 {
3813 case SYMBOL_SMALL_TPREL:
3814 asm_fprintf (asm_out_file, ":tprel_hi12:");
3815 break;
3816 default:
3817 break;
3818 }
3819 output_addr_const (asm_out_file, x);
3820 break;
3821
3822 default:
3823 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3824 return;
3825 }
3826}
3827
3828void
3829aarch64_print_operand_address (FILE *f, rtx x)
3830{
3831 struct aarch64_address_info addr;
3832
3833 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3834 MEM, true))
3835 switch (addr.type)
3836 {
3837 case ADDRESS_REG_IMM:
3838 if (addr.offset == const0_rtx)
01a3a324 3839 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43e9d192 3840 else
01a3a324 3841 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3842 INTVAL (addr.offset));
3843 return;
3844
3845 case ADDRESS_REG_REG:
3846 if (addr.shift == 0)
3847 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3848 reg_names [REGNO (addr.offset)]);
43e9d192 3849 else
3850 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3851 reg_names [REGNO (addr.offset)], addr.shift);
3852 return;
3853
3854 case ADDRESS_REG_UXTW:
3855 if (addr.shift == 0)
01a3a324 3856 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3857 REGNO (addr.offset) - R0_REGNUM);
3858 else
01a3a324 3859 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3860 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3861 return;
3862
3863 case ADDRESS_REG_SXTW:
3864 if (addr.shift == 0)
01a3a324 3865 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3866 REGNO (addr.offset) - R0_REGNUM);
3867 else
01a3a324 3868 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3869 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3870 return;
3871
3872 case ADDRESS_REG_WB:
3873 switch (GET_CODE (x))
3874 {
3875 case PRE_INC:
01a3a324 3876 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3877 GET_MODE_SIZE (aarch64_memory_reference_mode));
3878 return;
3879 case POST_INC:
01a3a324 3880 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3881 GET_MODE_SIZE (aarch64_memory_reference_mode));
3882 return;
3883 case PRE_DEC:
01a3a324 3884 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3885 GET_MODE_SIZE (aarch64_memory_reference_mode));
3886 return;
3887 case POST_DEC:
01a3a324 3888 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3889 GET_MODE_SIZE (aarch64_memory_reference_mode));
3890 return;
3891 case PRE_MODIFY:
01a3a324 3892 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3893 INTVAL (addr.offset));
3894 return;
3895 case POST_MODIFY:
01a3a324 3896 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3897 INTVAL (addr.offset));
3898 return;
3899 default:
3900 break;
3901 }
3902 break;
3903
3904 case ADDRESS_LO_SUM:
01a3a324 3905 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3906 output_addr_const (f, addr.offset);
3907 asm_fprintf (f, "]");
3908 return;
3909
3910 case ADDRESS_SYMBOLIC:
3911 break;
3912 }
3913
3914 output_addr_const (f, x);
3915}
3916
3917bool
3918aarch64_label_mentioned_p (rtx x)
3919{
3920 const char *fmt;
3921 int i;
3922
3923 if (GET_CODE (x) == LABEL_REF)
3924 return true;
3925
3926 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3927 referencing instruction, but they are constant offsets, not
3928 symbols. */
3929 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3930 return false;
3931
3932 fmt = GET_RTX_FORMAT (GET_CODE (x));
3933 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3934 {
3935 if (fmt[i] == 'E')
3936 {
3937 int j;
3938
3939 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3940 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3941 return 1;
3942 }
3943 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3944 return 1;
3945 }
3946
3947 return 0;
3948}
3949
3950/* Implement REGNO_REG_CLASS. */
3951
3952enum reg_class
3953aarch64_regno_regclass (unsigned regno)
3954{
3955 if (GP_REGNUM_P (regno))
3956 return CORE_REGS;
3957
3958 if (regno == SP_REGNUM)
3959 return STACK_REG;
3960
3961 if (regno == FRAME_POINTER_REGNUM
3962 || regno == ARG_POINTER_REGNUM)
f24bb080 3963 return POINTER_REGS;
3964
3965 if (FP_REGNUM_P (regno))
3966 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3967
3968 return NO_REGS;
3969}
3970
3971/* Try a machine-dependent way of reloading an illegitimate address
3972 operand. If we find one, push the reload and return the new rtx. */
3973
3974rtx
3975aarch64_legitimize_reload_address (rtx *x_p,
3976 enum machine_mode mode,
3977 int opnum, int type,
3978 int ind_levels ATTRIBUTE_UNUSED)
3979{
3980 rtx x = *x_p;
3981
3982 /* Do not allow mem (plus (reg, const)) if vector mode. */
3983 if (aarch64_vector_mode_p (mode)
3984 && GET_CODE (x) == PLUS
3985 && REG_P (XEXP (x, 0))
3986 && CONST_INT_P (XEXP (x, 1)))
3987 {
3988 rtx orig_rtx = x;
3989 x = copy_rtx (x);
3990 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3991 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3992 opnum, (enum reload_type) type);
3993 return x;
3994 }
3995
3996 /* We must recognize output that we have already generated ourselves. */
3997 if (GET_CODE (x) == PLUS
3998 && GET_CODE (XEXP (x, 0)) == PLUS
3999 && REG_P (XEXP (XEXP (x, 0), 0))
4000 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4001 && CONST_INT_P (XEXP (x, 1)))
4002 {
4003 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4004 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4005 opnum, (enum reload_type) type);
4006 return x;
4007 }
4008
4009 /* We wish to handle large displacements off a base register by splitting
4010 the addend across an add and the mem insn. This can cut the number of
4011 extra insns needed from 3 to 1. It is only useful for load/store of a
4012 single register with 12 bit offset field. */
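 /* As an illustrative sketch (register numbers and the exact offset are
    made up; reload chooses the actual scratch), a DImode load at offset
    0x13008 from x0:

	ldr	x1, [x0, 0x13008]	// offset exceeds the 12-bit field

    is split into a shifted-immediate add plus a small in-range offset:

	add	x2, x0, 0x13000		// high part, a uimm12 << 12
	ldr	x1, [x2, 8]		// low part fits the offset field  */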
4013 if (GET_CODE (x) == PLUS
4014 && REG_P (XEXP (x, 0))
4015 && CONST_INT_P (XEXP (x, 1))
4016 && HARD_REGISTER_P (XEXP (x, 0))
4017 && mode != TImode
4018 && mode != TFmode
4019 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4020 {
4021 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4022 HOST_WIDE_INT low = val & 0xfff;
4023 HOST_WIDE_INT high = val - low;
4024 HOST_WIDE_INT offs;
4025 rtx cst;
4026 enum machine_mode xmode = GET_MODE (x);
4027
4028 /* In ILP32, xmode can be either DImode or SImode. */
4029 gcc_assert (xmode == DImode || xmode == SImode);
4030
4031 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4032 BLKmode alignment. */
4033 if (GET_MODE_SIZE (mode) == 0)
4034 return NULL_RTX;
4035
4036 offs = low % GET_MODE_SIZE (mode);
4037
4038 /* Align misaligned offset by adjusting high part to compensate. */
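 /* One illustrative case, assuming a DImode (8-byte) access: val = 0x1234
    gives low = 0x234, high = 0x1000, offs = 4.  high + offs = 0x1004 is
    not a valid shifted 12-bit immediate, so we align up instead: low
    becomes 0x238, high becomes 0xffc, and high + low is still 0x1234. */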
4039 if (offs != 0)
4040 {
4041 if (aarch64_uimm12_shift (high + offs))
4042 {
4043 /* Align down. */
4044 low = low - offs;
4045 high = high + offs;
4046 }
4047 else
4048 {
4049 /* Align up. */
4050 offs = GET_MODE_SIZE (mode) - offs;
4051 low = low + offs;
4052 high = high + (low & 0x1000) - offs;
4053 low &= 0xfff;
4054 }
4055 }
4056
4057 /* Check for overflow. */
4058 if (high + low != val)
4059 return NULL_RTX;
4060
4061 cst = GEN_INT (high);
4062 if (!aarch64_uimm12_shift (high))
28514dda 4063 cst = force_const_mem (xmode, cst);
4064
4065 /* Reload high part into base reg, leaving the low part
4066 in the mem instruction.
4067 Note that replacing this gen_rtx_PLUS with plus_constant is
4068 wrong in this case because we rely on the
4069 (plus (plus reg c1) c2) structure being preserved so that
4070 XEXP (*p, 0) in push_reload below uses the correct term. */
4071 x = gen_rtx_PLUS (xmode,
4072 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4073 GEN_INT (low));
4074
4075 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
28514dda 4076 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4077 opnum, (enum reload_type) type);
4078 return x;
4079 }
4080
4081 return NULL_RTX;
4082}
4083
4084
4085static reg_class_t
4086aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4087 reg_class_t rclass,
4088 enum machine_mode mode,
4089 secondary_reload_info *sri)
4090{
4091 /* Without the TARGET_SIMD instructions we cannot move a Q register
4092 to a Q register directly. We need a scratch. */
4093 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4094 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4095 && reg_class_subset_p (rclass, FP_REGS))
4096 {
4097 if (mode == TFmode)
4098 sri->icode = CODE_FOR_aarch64_reload_movtf;
4099 else if (mode == TImode)
4100 sri->icode = CODE_FOR_aarch64_reload_movti;
4101 return NO_REGS;
4102 }
4103
4104 /* A TFmode or TImode memory access should be handled via FP_REGS
4105 because AArch64 has richer addressing modes for LDR/STR instructions
4106 than LDP/STP instructions. */
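 /* For example (registers illustrative), the register-offset form
	ldr	q0, [x0, x1]
    exists for a single 128-bit LDR, whereas LDP/STP only accept a small
    scaled immediate offset. */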
4107 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4108 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4109 return FP_REGS;
4110
4111 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4112 return CORE_REGS;
4113
4114 return NO_REGS;
4115}
4116
4117static bool
4118aarch64_can_eliminate (const int from, const int to)
4119{
4120 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4121 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4122
4123 if (frame_pointer_needed)
4124 {
4125 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4126 return true;
4127 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4128 return false;
4129 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4130 && !cfun->calls_alloca)
4131 return true;
4132 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4133 return true;
4134 return false;
4135 }
4136 else
4137 {
4138 /* If we decided that we didn't need a leaf frame pointer but then used
4139 LR in the function, then we'll want a frame pointer after all, so
4140 prevent this elimination to ensure a frame pointer is used.
4141
4142 NOTE: the original value of flag_omit_frame_pointer gets trashed
4143 IFF flag_omit_leaf_frame_pointer is true, so we check the value
4144 of faked_omit_frame_pointer here (which is true when we always
4145 wish to keep non-leaf frame pointers but only wish to keep leaf frame
4146 pointers when LR is clobbered). */
77436791 4147 if (to == STACK_POINTER_REGNUM
4148 && df_regs_ever_live_p (LR_REGNUM)
4149 && faked_omit_frame_pointer)
4150 return false;
4151 }
777e6976 4152
4153 return true;
4154}
4155
4156HOST_WIDE_INT
4157aarch64_initial_elimination_offset (unsigned from, unsigned to)
4158{
4159 HOST_WIDE_INT frame_size;
4160 HOST_WIDE_INT offset;
4161
4162 aarch64_layout_frame ();
4163 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4164 + crtl->outgoing_args_size
4165 + cfun->machine->saved_varargs_size);
4166
4167 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4168 offset = frame_size;
4169
4170 if (to == HARD_FRAME_POINTER_REGNUM)
4171 {
4172 if (from == ARG_POINTER_REGNUM)
4173 return offset - crtl->outgoing_args_size;
4174
4175 if (from == FRAME_POINTER_REGNUM)
6991c977 4176 return cfun->machine->frame.saved_regs_size + get_frame_size ();
4177 }
4178
4179 if (to == STACK_POINTER_REGNUM)
4180 {
4181 if (from == FRAME_POINTER_REGNUM)
4182 {
4183 HOST_WIDE_INT elim = crtl->outgoing_args_size
4184 + cfun->machine->frame.saved_regs_size
6991c977 4185 + get_frame_size ()
4186 - cfun->machine->frame.fp_lr_offset;
4187 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4188 return elim;
4189 }
4190 }
4191
4192 return offset;
4193}
4194
4195
4196/* Implement RETURN_ADDR_RTX. We do not support moving back to a
4197 previous frame. */
4198
4199rtx
4200aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4201{
4202 if (count != 0)
4203 return const0_rtx;
4204 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4205}
4206
4207
4208static void
4209aarch64_asm_trampoline_template (FILE *f)
4210{
4211 if (TARGET_ILP32)
4212 {
4213 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4214 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4215 }
4216 else
4217 {
4218 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4219 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4220 }
01a3a324 4221 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 4222 assemble_aligned_integer (4, const0_rtx);
4223 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4224 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4225}
4226
4227static void
4228aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4229{
4230 rtx fnaddr, mem, a_tramp;
28514dda 4231 const int tramp_code_sz = 16;
4232
4233 /* Don't need to copy the trailing D-words, we fill those in below. */
4234 emit_block_move (m_tramp, assemble_trampoline_template (),
4235 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4236 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 4237 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4238 if (GET_MODE (fnaddr) != ptr_mode)
4239 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4240 emit_move_insn (mem, fnaddr);
4241
28514dda 4242 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4243 emit_move_insn (mem, chain_value);
4244
4245 /* XXX We should really define a "clear_cache" pattern and use
4246 gen_clear_cache(). */
4247 a_tramp = XEXP (m_tramp, 0);
4248 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4249 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4250 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4251 ptr_mode);
4252}
4253
4254static unsigned char
4255aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4256{
4257 switch (regclass)
4258 {
4259 case CORE_REGS:
4260 case POINTER_REGS:
4261 case GENERAL_REGS:
4262 case ALL_REGS:
4263 case FP_REGS:
4264 case FP_LO_REGS:
4265 return
4266 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4267 (GET_MODE_SIZE (mode) + 7) / 8;
4268 case STACK_REG:
4269 return 1;
4270
4271 case NO_REGS:
4272 return 0;
4273
4274 default:
4275 break;
4276 }
4277 gcc_unreachable ();
4278}
4279
4280static reg_class_t
78d8b9f0 4281aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 4282{
51bb310d 4283 if (regclass == POINTER_REGS)
4284 return GENERAL_REGS;
4285
4286 if (regclass == STACK_REG)
4287 {
4288 if (REG_P(x)
4289 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4290 return regclass;
4291
4292 return NO_REGS;
4293 }
4294
4295 /* If it's an integer immediate that MOVI can't handle, then
4296 FP_REGS is not an option, so we return NO_REGS instead. */
4297 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4298 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4299 return NO_REGS;
4300
4301 /* Register eliminiation can result in a request for
4302 SP+constant->FP_REGS. We cannot support such operations which
4303 use SP as source and an FP_REG as destination, so reject out
4304 right now. */
4305 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4306 {
4307 rtx lhs = XEXP (x, 0);
4308
4309 /* Look through a possible SUBREG introduced by ILP32. */
4310 if (GET_CODE (lhs) == SUBREG)
4311 lhs = SUBREG_REG (lhs);
4312
4313 gcc_assert (REG_P (lhs));
4314 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4315 POINTER_REGS));
4316 return NO_REGS;
4317 }
4318
78d8b9f0 4319 return regclass;
4320}
4321
4322void
4323aarch64_asm_output_labelref (FILE* f, const char *name)
4324{
4325 asm_fprintf (f, "%U%s", name);
4326}
4327
4328static void
4329aarch64_elf_asm_constructor (rtx symbol, int priority)
4330{
4331 if (priority == DEFAULT_INIT_PRIORITY)
4332 default_ctor_section_asm_out_constructor (symbol, priority);
4333 else
4334 {
4335 section *s;
4336 char buf[18];
4337 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4338 s = get_section (buf, SECTION_WRITE, NULL);
4339 switch_to_section (s);
4340 assemble_align (POINTER_SIZE);
28514dda 4341 assemble_aligned_integer (POINTER_BYTES, symbol);
4342 }
4343}
4344
4345static void
4346aarch64_elf_asm_destructor (rtx symbol, int priority)
4347{
4348 if (priority == DEFAULT_INIT_PRIORITY)
4349 default_dtor_section_asm_out_destructor (symbol, priority);
4350 else
4351 {
4352 section *s;
4353 char buf[18];
4354 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4355 s = get_section (buf, SECTION_WRITE, NULL);
4356 switch_to_section (s);
4357 assemble_align (POINTER_SIZE);
28514dda 4358 assemble_aligned_integer (POINTER_BYTES, symbol);
4359 }
4360}
4361
4362const char*
4363aarch64_output_casesi (rtx *operands)
4364{
4365 char buf[100];
4366 char label[100];
592a16fc 4367 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4368 int index;
4369 static const char *const patterns[4][2] =
4370 {
4371 {
4372 "ldrb\t%w3, [%0,%w1,uxtw]",
4373 "add\t%3, %4, %w3, sxtb #2"
4374 },
4375 {
4376 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4377 "add\t%3, %4, %w3, sxth #2"
4378 },
4379 {
4380 "ldr\t%w3, [%0,%w1,uxtw #2]",
4381 "add\t%3, %4, %w3, sxtw #2"
4382 },
4383 /* We assume that DImode is only generated when not optimizing and
4384 that we don't really need 64-bit address offsets. That would
4385 imply an object file with 8GB of code in a single function! */
4386 {
4387 "ldr\t%w3, [%0,%w1,uxtw #2]",
4388 "add\t%3, %4, %w3, sxtw #2"
4389 }
4390 };
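 /* With a HImode dispatch table, for instance, the sequence emitted below
    is roughly (operands map to the %-escapes above, registers are
    illustrative):

	ldrh	w3, [x0, w1, uxtw #1]
	adr	x4, .Lrtx<N>
	add	x3, x4, w3, sxth #2
	br	x3
	.Lrtx<N>:			// start of the ADDR_DIFF_VEC  */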
4391
4392 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4393
4394 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4395
4396 gcc_assert (index >= 0 && index <= 3);
4397
4398 /* Need to implement table size reduction, by changing the code below. */
4399 output_asm_insn (patterns[index][0], operands);
4400 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4401 snprintf (buf, sizeof (buf),
4402 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4403 output_asm_insn (buf, operands);
4404 output_asm_insn (patterns[index][1], operands);
4405 output_asm_insn ("br\t%3", operands);
4406 assemble_label (asm_out_file, label);
4407 return "";
4408}
4409
4410
4411/* Return size in bits of an arithmetic operand which is shifted/scaled and
4412 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4413 operator. */
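 /* For example, a shift of 1 with a mask of 0x1fe describes a byte shifted
    left by one, so we return 8 (suitable for UXTB); any mask that is not a
    contiguous 8-, 16- or 32-bit field at the given shift yields 0. */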
4414
4415int
4416aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4417{
4418 if (shift >= 0 && shift <= 3)
4419 {
4420 int size;
4421 for (size = 8; size <= 32; size *= 2)
4422 {
4423 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4424 if (mask == bits << shift)
4425 return size;
4426 }
4427 }
4428 return 0;
4429}
4430
4431static bool
4432aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4433 const_rtx x ATTRIBUTE_UNUSED)
4434{
4435 /* We can't use blocks for constants when we're using a per-function
4436 constant pool. */
4437 return false;
4438}
4439
4440static section *
4441aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4442 rtx x ATTRIBUTE_UNUSED,
4443 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4444{
4445 /* Force all constant pool entries into the current function section. */
4446 return function_section (current_function_decl);
4447}
4448
4449
4450/* Costs. */
4451
4452/* Helper function for rtx cost calculation. Strip a shift expression
4453 from X. Returns the inner operand if successful, or the original
4454 expression on failure. */
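 /* E.g. both (ashift (reg x) (const_int 3)) and (mult (reg x) (const_int 8))
    strip down to (reg x); anything else comes back unchanged. */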
4455static rtx
4456aarch64_strip_shift (rtx x)
4457{
4458 rtx op = x;
4459
4460 if ((GET_CODE (op) == ASHIFT
4461 || GET_CODE (op) == ASHIFTRT
4462 || GET_CODE (op) == LSHIFTRT)
4463 && CONST_INT_P (XEXP (op, 1)))
4464 return XEXP (op, 0);
4465
4466 if (GET_CODE (op) == MULT
4467 && CONST_INT_P (XEXP (op, 1))
4468 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4469 return XEXP (op, 0);
4470
4471 return x;
4472}
4473
4474/* Helper function for rtx cost calculation. Strip a shift or extend
4475 expression from X. Returns the inner operand if successful, or the
4476 original expression on failure. We deal with a number of possible
4477 canonicalization variations here. */
4478static rtx
4479aarch64_strip_shift_or_extend (rtx x)
4480{
4481 rtx op = x;
4482
4483 /* Zero and sign extraction of a widened value. */
4484 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4485 && XEXP (op, 2) == const0_rtx
4486 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4487 XEXP (op, 1)))
4488 return XEXP (XEXP (op, 0), 0);
4489
4490 /* It can also be represented (for zero-extend) as an AND with an
4491 immediate. */
4492 if (GET_CODE (op) == AND
4493 && GET_CODE (XEXP (op, 0)) == MULT
4494 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4495 && CONST_INT_P (XEXP (op, 1))
4496 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4497 INTVAL (XEXP (op, 1))) != 0)
4498 return XEXP (XEXP (op, 0), 0);
4499
4500 /* Now handle extended register, as this may also have an optional
4501 left shift by 1..4. */
4502 if (GET_CODE (op) == ASHIFT
4503 && CONST_INT_P (XEXP (op, 1))
4504 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4505 op = XEXP (op, 0);
4506
4507 if (GET_CODE (op) == ZERO_EXTEND
4508 || GET_CODE (op) == SIGN_EXTEND)
4509 op = XEXP (op, 0);
4510
4511 if (op != x)
4512 return op;
4513
4514 return aarch64_strip_shift (x);
4515}
4516
4517/* Calculate the cost of calculating X, storing it in *COST. Result
4518 is true if the total cost of the operation has now been calculated. */
4519static bool
4520aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4521 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4522{
4523 rtx op0, op1;
73250c4c 4524 const struct cpu_cost_table *extra_cost
4525 = aarch64_tune_params->insn_extra_cost;
4526
4527 switch (code)
4528 {
4529 case SET:
4530 op0 = SET_DEST (x);
4531 op1 = SET_SRC (x);
4532
4533 switch (GET_CODE (op0))
4534 {
4535 case MEM:
4536 if (speed)
73250c4c 4537 *cost += extra_cost->ldst.store;
4538
4539 if (op1 != const0_rtx)
4540 *cost += rtx_cost (op1, SET, 1, speed);
4541 return true;
4542
4543 case SUBREG:
4544 if (! REG_P (SUBREG_REG (op0)))
4545 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4546 /* Fall through. */
4547 case REG:
4548 /* Cost is just the cost of the RHS of the set. */
4549 *cost += rtx_cost (op1, SET, 1, true);
4550 return true;
4551
4552 case ZERO_EXTRACT: /* Bit-field insertion. */
4553 case SIGN_EXTRACT:
4554 /* Strip any redundant widening of the RHS to meet the width of
4555 the target. */
4556 if (GET_CODE (op1) == SUBREG)
4557 op1 = SUBREG_REG (op1);
4558 if ((GET_CODE (op1) == ZERO_EXTEND
4559 || GET_CODE (op1) == SIGN_EXTEND)
4560 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4561 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4562 >= INTVAL (XEXP (op0, 1))))
4563 op1 = XEXP (op1, 0);
4564 *cost += rtx_cost (op1, SET, 1, speed);
4565 return true;
4566
4567 default:
4568 break;
4569 }
4570 return false;
4571
4572 case MEM:
4573 if (speed)
73250c4c 4574 *cost += extra_cost->ldst.load;
4575
4576 return true;
4577
4578 case NEG:
4579 op0 = CONST0_RTX (GET_MODE (x));
4580 op1 = XEXP (x, 0);
4581 goto cost_minus;
4582
4583 case COMPARE:
4584 op0 = XEXP (x, 0);
4585 op1 = XEXP (x, 1);
4586
4587 if (op1 == const0_rtx
4588 && GET_CODE (op0) == AND)
4589 {
4590 x = op0;
4591 goto cost_logic;
4592 }
4593
4594 /* Comparisons can work if the order is swapped.
4595 Canonicalization puts the more complex operation first, but
4596 we want it in op1. */
4597 if (! (REG_P (op0)
4598 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4599 {
4600 op0 = XEXP (x, 1);
4601 op1 = XEXP (x, 0);
4602 }
4603 goto cost_minus;
4604
4605 case MINUS:
4606 op0 = XEXP (x, 0);
4607 op1 = XEXP (x, 1);
4608
4609 cost_minus:
4610 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4611 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4612 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4613 {
4614 if (op0 != const0_rtx)
4615 *cost += rtx_cost (op0, MINUS, 0, speed);
4616
4617 if (CONST_INT_P (op1))
4618 {
4619 if (!aarch64_uimm12_shift (INTVAL (op1)))
4620 *cost += rtx_cost (op1, MINUS, 1, speed);
4621 }
4622 else
4623 {
4624 op1 = aarch64_strip_shift_or_extend (op1);
4625 *cost += rtx_cost (op1, MINUS, 1, speed);
4626 }
4627 return true;
4628 }
4629
4630 return false;
4631
4632 case PLUS:
4633 op0 = XEXP (x, 0);
4634 op1 = XEXP (x, 1);
4635
4636 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4637 {
4638 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4639 {
4640 *cost += rtx_cost (op0, PLUS, 0, speed);
4641 }
4642 else
4643 {
4644 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4645
4646 if (new_op0 == op0
4647 && GET_CODE (op0) == MULT)
4648 {
4649 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4650 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4651 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4652 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4653 {
4654 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4655 speed)
4656 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4657 speed)
4658 + rtx_cost (op1, PLUS, 1, speed));
4659 if (speed)
4660 *cost +=
4661 extra_cost->mult[GET_MODE (x) == DImode].extend_add;
4662 return true;
4663 }
328402a9 4664
4665 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4666 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4667 + rtx_cost (op1, PLUS, 1, speed));
4668
4669 if (speed)
73250c4c 4670 *cost += extra_cost->mult[GET_MODE (x) == DImode].add;
4671
4672 return true;
4673 }
4674
4675 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4676 + rtx_cost (op1, PLUS, 1, speed));
4677 }
4678 return true;
4679 }
4680
4681 return false;
4682
4683 case IOR:
4684 case XOR:
4685 case AND:
4686 cost_logic:
4687 op0 = XEXP (x, 0);
4688 op1 = XEXP (x, 1);
4689
4690 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4691 {
4692 if (CONST_INT_P (op1)
4693 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4694 {
4695 *cost += rtx_cost (op0, AND, 0, speed);
4696 }
4697 else
4698 {
4699 if (GET_CODE (op0) == NOT)
4700 op0 = XEXP (op0, 0);
4701 op0 = aarch64_strip_shift (op0);
4702 *cost += (rtx_cost (op0, AND, 0, speed)
4703 + rtx_cost (op1, AND, 1, speed));
4704 }
4705 return true;
4706 }
4707 return false;
4708
4709 case ZERO_EXTEND:
4710 if ((GET_MODE (x) == DImode
4711 && GET_MODE (XEXP (x, 0)) == SImode)
4712 || GET_CODE (XEXP (x, 0)) == MEM)
4713 {
4714 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4715 return true;
4716 }
4717 return false;
4718
4719 case SIGN_EXTEND:
4720 if (GET_CODE (XEXP (x, 0)) == MEM)
4721 {
4722 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4723 return true;
4724 }
4725 return false;
4726
4727 case ROTATE:
4728 if (!CONST_INT_P (XEXP (x, 1)))
4729 *cost += COSTS_N_INSNS (2);
4730 /* Fall through. */
4731 case ROTATERT:
4732 case LSHIFTRT:
4733 case ASHIFT:
4734 case ASHIFTRT:
4735
4736 /* Shifting by a register often takes an extra cycle. */
4737 if (speed && !CONST_INT_P (XEXP (x, 1)))
73250c4c 4738 *cost += extra_cost->alu.arith_shift_reg;
4739
4740 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4741 return true;
4742
4743 case HIGH:
4744 if (!CONSTANT_P (XEXP (x, 0)))
4745 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4746 return true;
4747
4748 case LO_SUM:
4749 if (!CONSTANT_P (XEXP (x, 1)))
4750 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4751 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4752 return true;
4753
4754 case ZERO_EXTRACT:
4755 case SIGN_EXTRACT:
4756 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4757 return true;
4758
4759 case MULT:
4760 op0 = XEXP (x, 0);
4761 op1 = XEXP (x, 1);
4762
4763 *cost = COSTS_N_INSNS (1);
4764 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4765 {
4766 if (CONST_INT_P (op1)
4767 && exact_log2 (INTVAL (op1)) > 0)
4768 {
4769 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4770 return true;
4771 }
4772
4773 if ((GET_CODE (op0) == ZERO_EXTEND
4774 && GET_CODE (op1) == ZERO_EXTEND)
4775 || (GET_CODE (op0) == SIGN_EXTEND
4776 && GET_CODE (op1) == SIGN_EXTEND))
4777 {
4778 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4779 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4780 if (speed)
73250c4c 4781 *cost += extra_cost->mult[GET_MODE (x) == DImode].extend;
4782 return true;
4783 }
4784
4785 if (speed)
73250c4c 4786 *cost += extra_cost->mult[GET_MODE (x) == DImode].simple;
4787 }
4788 else if (speed)
4789 {
4790 if (GET_MODE (x) == DFmode)
73250c4c 4791 *cost += extra_cost->fp[1].mult;
43e9d192 4792 else if (GET_MODE (x) == SFmode)
73250c4c 4793 *cost += extra_cost->fp[0].mult;
4794 }
4795
4796 return false; /* All arguments need to be in registers. */
4797
4798 case MOD:
4799 case UMOD:
4800 *cost = COSTS_N_INSNS (2);
4801 if (speed)
4802 {
4803 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4804 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
4805 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
43e9d192 4806 else if (GET_MODE (x) == DFmode)
4807 *cost += (extra_cost->fp[1].mult
4808 + extra_cost->fp[1].div);
43e9d192 4809 else if (GET_MODE (x) == SFmode)
4810 *cost += (extra_cost->fp[0].mult
4811 + extra_cost->fp[0].div);
4812 }
4813 return false; /* All arguments need to be in registers. */
4814
4815 case DIV:
4816 case UDIV:
4817 *cost = COSTS_N_INSNS (1);
4818 if (speed)
4819 {
4820 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
73250c4c 4821 *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
43e9d192 4822 else if (GET_MODE (x) == DFmode)
73250c4c 4823 *cost += extra_cost->fp[1].div;
43e9d192 4824 else if (GET_MODE (x) == SFmode)
73250c4c 4825 *cost += extra_cost->fp[0].div;
4826 }
4827 return false; /* All arguments need to be in registers. */
4828
4829 default:
4830 break;
4831 }
4832 return false;
4833}
4834
4835static int
4836aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4837 enum machine_mode mode ATTRIBUTE_UNUSED,
4838 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4839{
4840 enum rtx_code c = GET_CODE (x);
4841 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4842
4843 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4844 return addr_cost->pre_modify;
4845
4846 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4847 return addr_cost->post_modify;
4848
4849 if (c == PLUS)
4850 {
4851 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4852 return addr_cost->imm_offset;
4853 else if (GET_CODE (XEXP (x, 0)) == MULT
4854 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4855 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4856 return addr_cost->register_extend;
4857
4858 return addr_cost->register_offset;
4859 }
4860 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4861 return addr_cost->imm_offset;
4862
4863 return 0;
4864}
4865
4866static int
4867aarch64_register_move_cost (enum machine_mode mode,
4868 reg_class_t from, reg_class_t to)
4869{
4870 const struct cpu_regmove_cost *regmove_cost
4871 = aarch64_tune_params->regmove_cost;
4872
4873 if (from == GENERAL_REGS && to == GENERAL_REGS)
4874 return regmove_cost->GP2GP;
4875 else if (from == GENERAL_REGS)
4876 return regmove_cost->GP2FP;
4877 else if (to == GENERAL_REGS)
4878 return regmove_cost->FP2GP;
4879
4880 /* When AdvSIMD instructions are disabled it is not possible to move
4881 a 128-bit value directly between Q registers. This is handled in
4882 secondary reload. A general register is used as a scratch to move
4883 the upper DI value and the lower DI value is moved directly,
4884 hence the cost is the sum of three moves. */
4885
4886 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
4887 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4888
4889 return regmove_cost->FP2FP;
4890}
4891
4892static int
4893aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4894 reg_class_t rclass ATTRIBUTE_UNUSED,
4895 bool in ATTRIBUTE_UNUSED)
4896{
4897 return aarch64_tune_params->memmov_cost;
4898}
4899
4900/* Return the number of instructions that can be issued per cycle. */
4901static int
4902aarch64_sched_issue_rate (void)
4903{
4904 return aarch64_tune_params->issue_rate;
4905}
4906
4907/* Vectorizer cost model target hooks. */
4908
4909/* Implement targetm.vectorize.builtin_vectorization_cost. */
4910static int
4911aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4912 tree vectype,
4913 int misalign ATTRIBUTE_UNUSED)
4914{
4915 unsigned elements;
4916
4917 switch (type_of_cost)
4918 {
4919 case scalar_stmt:
4920 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
4921
4922 case scalar_load:
4923 return aarch64_tune_params->vec_costs->scalar_load_cost;
4924
4925 case scalar_store:
4926 return aarch64_tune_params->vec_costs->scalar_store_cost;
4927
4928 case vector_stmt:
4929 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4930
4931 case vector_load:
4932 return aarch64_tune_params->vec_costs->vec_align_load_cost;
4933
4934 case vector_store:
4935 return aarch64_tune_params->vec_costs->vec_store_cost;
4936
4937 case vec_to_scalar:
4938 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
4939
4940 case scalar_to_vec:
4941 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
4942
4943 case unaligned_load:
4944 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
4945
4946 case unaligned_store:
4947 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
4948
4949 case cond_branch_taken:
4950 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
4951
4952 case cond_branch_not_taken:
4953 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
4954
4955 case vec_perm:
4956 case vec_promote_demote:
4957 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4958
4959 case vec_construct:
4960 elements = TYPE_VECTOR_SUBPARTS (vectype);
4961 return elements / 2 + 1;
4962
4963 default:
4964 gcc_unreachable ();
4965 }
4966}
4967
4968/* Implement targetm.vectorize.add_stmt_cost. */
4969static unsigned
4970aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4971 struct _stmt_vec_info *stmt_info, int misalign,
4972 enum vect_cost_model_location where)
4973{
4974 unsigned *cost = (unsigned *) data;
4975 unsigned retval = 0;
4976
4977 if (flag_vect_cost_model)
4978 {
4979 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4980 int stmt_cost =
4981 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
4982
4983 /* Statements in an inner loop relative to the loop being
4984 vectorized are weighted more heavily. The value here is
4985 a function (linear for now) of the loop nest level. */
4986 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4987 {
4988 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4989 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
4990 unsigned nest_level = loop_depth (loop);
4991
4992 count *= nest_level;
4993 }
4994
4995 retval = (unsigned) (count * stmt_cost);
4996 cost[where] += retval;
4997 }
4998
4999 return retval;
5000}
5001
5002static void initialize_aarch64_code_model (void);
5003
5004/* Parse the architecture extension string. */
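 /* For example, given "-march=armv8-a+crc+nocrypto" the string seen here is
    "+crc+nocrypto": the flags for "crc" are ORed into aarch64_isa_flags and
    the flags for "crypto" are cleared (feature names illustrative). */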
5005
5006static void
5007aarch64_parse_extension (char *str)
5008{
5009 /* The extension string is parsed left to right. */
5010 const struct aarch64_option_extension *opt = NULL;
5011
5012 /* Flag to say whether we are adding or removing an extension. */
5013 int adding_ext = -1;
5014
5015 while (str != NULL && *str != 0)
5016 {
5017 char *ext;
5018 size_t len;
5019
5020 str++;
5021 ext = strchr (str, '+');
5022
5023 if (ext != NULL)
5024 len = ext - str;
5025 else
5026 len = strlen (str);
5027
5028 if (len >= 2 && strncmp (str, "no", 2) == 0)
5029 {
5030 adding_ext = 0;
5031 len -= 2;
5032 str += 2;
5033 }
5034 else if (len > 0)
5035 adding_ext = 1;
5036
5037 if (len == 0)
5038 {
5039 error ("missing feature modifier after %qs", "+no");
5040 return;
5041 }
5042
5043 /* Scan over the extensions table trying to find an exact match. */
5044 for (opt = all_extensions; opt->name != NULL; opt++)
5045 {
5046 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
5047 {
5048 /* Add or remove the extension. */
5049 if (adding_ext)
5050 aarch64_isa_flags |= opt->flags_on;
5051 else
5052 aarch64_isa_flags &= ~(opt->flags_off);
5053 break;
5054 }
5055 }
5056
5057 if (opt->name == NULL)
5058 {
5059 /* Extension not found in list. */
5060 error ("unknown feature modifier %qs", str);
5061 return;
5062 }
5063
5064 str = ext;
5065 };
5066
5067 return;
5068}
5069
5070/* Parse the ARCH string. */
5071
5072static void
5073aarch64_parse_arch (void)
5074{
5075 char *ext;
5076 const struct processor *arch;
5077 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
5078 size_t len;
5079
5080 strcpy (str, aarch64_arch_string);
5081
5082 ext = strchr (str, '+');
5083
5084 if (ext != NULL)
5085 len = ext - str;
5086 else
5087 len = strlen (str);
5088
5089 if (len == 0)
5090 {
5091 error ("missing arch name in -march=%qs", str);
5092 return;
5093 }
5094
5095 /* Loop through the list of supported ARCHs to find a match. */
5096 for (arch = all_architectures; arch->name != NULL; arch++)
5097 {
5098 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5099 {
5100 selected_arch = arch;
5101 aarch64_isa_flags = selected_arch->flags;
5102 selected_cpu = &all_cores[selected_arch->core];
5103
5104 if (ext != NULL)
5105 {
5106 /* ARCH string contains at least one extension. */
5107 aarch64_parse_extension (ext);
5108 }
5109
5110 return;
5111 }
5112 }
5113
5114 /* ARCH name not found in list. */
5115 error ("unknown value %qs for -march", str);
5116 return;
5117}
5118
5119/* Parse the CPU string. */
5120
5121static void
5122aarch64_parse_cpu (void)
5123{
5124 char *ext;
5125 const struct processor *cpu;
5126 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5127 size_t len;
5128
5129 strcpy (str, aarch64_cpu_string);
5130
5131 ext = strchr (str, '+');
5132
5133 if (ext != NULL)
5134 len = ext - str;
5135 else
5136 len = strlen (str);
5137
5138 if (len == 0)
5139 {
5140 error ("missing cpu name in -mcpu=%qs", str);
5141 return;
5142 }
5143
5144 /* Loop through the list of supported CPUs to find a match. */
5145 for (cpu = all_cores; cpu->name != NULL; cpu++)
5146 {
5147 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5148 {
5149 selected_cpu = cpu;
192ed1dd 5150 selected_tune = cpu;
5151 aarch64_isa_flags = selected_cpu->flags;
5152
5153 if (ext != NULL)
5154 {
5155 /* CPU string contains at least one extension. */
5156 aarch64_parse_extension (ext);
5157 }
5158
5159 return;
5160 }
5161 }
5162
5163 /* CPU name not found in list. */
5164 error ("unknown value %qs for -mcpu", str);
5165 return;
5166}
5167
5168/* Parse the TUNE string. */
5169
5170static void
5171aarch64_parse_tune (void)
5172{
5173 const struct processor *cpu;
5174 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5175 strcpy (str, aarch64_tune_string);
5176
5177 /* Loop through the list of supported CPUs to find a match. */
5178 for (cpu = all_cores; cpu->name != NULL; cpu++)
5179 {
5180 if (strcmp (cpu->name, str) == 0)
5181 {
5182 selected_tune = cpu;
5183 return;
5184 }
5185 }
5186
5187 /* CPU name not found in list. */
5188 error ("unknown value %qs for -mtune", str);
5189 return;
5190}
5191
5192
5193/* Implement TARGET_OPTION_OVERRIDE. */
5194
5195static void
5196aarch64_override_options (void)
5197{
5198 /* march wins over mcpu, so when march is defined, mcpu takes the same value,
5199 otherwise march remains undefined. mtune can be used with either march or
5200 mcpu. */
5201
5202 if (aarch64_arch_string)
5203 {
5204 aarch64_parse_arch ();
5205 aarch64_cpu_string = NULL;
5206 }
5207
5208 if (aarch64_cpu_string)
5209 {
5210 aarch64_parse_cpu ();
5211 selected_arch = NULL;
5212 }
5213
5214 if (aarch64_tune_string)
5215 {
5216 aarch64_parse_tune ();
5217 }
5218
5219#ifndef HAVE_AS_MABI_OPTION
5220 /* The compiler may have been configured with 2.23.* binutils, which does
5221 not have support for ILP32. */
5222 if (TARGET_ILP32)
5223 error ("Assembler does not support -mabi=ilp32");
5224#endif
5225
5226 initialize_aarch64_code_model ();
5227
5228 aarch64_build_bitmask_table ();
5229
5230 /* This target defaults to strict volatile bitfields. */
5231 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5232 flag_strict_volatile_bitfields = 1;
5233
5234 /* If the user did not specify a processor, choose the default
5235 one for them. This will be the CPU set during configuration using
02fdbd5b 5236 --with-cpu, otherwise it is "cortex-a53". */
5237 if (!selected_cpu)
5238 {
5239 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5240 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5241 }
5242
5243 gcc_assert (selected_cpu);
5244
5245 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
5246 if (!selected_tune)
5247 selected_tune = &all_cores[selected_cpu->core];
5248
5249 aarch64_tune_flags = selected_tune->flags;
5250 aarch64_tune = selected_tune->core;
5251 aarch64_tune_params = selected_tune->tune;
5252
5253 aarch64_override_options_after_change ();
5254}
5255
5256/* Implement targetm.override_options_after_change. */
5257
5258static void
5259aarch64_override_options_after_change (void)
5260{
5261 faked_omit_frame_pointer = false;
5262
5263 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
5264 that aarch64_frame_pointer_required will be called. We need to remember
5265 whether flag_omit_frame_pointer was turned on normally or just faked. */
5266
5267 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
5268 {
5269 flag_omit_frame_pointer = true;
5270 faked_omit_frame_pointer = true;
5271 }
5272}
5273
5274static struct machine_function *
5275aarch64_init_machine_status (void)
5276{
5277 struct machine_function *machine;
5278 machine = ggc_alloc_cleared_machine_function ();
5279 return machine;
5280}
5281
5282void
5283aarch64_init_expanders (void)
5284{
5285 init_machine_status = aarch64_init_machine_status;
5286}
5287
5288/* A checking mechanism for the implementation of the various code models. */
5289static void
5290initialize_aarch64_code_model (void)
5291{
5292 if (flag_pic)
5293 {
5294 switch (aarch64_cmodel_var)
5295 {
5296 case AARCH64_CMODEL_TINY:
5297 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5298 break;
5299 case AARCH64_CMODEL_SMALL:
5300 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5301 break;
5302 case AARCH64_CMODEL_LARGE:
5303 sorry ("code model %qs with -f%s", "large",
5304 flag_pic > 1 ? "PIC" : "pic");
5305 default:
5306 gcc_unreachable ();
5307 }
5308 }
5309 else
5310 aarch64_cmodel = aarch64_cmodel_var;
5311}
5312
5313/* Return true if SYMBOL_REF X binds locally. */
5314
5315static bool
5316aarch64_symbol_binds_local_p (const_rtx x)
5317{
5318 return (SYMBOL_REF_DECL (x)
5319 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5320 : SYMBOL_REF_LOCAL_P (x));
5321}
5322
5323/* Return true if SYMBOL_REF X is thread local */
5324static bool
5325aarch64_tls_symbol_p (rtx x)
5326{
5327 if (! TARGET_HAVE_TLS)
5328 return false;
5329
5330 if (GET_CODE (x) != SYMBOL_REF)
5331 return false;
5332
5333 return SYMBOL_REF_TLS_MODEL (x) != 0;
5334}
5335
5336/* Classify a TLS symbol into one of the TLS kinds. */
5337enum aarch64_symbol_type
5338aarch64_classify_tls_symbol (rtx x)
5339{
5340 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5341
5342 switch (tls_kind)
5343 {
5344 case TLS_MODEL_GLOBAL_DYNAMIC:
5345 case TLS_MODEL_LOCAL_DYNAMIC:
5346 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5347
5348 case TLS_MODEL_INITIAL_EXEC:
5349 return SYMBOL_SMALL_GOTTPREL;
5350
5351 case TLS_MODEL_LOCAL_EXEC:
5352 return SYMBOL_SMALL_TPREL;
5353
5354 case TLS_MODEL_EMULATED:
5355 case TLS_MODEL_NONE:
5356 return SYMBOL_FORCE_TO_MEM;
5357
5358 default:
5359 gcc_unreachable ();
5360 }
5361}
5362
5363/* Return the method that should be used to access SYMBOL_REF or
5364 LABEL_REF X in context CONTEXT. */
17f4d4bf 5365
5366enum aarch64_symbol_type
5367aarch64_classify_symbol (rtx x,
5368 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5369{
5370 if (GET_CODE (x) == LABEL_REF)
5371 {
5372 switch (aarch64_cmodel)
5373 {
5374 case AARCH64_CMODEL_LARGE:
5375 return SYMBOL_FORCE_TO_MEM;
5376
5377 case AARCH64_CMODEL_TINY_PIC:
5378 case AARCH64_CMODEL_TINY:
5379 return SYMBOL_TINY_ABSOLUTE;
5380
5381 case AARCH64_CMODEL_SMALL_PIC:
5382 case AARCH64_CMODEL_SMALL:
5383 return SYMBOL_SMALL_ABSOLUTE;
5384
5385 default:
5386 gcc_unreachable ();
5387 }
5388 }
5389
17f4d4bf 5390 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 5391 {
5392 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
5393 || CONSTANT_POOL_ADDRESS_P (x))
5394 return SYMBOL_FORCE_TO_MEM;
5395
5396 if (aarch64_tls_symbol_p (x))
5397 return aarch64_classify_tls_symbol (x);
5398
5399 switch (aarch64_cmodel)
5400 {
5401 case AARCH64_CMODEL_TINY:
5402 if (SYMBOL_REF_WEAK (x))
5403 return SYMBOL_FORCE_TO_MEM;
5404 return SYMBOL_TINY_ABSOLUTE;
5405
5406 case AARCH64_CMODEL_SMALL:
5407 if (SYMBOL_REF_WEAK (x))
5408 return SYMBOL_FORCE_TO_MEM;
5409 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 5410
17f4d4bf 5411 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 5412 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 5413 return SYMBOL_TINY_GOT;
5414 return SYMBOL_TINY_ABSOLUTE;
5415
5416 case AARCH64_CMODEL_SMALL_PIC:
5417 if (!aarch64_symbol_binds_local_p (x))
5418 return SYMBOL_SMALL_GOT;
5419 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 5420
5421 default:
5422 gcc_unreachable ();
5423 }
43e9d192 5424 }
17f4d4bf 5425
5426 /* By default push everything into the constant pool. */
5427 return SYMBOL_FORCE_TO_MEM;
5428}
5429
5430bool
5431aarch64_constant_address_p (rtx x)
5432{
5433 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5434}
5435
5436bool
5437aarch64_legitimate_pic_operand_p (rtx x)
5438{
5439 if (GET_CODE (x) == SYMBOL_REF
5440 || (GET_CODE (x) == CONST
5441 && GET_CODE (XEXP (x, 0)) == PLUS
5442 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5443 return false;
5444
5445 return true;
5446}
5447
5448/* Return true if X holds either a quarter-precision or
5449 floating-point +0.0 constant. */
5450static bool
5451aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5452{
5453 if (!CONST_DOUBLE_P (x))
5454 return false;
5455
5456 /* TODO: We could handle moving 0.0 to a TFmode register,
5457 but first we would like to refactor the movtf_aarch64
5458 to be more amenable to splitting moves properly and to
5459 gate correctly on TARGET_SIMD. For now, reject all
5460 constants which are not for SFmode or DFmode registers.
5461 if (!(mode == SFmode || mode == DFmode))
5462 return false;
5463
5464 if (aarch64_float_const_zero_rtx_p (x))
5465 return true;
5466 return aarch64_float_const_representable_p (x);
5467}
5468
5469static bool
5470aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5471{
5472 /* Do not allow vector struct mode constants. We could support
5473 0 and -1 easily, but they need support in aarch64-simd.md. */
5474 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5475 return false;
5476
5477 /* This could probably go away because
5478 we now decompose CONST_INTs according to expand_mov_immediate. */
5479 if ((GET_CODE (x) == CONST_VECTOR
48063b9d 5480 && aarch64_simd_valid_immediate (x, mode, false, NULL))
5481 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5482 return !targetm.cannot_force_const_mem (mode, x);
5483
5484 if (GET_CODE (x) == HIGH
5485 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5486 return true;
5487
5488 return aarch64_constant_address_p (x);
5489}
5490
a5bc806c 5491rtx
5492aarch64_load_tp (rtx target)
5493{
5494 if (!target
5495 || GET_MODE (target) != Pmode
5496 || !register_operand (target, Pmode))
5497 target = gen_reg_rtx (Pmode);
5498
5499 /* Can return in any reg. */
5500 emit_insn (gen_aarch64_load_tp_hard (target));
5501 return target;
5502}
5503
5504/* On AAPCS systems, this is the "struct __va_list". */
5505static GTY(()) tree va_list_type;
5506
5507/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5508 Return the type to use as __builtin_va_list.
5509
5510 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5511
5512 struct __va_list
5513 {
5514 void *__stack;
5515 void *__gr_top;
5516 void *__vr_top;
5517 int __gr_offs;
5518 int __vr_offs;
5519 }; */
5520
5521static tree
5522aarch64_build_builtin_va_list (void)
5523{
5524 tree va_list_name;
5525 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5526
5527 /* Create the type. */
5528 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5529 /* Give it the required name. */
5530 va_list_name = build_decl (BUILTINS_LOCATION,
5531 TYPE_DECL,
5532 get_identifier ("__va_list"),
5533 va_list_type);
5534 DECL_ARTIFICIAL (va_list_name) = 1;
5535 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 5536 TYPE_STUB_DECL (va_list_type) = va_list_name;
5537
5538 /* Create the fields. */
5539 f_stack = build_decl (BUILTINS_LOCATION,
5540 FIELD_DECL, get_identifier ("__stack"),
5541 ptr_type_node);
5542 f_grtop = build_decl (BUILTINS_LOCATION,
5543 FIELD_DECL, get_identifier ("__gr_top"),
5544 ptr_type_node);
5545 f_vrtop = build_decl (BUILTINS_LOCATION,
5546 FIELD_DECL, get_identifier ("__vr_top"),
5547 ptr_type_node);
5548 f_groff = build_decl (BUILTINS_LOCATION,
5549 FIELD_DECL, get_identifier ("__gr_offs"),
5550 integer_type_node);
5551 f_vroff = build_decl (BUILTINS_LOCATION,
5552 FIELD_DECL, get_identifier ("__vr_offs"),
5553 integer_type_node);
5554
5555 DECL_ARTIFICIAL (f_stack) = 1;
5556 DECL_ARTIFICIAL (f_grtop) = 1;
5557 DECL_ARTIFICIAL (f_vrtop) = 1;
5558 DECL_ARTIFICIAL (f_groff) = 1;
5559 DECL_ARTIFICIAL (f_vroff) = 1;
5560
5561 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5562 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5563 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5564 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5565 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5566
5567 TYPE_FIELDS (va_list_type) = f_stack;
5568 DECL_CHAIN (f_stack) = f_grtop;
5569 DECL_CHAIN (f_grtop) = f_vrtop;
5570 DECL_CHAIN (f_vrtop) = f_groff;
5571 DECL_CHAIN (f_groff) = f_vroff;
5572
5573 /* Compute its layout. */
5574 layout_type (va_list_type);
5575
5576 return va_list_type;
5577}
5578
5579/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5580static void
5581aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5582{
5583 const CUMULATIVE_ARGS *cum;
5584 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5585 tree stack, grtop, vrtop, groff, vroff;
5586 tree t;
5587 int gr_save_area_size;
5588 int vr_save_area_size;
5589 int vr_offset;
5590
5591 cum = &crtl->args.info;
5592 gr_save_area_size
5593 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5594 vr_save_area_size
5595 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5596
5597 if (TARGET_GENERAL_REGS_ONLY)
5598 {
5599 if (cum->aapcs_nvrn > 0)
5600 sorry ("%qs and floating point or vector arguments",
5601 "-mgeneral-regs-only");
5602 vr_save_area_size = 0;
5603 }
5604
5605 f_stack = TYPE_FIELDS (va_list_type_node);
5606 f_grtop = DECL_CHAIN (f_stack);
5607 f_vrtop = DECL_CHAIN (f_grtop);
5608 f_groff = DECL_CHAIN (f_vrtop);
5609 f_vroff = DECL_CHAIN (f_groff);
5610
5611 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5612 NULL_TREE);
5613 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5614 NULL_TREE);
5615 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5616 NULL_TREE);
5617 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5618 NULL_TREE);
5619 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5620 NULL_TREE);
5621
5622 /* Emit code to initialize STACK, which points to the next varargs stack
5623 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5624 by named arguments. STACK is 8-byte aligned. */
5625 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5626 if (cum->aapcs_stack_size > 0)
5627 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5628 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5629 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5630
5631 /* Emit code to initialize GRTOP, the top of the GR save area.
5632 virtual_incoming_args_rtx should have been 16 byte aligned. */
5633 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5634 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5635 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5636
5637 /* Emit code to initialize VRTOP, the top of the VR save area.
5638 This address is gr_save_area_bytes below GRTOP, rounded
5639 down to the next 16-byte boundary. */
5640 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5641 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5642 STACK_BOUNDARY / BITS_PER_UNIT);
5643
5644 if (vr_offset)
5645 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5646 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5647 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5648
5649 /* Emit code to initialize GROFF, the offset from GRTOP of the
5650 next GPR argument. */
5651 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5652 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5653 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5654
5655 /* Likewise emit code to initialize VROFF, the offset from FTOP
5656 of the next VR argument. */
5657 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5658 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5659 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5660}
5661
5662/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5663
5664static tree
5665aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5666 gimple_seq *post_p ATTRIBUTE_UNUSED)
5667{
5668 tree addr;
5669 bool indirect_p;
5670 bool is_ha; /* is HFA or HVA. */
5671 bool dw_align; /* double-word align. */
5672 enum machine_mode ag_mode = VOIDmode;
5673 int nregs;
5674 enum machine_mode mode;
5675
5676 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5677 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5678 HOST_WIDE_INT size, rsize, adjust, align;
5679 tree t, u, cond1, cond2;
5680
5681 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5682 if (indirect_p)
5683 type = build_pointer_type (type);
5684
5685 mode = TYPE_MODE (type);
5686
5687 f_stack = TYPE_FIELDS (va_list_type_node);
5688 f_grtop = DECL_CHAIN (f_stack);
5689 f_vrtop = DECL_CHAIN (f_grtop);
5690 f_groff = DECL_CHAIN (f_vrtop);
5691 f_vroff = DECL_CHAIN (f_groff);
5692
5693 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5694 f_stack, NULL_TREE);
5695 size = int_size_in_bytes (type);
5696 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5697
5698 dw_align = false;
5699 adjust = 0;
5700 if (aarch64_vfp_is_call_or_return_candidate (mode,
5701 type,
5702 &ag_mode,
5703 &nregs,
5704 &is_ha))
5705 {
5706 /* TYPE passed in fp/simd registers. */
5707 if (TARGET_GENERAL_REGS_ONLY)
5708 sorry ("%qs and floating point or vector arguments",
5709 "-mgeneral-regs-only");
5710
5711 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5712 unshare_expr (valist), f_vrtop, NULL_TREE);
5713 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5714 unshare_expr (valist), f_vroff, NULL_TREE);
5715
5716 rsize = nregs * UNITS_PER_VREG;
5717
5718 if (is_ha)
5719 {
5720 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5721 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5722 }
5723 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5724 && size < UNITS_PER_VREG)
5725 {
5726 adjust = UNITS_PER_VREG - size;
5727 }
5728 }
5729 else
5730 {
5731 /* TYPE passed in general registers. */
5732 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5733 unshare_expr (valist), f_grtop, NULL_TREE);
5734 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5735 unshare_expr (valist), f_groff, NULL_TREE);
5736 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5737 nregs = rsize / UNITS_PER_WORD;
5738
5739 if (align > 8)
5740 dw_align = true;
5741
5742 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5743 && size < UNITS_PER_WORD)
5744 {
5745 adjust = UNITS_PER_WORD - size;
5746 }
5747 }
5748
5749 /* Get a local temporary for the field value. */
5750 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5751
5752 /* Emit code to branch if off >= 0. */
5753 t = build2 (GE_EXPR, boolean_type_node, off,
5754 build_int_cst (TREE_TYPE (off), 0));
5755 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5756
5757 if (dw_align)
5758 {
5759 /* Emit: offs = (offs + 15) & -16. */
5760 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5761 build_int_cst (TREE_TYPE (off), 15));
5762 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5763 build_int_cst (TREE_TYPE (off), -16));
5764 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5765 }
5766 else
5767 roundup = NULL;
5768
5769 /* Update ap.__[g|v]r_offs */
5770 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5771 build_int_cst (TREE_TYPE (off), rsize));
5772 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5773
5774 /* String up. */
5775 if (roundup)
5776 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5777
5778 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5779 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5780 build_int_cst (TREE_TYPE (f_off), 0));
5781 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5782
5783 /* String up: make sure the assignment happens before the use. */
5784 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5785 COND_EXPR_ELSE (cond1) = t;
5786
 5787  /* Prepare the trees handling the argument that is passed on the stack;
 5788     the top-level node will be stored in ON_STACK.  */
5789 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5790 if (align > 8)
5791 {
5792 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5793 t = fold_convert (intDI_type_node, arg);
5794 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5795 build_int_cst (TREE_TYPE (t), 15));
5796 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5797 build_int_cst (TREE_TYPE (t), -16));
5798 t = fold_convert (TREE_TYPE (arg), t);
5799 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5800 }
5801 else
5802 roundup = NULL;
5803 /* Advance ap.__stack */
5804 t = fold_convert (intDI_type_node, arg);
5805 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5806 build_int_cst (TREE_TYPE (t), size + 7));
5807 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5808 build_int_cst (TREE_TYPE (t), -8));
5809 t = fold_convert (TREE_TYPE (arg), t);
5810 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5811 /* String up roundup and advance. */
5812 if (roundup)
5813 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5814 /* String up with arg */
5815 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5816 /* Big-endianness related address adjustment. */
5817 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5818 && size < UNITS_PER_WORD)
5819 {
5820 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5821 size_int (UNITS_PER_WORD - size));
5822 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5823 }
5824
5825 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5826 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5827
5828 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5829 t = off;
5830 if (adjust)
5831 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5832 build_int_cst (TREE_TYPE (off), adjust));
5833
5834 t = fold_convert (sizetype, t);
5835 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5836
5837 if (is_ha)
5838 {
5839 /* type ha; // treat as "struct {ftype field[n];}"
5840 ... [computing offs]
5841 for (i = 0; i <nregs; ++i, offs += 16)
5842 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5843 return ha; */
5844 int i;
5845 tree tmp_ha, field_t, field_ptr_t;
5846
5847 /* Declare a local variable. */
5848 tmp_ha = create_tmp_var_raw (type, "ha");
5849 gimple_add_tmp_var (tmp_ha);
5850
5851 /* Establish the base type. */
5852 switch (ag_mode)
5853 {
5854 case SFmode:
5855 field_t = float_type_node;
5856 field_ptr_t = float_ptr_type_node;
5857 break;
5858 case DFmode:
5859 field_t = double_type_node;
5860 field_ptr_t = double_ptr_type_node;
5861 break;
5862 case TFmode:
5863 field_t = long_double_type_node;
5864 field_ptr_t = long_double_ptr_type_node;
5865 break;
 5866/* Half-precision and quad-precision floating point are not fully supported
 5867   yet.  Enable the following code once that support is complete; the correct
 5868   type node for __fp16 * still needs to be found.  */
5869#if 0
5870 case HFmode:
5871 field_t = float_type_node;
5872 field_ptr_t = float_ptr_type_node;
5873 break;
5874#endif
5875 case V2SImode:
5876 case V4SImode:
5877 {
5878 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5879 field_t = build_vector_type_for_mode (innertype, ag_mode);
5880 field_ptr_t = build_pointer_type (field_t);
5881 }
5882 break;
5883 default:
5884 gcc_assert (0);
5885 }
5886
 5887      /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area)  */
5888 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5889 addr = t;
5890 t = fold_convert (field_ptr_t, addr);
5891 t = build2 (MODIFY_EXPR, field_t,
5892 build1 (INDIRECT_REF, field_t, tmp_ha),
5893 build1 (INDIRECT_REF, field_t, t));
5894
5895 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5896 for (i = 1; i < nregs; ++i)
5897 {
5898 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5899 u = fold_convert (field_ptr_t, addr);
5900 u = build2 (MODIFY_EXPR, field_t,
5901 build2 (MEM_REF, field_t, tmp_ha,
5902 build_int_cst (field_ptr_t,
5903 (i *
5904 int_size_in_bytes (field_t)))),
5905 build1 (INDIRECT_REF, field_t, u));
5906 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5907 }
5908
5909 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5910 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5911 }
5912
5913 COND_EXPR_ELSE (cond2) = t;
5914 addr = fold_convert (build_pointer_type (type), cond1);
5915 addr = build_va_arg_indirect_ref (addr);
5916
5917 if (indirect_p)
5918 addr = build_va_arg_indirect_ref (addr);
5919
5920 return addr;
5921}
5922
5923/* Implement TARGET_SETUP_INCOMING_VARARGS. */
5924
5925static void
5926aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5927 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5928 int no_rtl)
5929{
5930 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5931 CUMULATIVE_ARGS local_cum;
5932 int gr_saved, vr_saved;
5933
5934 /* The caller has advanced CUM up to, but not beyond, the last named
5935 argument. Advance a local copy of CUM past the last "real" named
5936 argument, to find out how many registers are left over. */
5937 local_cum = *cum;
 5938  aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
5939
 5940  /* Find out how many registers we need to save.  */
5941 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5942 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5943
5944 if (TARGET_GENERAL_REGS_ONLY)
5945 {
5946 if (local_cum.aapcs_nvrn > 0)
5947 sorry ("%qs and floating point or vector arguments",
5948 "-mgeneral-regs-only");
5949 vr_saved = 0;
5950 }
5951
5952 if (!no_rtl)
5953 {
5954 if (gr_saved > 0)
5955 {
5956 rtx ptr, mem;
5957
5958 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5959 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5960 - gr_saved * UNITS_PER_WORD);
5961 mem = gen_frame_mem (BLKmode, ptr);
5962 set_mem_alias_set (mem, get_varargs_alias_set ());
5963
5964 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5965 mem, gr_saved);
5966 }
5967 if (vr_saved > 0)
5968 {
5969 /* We can't use move_block_from_reg, because it will use
5970 the wrong mode, storing D regs only. */
5971 enum machine_mode mode = TImode;
5972 int off, i;
5973
5974 /* Set OFF to the offset from virtual_incoming_args_rtx of
5975 the first vector register. The VR save area lies below
5976 the GR one, and is aligned to 16 bytes. */
5977 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5978 STACK_BOUNDARY / BITS_PER_UNIT);
5979 off -= vr_saved * UNITS_PER_VREG;
5980
5981 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5982 {
5983 rtx ptr, mem;
5984
5985 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5986 mem = gen_frame_mem (mode, ptr);
5987 set_mem_alias_set (mem, get_varargs_alias_set ());
5988 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5989 off += UNITS_PER_VREG;
5990 }
5991 }
5992 }
5993
5994 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5995 any complication of having crtl->args.pretend_args_size changed. */
5996 cfun->machine->saved_varargs_size
5997 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5998 STACK_BOUNDARY / BITS_PER_UNIT)
5999 + vr_saved * UNITS_PER_VREG);
6000}
6001
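/* If the FP/SIMD registers are not available (!TARGET_FLOAT), mark V0-V31
   as fixed and call-used so that the register allocator never uses them.  */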
6002static void
6003aarch64_conditional_register_usage (void)
6004{
6005 int i;
6006 if (!TARGET_FLOAT)
6007 {
6008 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
6009 {
6010 fixed_regs[i] = 1;
6011 call_used_regs[i] = 1;
6012 }
6013 }
6014}
6015
6016/* Walk down the type tree of TYPE counting consecutive base elements.
6017 If *MODEP is VOIDmode, then set it to the first valid floating point
6018 type. If a non-floating point type is found, or if a floating point
6019 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6020 otherwise return the count in the sub-tree. */
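/* For example (illustrative only): a struct of four floats yields a count
   of 4 with *MODEP set to SFmode, while a struct mixing a float and a
   double yields -1 because the second element does not match *MODEP.  */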
6021static int
6022aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
6023{
6024 enum machine_mode mode;
6025 HOST_WIDE_INT size;
6026
6027 switch (TREE_CODE (type))
6028 {
6029 case REAL_TYPE:
6030 mode = TYPE_MODE (type);
6031 if (mode != DFmode && mode != SFmode && mode != TFmode)
6032 return -1;
6033
6034 if (*modep == VOIDmode)
6035 *modep = mode;
6036
6037 if (*modep == mode)
6038 return 1;
6039
6040 break;
6041
6042 case COMPLEX_TYPE:
6043 mode = TYPE_MODE (TREE_TYPE (type));
6044 if (mode != DFmode && mode != SFmode && mode != TFmode)
6045 return -1;
6046
6047 if (*modep == VOIDmode)
6048 *modep = mode;
6049
6050 if (*modep == mode)
6051 return 2;
6052
6053 break;
6054
6055 case VECTOR_TYPE:
6056 /* Use V2SImode and V4SImode as representatives of all 64-bit
6057 and 128-bit vector types. */
6058 size = int_size_in_bytes (type);
6059 switch (size)
6060 {
6061 case 8:
6062 mode = V2SImode;
6063 break;
6064 case 16:
6065 mode = V4SImode;
6066 break;
6067 default:
6068 return -1;
6069 }
6070
6071 if (*modep == VOIDmode)
6072 *modep = mode;
6073
6074 /* Vector modes are considered to be opaque: two vectors are
6075 equivalent for the purposes of being homogeneous aggregates
6076 if they are the same size. */
6077 if (*modep == mode)
6078 return 1;
6079
6080 break;
6081
6082 case ARRAY_TYPE:
6083 {
6084 int count;
6085 tree index = TYPE_DOMAIN (type);
6086
6087 /* Can't handle incomplete types. */
6088 if (!COMPLETE_TYPE_P (type))
6089 return -1;
6090
6091 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6092 if (count == -1
6093 || !index
6094 || !TYPE_MAX_VALUE (index)
cc269bb6 6095 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 6096 || !TYPE_MIN_VALUE (index)
cc269bb6 6097 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
6098 || count < 0)
6099 return -1;
6100
ae7e9ddd
RS
6101 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6102 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
6103
6104 /* There must be no padding. */
cc269bb6 6105 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
7d362f6c 6106 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
43e9d192
IB
6107 != count * GET_MODE_BITSIZE (*modep)))
6108 return -1;
6109
6110 return count;
6111 }
6112
6113 case RECORD_TYPE:
6114 {
6115 int count = 0;
6116 int sub_count;
6117 tree field;
6118
6119 /* Can't handle incomplete types. */
6120 if (!COMPLETE_TYPE_P (type))
6121 return -1;
6122
6123 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6124 {
6125 if (TREE_CODE (field) != FIELD_DECL)
6126 continue;
6127
6128 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6129 if (sub_count < 0)
6130 return -1;
6131 count += sub_count;
6132 }
6133
6134 /* There must be no padding. */
cc269bb6 6135 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
7d362f6c 6136 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
43e9d192
IB
6137 != count * GET_MODE_BITSIZE (*modep)))
6138 return -1;
6139
6140 return count;
6141 }
6142
6143 case UNION_TYPE:
6144 case QUAL_UNION_TYPE:
6145 {
6146 /* These aren't very interesting except in a degenerate case. */
6147 int count = 0;
6148 int sub_count;
6149 tree field;
6150
6151 /* Can't handle incomplete types. */
6152 if (!COMPLETE_TYPE_P (type))
6153 return -1;
6154
6155 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6156 {
6157 if (TREE_CODE (field) != FIELD_DECL)
6158 continue;
6159
6160 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6161 if (sub_count < 0)
6162 return -1;
6163 count = count > sub_count ? count : sub_count;
6164 }
6165
6166 /* There must be no padding. */
cc269bb6 6167 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
7d362f6c 6168 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
43e9d192
IB
6169 != count * GET_MODE_BITSIZE (*modep)))
6170 return -1;
6171
6172 return count;
6173 }
6174
6175 default:
6176 break;
6177 }
6178
6179 return -1;
6180}
6181
38e8f663
YR
 6182/* Return true if we use LRA instead of the reload pass.  */
6183static bool
6184aarch64_lra_p (void)
6185{
6186 return aarch64_lra_flag;
6187}
6188
43e9d192
IB
6189/* Return TRUE if the type, as described by TYPE and MODE, is a composite
6190 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6191 array types. The C99 floating-point complex types are also considered
6192 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6193 types, which are GCC extensions and out of the scope of AAPCS64, are
6194 treated as composite types here as well.
6195
6196 Note that MODE itself is not sufficient in determining whether a type
6197 is such a composite type or not. This is because
6198 stor-layout.c:compute_record_mode may have already changed the MODE
6199 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6200 structure with only one field may have its MODE set to the mode of the
6201 field. Also an integer mode whose size matches the size of the
6202 RECORD_TYPE type may be used to substitute the original mode
6203 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6204 solely relied on. */
6205
6206static bool
6207aarch64_composite_type_p (const_tree type,
6208 enum machine_mode mode)
6209{
6210 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6211 return true;
6212
6213 if (mode == BLKmode
6214 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6215 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6216 return true;
6217
6218 return false;
6219}
6220
6221/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6222 type as described in AAPCS64 \S 4.1.2.
6223
6224 See the comment above aarch64_composite_type_p for the notes on MODE. */
6225
6226static bool
6227aarch64_short_vector_p (const_tree type,
6228 enum machine_mode mode)
6229{
6230 HOST_WIDE_INT size = -1;
6231
6232 if (type && TREE_CODE (type) == VECTOR_TYPE)
6233 size = int_size_in_bytes (type);
6234 else if (!aarch64_composite_type_p (type, mode)
6235 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6236 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6237 size = GET_MODE_SIZE (mode);
6238
6239 return (size == 8 || size == 16) ? true : false;
6240}
6241
 6242/* Return TRUE if an argument, whose type is described by TYPE and MODE,
 6243   shall be passed or returned in simd/fp register(s) (provided these
 6244   parameter-passing registers are available).
 6245
 6246   Upon successful return, *COUNT returns the number of needed registers,
 6247   *BASE_MODE returns the mode of the individual register and, when IS_HA
 6248   is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
 6249   floating-point aggregate or a homogeneous short-vector aggregate.  */
6250
6251static bool
6252aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6253 const_tree type,
6254 enum machine_mode *base_mode,
6255 int *count,
6256 bool *is_ha)
6257{
6258 enum machine_mode new_mode = VOIDmode;
6259 bool composite_p = aarch64_composite_type_p (type, mode);
6260
6261 if (is_ha != NULL) *is_ha = false;
6262
6263 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6264 || aarch64_short_vector_p (type, mode))
6265 {
6266 *count = 1;
6267 new_mode = mode;
6268 }
6269 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6270 {
6271 if (is_ha != NULL) *is_ha = true;
6272 *count = 2;
6273 new_mode = GET_MODE_INNER (mode);
6274 }
6275 else if (type && composite_p)
6276 {
6277 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6278
6279 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6280 {
6281 if (is_ha != NULL) *is_ha = true;
6282 *count = ag_count;
6283 }
6284 else
6285 return false;
6286 }
6287 else
6288 return false;
6289
6290 *base_mode = new_mode;
6291 return true;
6292}
6293
6294/* Implement TARGET_STRUCT_VALUE_RTX. */
6295
6296static rtx
6297aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6298 int incoming ATTRIBUTE_UNUSED)
6299{
6300 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6301}
6302
6303/* Implements target hook vector_mode_supported_p. */
6304static bool
6305aarch64_vector_mode_supported_p (enum machine_mode mode)
6306{
6307 if (TARGET_SIMD
6308 && (mode == V4SImode || mode == V8HImode
6309 || mode == V16QImode || mode == V2DImode
6310 || mode == V2SImode || mode == V4HImode
6311 || mode == V8QImode || mode == V2SFmode
6312 || mode == V4SFmode || mode == V2DFmode))
6313 return true;
6314
6315 return false;
6316}
6317
b7342d25
IB
6318/* Return appropriate SIMD container
6319 for MODE within a vector of WIDTH bits. */
43e9d192 6320static enum machine_mode
b7342d25 6321aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
43e9d192 6322{
b7342d25 6323 gcc_assert (width == 64 || width == 128);
43e9d192 6324 if (TARGET_SIMD)
b7342d25
IB
6325 {
6326 if (width == 128)
6327 switch (mode)
6328 {
6329 case DFmode:
6330 return V2DFmode;
6331 case SFmode:
6332 return V4SFmode;
6333 case SImode:
6334 return V4SImode;
6335 case HImode:
6336 return V8HImode;
6337 case QImode:
6338 return V16QImode;
6339 case DImode:
6340 return V2DImode;
6341 default:
6342 break;
6343 }
6344 else
6345 switch (mode)
6346 {
6347 case SFmode:
6348 return V2SFmode;
6349 case SImode:
6350 return V2SImode;
6351 case HImode:
6352 return V4HImode;
6353 case QImode:
6354 return V8QImode;
6355 default:
6356 break;
6357 }
6358 }
43e9d192
IB
6359 return word_mode;
6360}
6361
b7342d25
IB
6362/* Return 128-bit container as the preferred SIMD mode for MODE. */
6363static enum machine_mode
6364aarch64_preferred_simd_mode (enum machine_mode mode)
6365{
6366 return aarch64_simd_container_mode (mode, 128);
6367}
6368
3b357264
JG
6369/* Return the bitmask of possible vector sizes for the vectorizer
6370 to iterate over. */
6371static unsigned int
6372aarch64_autovectorize_vector_sizes (void)
6373{
6374 return (16 | 8);
6375}
6376
c6fc9e43
YZ
6377/* A table to help perform AArch64-specific name mangling for AdvSIMD
6378 vector types in order to conform to the AAPCS64 (see "Procedure
6379 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6380 qualify for emission with the mangled names defined in that document,
6381 a vector type must not only be of the correct mode but also be
6382 composed of AdvSIMD vector element types (e.g.
 6383   __builtin_aarch64_simd_qi); these types are registered by
6384 aarch64_init_simd_builtins (). In other words, vector types defined
6385 in other ways e.g. via vector_size attribute will get default
6386 mangled names. */
6387typedef struct
6388{
6389 enum machine_mode mode;
6390 const char *element_type_name;
6391 const char *mangled_name;
6392} aarch64_simd_mangle_map_entry;
6393
6394static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6395 /* 64-bit containerized types. */
6396 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6397 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6398 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6399 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6400 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6401 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6402 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6403 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6404 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6405 /* 128-bit containerized types. */
6406 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6407 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6408 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6409 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6410 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6411 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6412 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6413 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6414 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6415 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6416 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6417 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
7baa225d 6418 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
c6fc9e43
YZ
6419 { VOIDmode, NULL, NULL }
6420};
6421
ac2b960f
YZ
6422/* Implement TARGET_MANGLE_TYPE. */
6423
6f549691 6424static const char *
ac2b960f
YZ
6425aarch64_mangle_type (const_tree type)
6426{
6427 /* The AArch64 ABI documents say that "__va_list" has to be
 6428     mangled as if it is in the "std" namespace.  */
6429 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6430 return "St9__va_list";
6431
c6fc9e43
YZ
6432 /* Check the mode of the vector type, and the name of the vector
6433 element type, against the table. */
6434 if (TREE_CODE (type) == VECTOR_TYPE)
6435 {
6436 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6437
6438 while (pos->mode != VOIDmode)
6439 {
6440 tree elt_type = TREE_TYPE (type);
6441
6442 if (pos->mode == TYPE_MODE (type)
6443 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6444 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6445 pos->element_type_name))
6446 return pos->mangled_name;
6447
6448 pos++;
6449 }
6450 }
6451
ac2b960f
YZ
6452 /* Use the default mangling. */
6453 return NULL;
6454}
6455
43e9d192 6456/* Return the equivalent letter for size. */
81c2dfb9 6457static char
43e9d192
IB
6458sizetochar (int size)
6459{
6460 switch (size)
6461 {
6462 case 64: return 'd';
6463 case 32: return 's';
6464 case 16: return 'h';
6465 case 8 : return 'b';
6466 default: gcc_unreachable ();
6467 }
6468}
6469
3520f7cc
JG
6470/* Return true iff x is a uniform vector of floating-point
6471 constants, and the constant can be represented in
6472 quarter-precision form. Note, as aarch64_float_const_representable
6473 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
6474static bool
6475aarch64_vect_float_const_representable_p (rtx x)
6476{
6477 int i = 0;
6478 REAL_VALUE_TYPE r0, ri;
6479 rtx x0, xi;
6480
6481 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6482 return false;
6483
6484 x0 = CONST_VECTOR_ELT (x, 0);
6485 if (!CONST_DOUBLE_P (x0))
6486 return false;
6487
6488 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6489
6490 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6491 {
6492 xi = CONST_VECTOR_ELT (x, i);
6493 if (!CONST_DOUBLE_P (xi))
6494 return false;
6495
6496 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6497 if (!REAL_VALUES_EQUAL (r0, ri))
6498 return false;
6499 }
6500
6501 return aarch64_float_const_representable_p (x0);
6502}
6503
d8edd899 6504/* Return true for valid and false for invalid. */
3ea63f60 6505bool
48063b9d
IB
6506aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
6507 struct simd_immediate_info *info)
43e9d192
IB
6508{
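  /* CHECK tries one immediate class: it tests every STRIDE-th group of the
     splatted constant bytes with TEST and, on a match, records the class
     number, the element size in bits, the shift amount and whether the
     inverted (MVNI-style) encoding is needed, then stops the search.  */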
6509#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6510 matches = 1; \
6511 for (i = 0; i < idx; i += (STRIDE)) \
6512 if (!(TEST)) \
6513 matches = 0; \
6514 if (matches) \
6515 { \
6516 immtype = (CLASS); \
6517 elsize = (ELSIZE); \
43e9d192
IB
6518 eshift = (SHIFT); \
6519 emvn = (NEG); \
6520 break; \
6521 }
6522
6523 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6524 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6525 unsigned char bytes[16];
43e9d192
IB
6526 int immtype = -1, matches;
6527 unsigned int invmask = inverse ? 0xff : 0;
6528 int eshift, emvn;
6529
43e9d192 6530 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3520f7cc 6531 {
81c2dfb9
IB
6532 if (! (aarch64_simd_imm_zero_p (op, mode)
6533 || aarch64_vect_float_const_representable_p (op)))
d8edd899 6534 return false;
3520f7cc 6535
48063b9d
IB
6536 if (info)
6537 {
6538 info->value = CONST_VECTOR_ELT (op, 0);
81c2dfb9 6539 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
48063b9d
IB
6540 info->mvn = false;
6541 info->shift = 0;
6542 }
3520f7cc 6543
d8edd899 6544 return true;
3520f7cc 6545 }
43e9d192
IB
6546
6547 /* Splat vector constant out into a byte vector. */
6548 for (i = 0; i < n_elts; i++)
6549 {
6550 rtx el = CONST_VECTOR_ELT (op, i);
6551 unsigned HOST_WIDE_INT elpart;
6552 unsigned int part, parts;
6553
6554 if (GET_CODE (el) == CONST_INT)
6555 {
6556 elpart = INTVAL (el);
6557 parts = 1;
6558 }
6559 else if (GET_CODE (el) == CONST_DOUBLE)
6560 {
6561 elpart = CONST_DOUBLE_LOW (el);
6562 parts = 2;
6563 }
6564 else
6565 gcc_unreachable ();
6566
6567 for (part = 0; part < parts; part++)
6568 {
6569 unsigned int byte;
6570 for (byte = 0; byte < innersize; byte++)
6571 {
6572 bytes[idx++] = (elpart & 0xff) ^ invmask;
6573 elpart >>= BITS_PER_UNIT;
6574 }
6575 if (GET_CODE (el) == CONST_DOUBLE)
6576 elpart = CONST_DOUBLE_HIGH (el);
6577 }
6578 }
6579
6580 /* Sanity check. */
6581 gcc_assert (idx == GET_MODE_SIZE (mode));
6582
6583 do
6584 {
6585 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6586 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6587
6588 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6589 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6590
6591 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6592 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6593
6594 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6595 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6596
6597 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6598
6599 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6600
6601 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6602 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6603
6604 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6605 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6606
6607 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6608 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6609
6610 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6611 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6612
6613 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6614
6615 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6616
6617 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
e4f0f84d 6618 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
43e9d192
IB
6619
6620 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
e4f0f84d 6621 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
43e9d192
IB
6622
6623 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
e4f0f84d 6624 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
43e9d192
IB
6625
6626 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
e4f0f84d 6627 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
43e9d192
IB
6628
6629 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6630
6631 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6632 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6633 }
6634 while (0);
6635
e4f0f84d 6636 if (immtype == -1)
d8edd899 6637 return false;
43e9d192 6638
48063b9d 6639 if (info)
43e9d192 6640 {
48063b9d 6641 info->element_width = elsize;
48063b9d
IB
6642 info->mvn = emvn != 0;
6643 info->shift = eshift;
6644
43e9d192
IB
6645 unsigned HOST_WIDE_INT imm = 0;
6646
e4f0f84d
TB
6647 if (immtype >= 12 && immtype <= 15)
6648 info->msl = true;
6649
43e9d192
IB
6650 /* Un-invert bytes of recognized vector, if necessary. */
6651 if (invmask != 0)
6652 for (i = 0; i < idx; i++)
6653 bytes[i] ^= invmask;
6654
6655 if (immtype == 17)
6656 {
6657 /* FIXME: Broken on 32-bit H_W_I hosts. */
6658 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6659
6660 for (i = 0; i < 8; i++)
6661 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6662 << (i * BITS_PER_UNIT);
6663
43e9d192 6664
48063b9d
IB
6665 info->value = GEN_INT (imm);
6666 }
6667 else
6668 {
6669 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6670 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
43e9d192
IB
6671
6672 /* Construct 'abcdefgh' because the assembler cannot handle
48063b9d
IB
6673 generic constants. */
6674 if (info->mvn)
43e9d192 6675 imm = ~imm;
48063b9d
IB
6676 imm = (imm >> info->shift) & 0xff;
6677 info->value = GEN_INT (imm);
6678 }
43e9d192
IB
6679 }
6680
48063b9d 6681 return true;
43e9d192
IB
6682#undef CHECK
6683}
6684
43e9d192
IB
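/* Return true if X is a CONST_VECTOR of integers whose elements all have
   the same value, and that value lies in the range [MINVAL, MAXVAL].  */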
6685static bool
6686aarch64_const_vec_all_same_int_p (rtx x,
6687 HOST_WIDE_INT minval,
6688 HOST_WIDE_INT maxval)
6689{
6690 HOST_WIDE_INT firstval;
6691 int count, i;
6692
6693 if (GET_CODE (x) != CONST_VECTOR
6694 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6695 return false;
6696
6697 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6698 if (firstval < minval || firstval > maxval)
6699 return false;
6700
6701 count = CONST_VECTOR_NUNITS (x);
6702 for (i = 1; i < count; i++)
6703 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6704 return false;
6705
6706 return true;
6707}
6708
 6709/* Check if immediate shift constants are within range.  */
6710bool
6711aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6712{
6713 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6714 if (left)
6715 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6716 else
6717 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6718}
6719
3520f7cc
JG
6720/* Return true if X is a uniform vector where all elements
6721 are either the floating-point constant 0.0 or the
6722 integer constant 0. */
43e9d192
IB
6723bool
6724aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6725{
3520f7cc 6726 return x == CONST0_RTX (mode);
43e9d192
IB
6727}
6728
6729bool
6730aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6731{
6732 HOST_WIDE_INT imm = INTVAL (x);
6733 int i;
6734
6735 for (i = 0; i < 8; i++)
6736 {
6737 unsigned int byte = imm & 0xff;
6738 if (byte != 0xff && byte != 0)
6739 return false;
6740 imm >>= 8;
6741 }
6742
6743 return true;
6744}
6745
83f8c414
CSS
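/* Return true if X is a suitable source operand for a move in MODE: a HIGH
   of a valid symbol reference, a CONST_INT acceptable to aarch64_move_imm,
   a constant-address SYMBOL_REF in DImode, or a symbolic expression that
   classifies as SYMBOL_TINY_ABSOLUTE in CONTEXT.  */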
6746bool
6747aarch64_mov_operand_p (rtx x,
a5350ddc 6748 enum aarch64_symbol_context context,
83f8c414
CSS
6749 enum machine_mode mode)
6750{
83f8c414
CSS
6751 if (GET_CODE (x) == HIGH
6752 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6753 return true;
6754
6755 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
6756 return true;
6757
6758 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
6759 return true;
6760
a5350ddc
CSS
6761 return aarch64_classify_symbolic_expression (x, context)
6762 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
6763}
6764
43e9d192
IB
6765/* Return a const_int vector of VAL. */
6766rtx
6767aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6768{
6769 int nunits = GET_MODE_NUNITS (mode);
6770 rtvec v = rtvec_alloc (nunits);
6771 int i;
6772
6773 for (i=0; i < nunits; i++)
6774 RTVEC_ELT (v, i) = GEN_INT (val);
6775
6776 return gen_rtx_CONST_VECTOR (mode, v);
6777}
6778
051d0e2f
SN
6779/* Check OP is a legal scalar immediate for the MOVI instruction. */
6780
6781bool
6782aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
6783{
6784 enum machine_mode vmode;
6785
6786 gcc_assert (!VECTOR_MODE_P (mode));
6787 vmode = aarch64_preferred_simd_mode (mode);
6788 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
48063b9d 6789 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
051d0e2f
SN
6790}
6791
43e9d192
IB
6792/* Construct and return a PARALLEL RTX vector. */
6793rtx
6794aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6795{
6796 int nunits = GET_MODE_NUNITS (mode);
6797 rtvec v = rtvec_alloc (nunits / 2);
6798 int base = high ? nunits / 2 : 0;
6799 rtx t1;
6800 int i;
6801
6802 for (i=0; i < nunits / 2; i++)
6803 RTVEC_ELT (v, i) = GEN_INT (base + i);
6804
6805 t1 = gen_rtx_PARALLEL (mode, v);
6806 return t1;
6807}
6808
6809/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6810 HIGH (exclusive). */
6811void
6812aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6813{
6814 HOST_WIDE_INT lane;
6815 gcc_assert (GET_CODE (operand) == CONST_INT);
6816 lane = INTVAL (operand);
6817
6818 if (lane < low || lane >= high)
6819 error ("lane out of range");
6820}
6821
6822void
6823aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6824{
6825 gcc_assert (GET_CODE (operand) == CONST_INT);
6826 HOST_WIDE_INT lane = INTVAL (operand);
6827
6828 if (lane < low || lane >= high)
6829 error ("constant out of range");
6830}
6831
6832/* Emit code to reinterpret one AdvSIMD type as another,
6833 without altering bits. */
6834void
6835aarch64_simd_reinterpret (rtx dest, rtx src)
6836{
6837 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6838}
6839
 6840/* Emit code to place an AdvSIMD pair result in memory locations (with equal
6841 registers). */
6842void
6843aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6844 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6845 rtx op1)
6846{
6847 rtx mem = gen_rtx_MEM (mode, destaddr);
6848 rtx tmp1 = gen_reg_rtx (mode);
6849 rtx tmp2 = gen_reg_rtx (mode);
6850
6851 emit_insn (intfn (tmp1, op1, tmp2));
6852
6853 emit_move_insn (mem, tmp1);
6854 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6855 emit_move_insn (mem, tmp2);
6856}
6857
6858/* Return TRUE if OP is a valid vector addressing mode. */
6859bool
6860aarch64_simd_mem_operand_p (rtx op)
6861{
6862 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6863 || GET_CODE (XEXP (op, 0)) == REG);
6864}
6865
6866/* Set up OPERANDS for a register copy from SRC to DEST, taking care
6867 not to early-clobber SRC registers in the process.
6868
6869 We assume that the operands described by SRC and DEST represent a
6870 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6871 number of components into which the copy has been decomposed. */
6872void
6873aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6874 rtx *src, unsigned int count)
6875{
6876 unsigned int i;
6877
6878 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6879 || REGNO (operands[0]) < REGNO (operands[1]))
6880 {
6881 for (i = 0; i < count; i++)
6882 {
6883 operands[2 * i] = dest[i];
6884 operands[2 * i + 1] = src[i];
6885 }
6886 }
6887 else
6888 {
6889 for (i = 0; i < count; i++)
6890 {
6891 operands[2 * i] = dest[count - i - 1];
6892 operands[2 * i + 1] = src[count - i - 1];
6893 }
6894 }
6895}
6896
6897/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6898 one of VSTRUCT modes: OI, CI or XI. */
6899int
6900aarch64_simd_attr_length_move (rtx insn)
6901{
43e9d192
IB
6902 enum machine_mode mode;
6903
6904 extract_insn_cached (insn);
6905
6906 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6907 {
6908 mode = GET_MODE (recog_data.operand[0]);
6909 switch (mode)
6910 {
6911 case OImode:
6912 return 8;
6913 case CImode:
6914 return 12;
6915 case XImode:
6916 return 16;
6917 default:
6918 gcc_unreachable ();
6919 }
6920 }
6921 return 4;
6922}
6923
db0253a4
TB
6924/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6925 alignment of a vector to 128 bits. */
6926static HOST_WIDE_INT
6927aarch64_simd_vector_alignment (const_tree type)
6928{
9439e9a1 6929 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
db0253a4
TB
6930 return MIN (align, 128);
6931}
6932
6933/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6934static bool
6935aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6936{
6937 if (is_packed)
6938 return false;
6939
6940 /* We guarantee alignment for vectors up to 128-bits. */
6941 if (tree_int_cst_compare (TYPE_SIZE (type),
6942 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6943 return false;
6944
6945 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6946 return true;
6947}
6948
4369c11e
TB
6949/* If VALS is a vector constant that can be loaded into a register
6950 using DUP, generate instructions to do so and return an RTX to
6951 assign to the register. Otherwise return NULL_RTX. */
6952static rtx
6953aarch64_simd_dup_constant (rtx vals)
6954{
6955 enum machine_mode mode = GET_MODE (vals);
6956 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6957 int n_elts = GET_MODE_NUNITS (mode);
6958 bool all_same = true;
6959 rtx x;
6960 int i;
6961
6962 if (GET_CODE (vals) != CONST_VECTOR)
6963 return NULL_RTX;
6964
6965 for (i = 1; i < n_elts; ++i)
6966 {
6967 x = CONST_VECTOR_ELT (vals, i);
6968 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6969 all_same = false;
6970 }
6971
6972 if (!all_same)
6973 return NULL_RTX;
6974
6975 /* We can load this constant by using DUP and a constant in a
6976 single ARM register. This will be cheaper than a vector
6977 load. */
6978 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6979 return gen_rtx_VEC_DUPLICATE (mode, x);
6980}
6981
6982
6983/* Generate code to load VALS, which is a PARALLEL containing only
6984 constants (for vec_init) or CONST_VECTOR, efficiently into a
6985 register. Returns an RTX to copy into the register, or NULL_RTX
6986 for a PARALLEL that can not be converted into a CONST_VECTOR. */
1df3f464 6987static rtx
4369c11e
TB
6988aarch64_simd_make_constant (rtx vals)
6989{
6990 enum machine_mode mode = GET_MODE (vals);
6991 rtx const_dup;
6992 rtx const_vec = NULL_RTX;
6993 int n_elts = GET_MODE_NUNITS (mode);
6994 int n_const = 0;
6995 int i;
6996
6997 if (GET_CODE (vals) == CONST_VECTOR)
6998 const_vec = vals;
6999 else if (GET_CODE (vals) == PARALLEL)
7000 {
7001 /* A CONST_VECTOR must contain only CONST_INTs and
7002 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
7003 Only store valid constants in a CONST_VECTOR. */
7004 for (i = 0; i < n_elts; ++i)
7005 {
7006 rtx x = XVECEXP (vals, 0, i);
7007 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
7008 n_const++;
7009 }
7010 if (n_const == n_elts)
7011 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7012 }
7013 else
7014 gcc_unreachable ();
7015
7016 if (const_vec != NULL_RTX
48063b9d 7017 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
4369c11e
TB
7018 /* Load using MOVI/MVNI. */
7019 return const_vec;
7020 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
7021 /* Loaded using DUP. */
7022 return const_dup;
7023 else if (const_vec != NULL_RTX)
7024 /* Load from constant pool. We can not take advantage of single-cycle
7025 LD1 because we need a PC-relative addressing mode. */
7026 return const_vec;
7027 else
7028 /* A PARALLEL containing something not valid inside CONST_VECTOR.
7029 We can not construct an initializer. */
7030 return NULL_RTX;
7031}
7032
7033void
7034aarch64_expand_vector_init (rtx target, rtx vals)
7035{
7036 enum machine_mode mode = GET_MODE (target);
7037 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7038 int n_elts = GET_MODE_NUNITS (mode);
7039 int n_var = 0, one_var = -1;
7040 bool all_same = true;
7041 rtx x, mem;
7042 int i;
7043
7044 x = XVECEXP (vals, 0, 0);
7045 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7046 n_var = 1, one_var = 0;
7047
7048 for (i = 1; i < n_elts; ++i)
7049 {
7050 x = XVECEXP (vals, 0, i);
7051 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7052 ++n_var, one_var = i;
7053
7054 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7055 all_same = false;
7056 }
7057
7058 if (n_var == 0)
7059 {
7060 rtx constant = aarch64_simd_make_constant (vals);
7061 if (constant != NULL_RTX)
7062 {
7063 emit_move_insn (target, constant);
7064 return;
7065 }
7066 }
7067
7068 /* Splat a single non-constant element if we can. */
7069 if (all_same)
7070 {
7071 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
7072 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
7073 return;
7074 }
7075
7076 /* One field is non-constant. Load constant then overwrite varying
7077 field. This is more efficient than using the stack. */
7078 if (n_var == 1)
7079 {
7080 rtx copy = copy_rtx (vals);
7081 rtx index = GEN_INT (one_var);
7082 enum insn_code icode;
7083
7084 /* Load constant part of vector, substitute neighboring value for
7085 varying element. */
7086 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
7087 aarch64_expand_vector_init (target, copy);
7088
7089 /* Insert variable. */
7090 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
7091 icode = optab_handler (vec_set_optab, mode);
7092 gcc_assert (icode != CODE_FOR_nothing);
7093 emit_insn (GEN_FCN (icode) (target, x, index));
7094 return;
7095 }
7096
7097 /* Construct the vector in memory one field at a time
7098 and load the whole vector. */
7099 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7100 for (i = 0; i < n_elts; i++)
7101 emit_move_insn (adjust_address_nv (mem, inner_mode,
7102 i * GET_MODE_SIZE (inner_mode)),
7103 XVECEXP (vals, 0, i));
7104 emit_move_insn (target, mem);
7105
7106}
7107
43e9d192
IB
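/* Return the mask of valid shift amounts for MODE: zero for vector and
   vector-structure modes, whose shift counts are not truncated, and
   GET_MODE_BITSIZE (mode) - 1 otherwise.  */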
7108static unsigned HOST_WIDE_INT
7109aarch64_shift_truncation_mask (enum machine_mode mode)
7110{
7111 return
7112 (aarch64_vector_mode_supported_p (mode)
7113 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
7114}
7115
7116#ifndef TLS_SECTION_ASM_FLAG
7117#define TLS_SECTION_ASM_FLAG 'T'
7118#endif
7119
7120void
7121aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7122 tree decl ATTRIBUTE_UNUSED)
7123{
7124 char flagchars[10], *f = flagchars;
7125
7126 /* If we have already declared this section, we can use an
7127 abbreviated form to switch back to it -- unless this section is
 7128     part of a COMDAT group, in which case GAS requires the full
7129 declaration every time. */
7130 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7131 && (flags & SECTION_DECLARED))
7132 {
7133 fprintf (asm_out_file, "\t.section\t%s\n", name);
7134 return;
7135 }
7136
7137 if (!(flags & SECTION_DEBUG))
7138 *f++ = 'a';
7139 if (flags & SECTION_WRITE)
7140 *f++ = 'w';
7141 if (flags & SECTION_CODE)
7142 *f++ = 'x';
7143 if (flags & SECTION_SMALL)
7144 *f++ = 's';
7145 if (flags & SECTION_MERGE)
7146 *f++ = 'M';
7147 if (flags & SECTION_STRINGS)
7148 *f++ = 'S';
7149 if (flags & SECTION_TLS)
7150 *f++ = TLS_SECTION_ASM_FLAG;
7151 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7152 *f++ = 'G';
7153 *f = '\0';
7154
7155 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7156
7157 if (!(flags & SECTION_NOTYPE))
7158 {
7159 const char *type;
7160 const char *format;
7161
7162 if (flags & SECTION_BSS)
7163 type = "nobits";
7164 else
7165 type = "progbits";
7166
7167#ifdef TYPE_OPERAND_FMT
7168 format = "," TYPE_OPERAND_FMT;
7169#else
7170 format = ",@%s";
7171#endif
7172
7173 fprintf (asm_out_file, format, type);
7174
7175 if (flags & SECTION_ENTSIZE)
7176 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7177 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7178 {
7179 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7180 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7181 else
7182 fprintf (asm_out_file, ",%s,comdat",
7183 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7184 }
7185 }
7186
7187 putc ('\n', asm_out_file);
7188}
7189
7190/* Select a format to encode pointers in exception handling data. */
7191int
7192aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7193{
7194 int type;
7195 switch (aarch64_cmodel)
7196 {
7197 case AARCH64_CMODEL_TINY:
7198 case AARCH64_CMODEL_TINY_PIC:
7199 case AARCH64_CMODEL_SMALL:
7200 case AARCH64_CMODEL_SMALL_PIC:
7201 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
7202 for everything. */
7203 type = DW_EH_PE_sdata4;
7204 break;
7205 default:
7206 /* No assumptions here. 8-byte relocs required. */
7207 type = DW_EH_PE_sdata8;
7208 break;
7209 }
7210 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7211}
7212
0462169c
SN
7213/* Emit load exclusive. */
7214
7215static void
7216aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7217 rtx mem, rtx model_rtx)
7218{
7219 rtx (*gen) (rtx, rtx, rtx);
7220
7221 switch (mode)
7222 {
7223 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7224 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7225 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7226 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7227 default:
7228 gcc_unreachable ();
7229 }
7230
7231 emit_insn (gen (rval, mem, model_rtx));
7232}
7233
7234/* Emit store exclusive. */
7235
7236static void
7237aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7238 rtx rval, rtx mem, rtx model_rtx)
7239{
7240 rtx (*gen) (rtx, rtx, rtx, rtx);
7241
7242 switch (mode)
7243 {
7244 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7245 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7246 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7247 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7248 default:
7249 gcc_unreachable ();
7250 }
7251
7252 emit_insn (gen (bval, rval, mem, model_rtx));
7253}
7254
7255/* Mark the previous jump instruction as unlikely. */
7256
7257static void
7258aarch64_emit_unlikely_jump (rtx insn)
7259{
e5af9ddd 7260 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
0462169c
SN
7261
7262 insn = emit_jump_insn (insn);
e5af9ddd 7263 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
0462169c
SN
7264}
7265
7266/* Expand a compare and swap pattern. */
7267
7268void
7269aarch64_expand_compare_and_swap (rtx operands[])
7270{
7271 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7272 enum machine_mode mode, cmp_mode;
7273 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7274
7275 bval = operands[0];
7276 rval = operands[1];
7277 mem = operands[2];
7278 oldval = operands[3];
7279 newval = operands[4];
7280 is_weak = operands[5];
7281 mod_s = operands[6];
7282 mod_f = operands[7];
7283 mode = GET_MODE (mem);
7284 cmp_mode = mode;
7285
7286 /* Normally the succ memory model must be stronger than fail, but in the
7287 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7288 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7289
7290 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7291 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7292 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7293
7294 switch (mode)
7295 {
7296 case QImode:
7297 case HImode:
7298 /* For short modes, we're going to perform the comparison in SImode,
7299 so do the zero-extension now. */
7300 cmp_mode = SImode;
7301 rval = gen_reg_rtx (SImode);
7302 oldval = convert_modes (SImode, mode, oldval, true);
7303 /* Fall through. */
7304
7305 case SImode:
7306 case DImode:
7307 /* Force the value into a register if needed. */
7308 if (!aarch64_plus_operand (oldval, mode))
7309 oldval = force_reg (cmp_mode, oldval);
7310 break;
7311
7312 default:
7313 gcc_unreachable ();
7314 }
7315
7316 switch (mode)
7317 {
7318 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7319 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7320 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7321 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7322 default:
7323 gcc_unreachable ();
7324 }
7325
7326 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7327
7328 if (mode == QImode || mode == HImode)
7329 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7330
7331 x = gen_rtx_REG (CCmode, CC_REGNUM);
7332 x = gen_rtx_EQ (SImode, x, const0_rtx);
7333 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7334}
7335
7336/* Split a compare and swap pattern. */
7337
7338void
7339aarch64_split_compare_and_swap (rtx operands[])
7340{
7341 rtx rval, mem, oldval, newval, scratch;
7342 enum machine_mode mode;
0462169c
SN
7343 bool is_weak;
7344 rtx label1, label2, x, cond;
7345
7346 rval = operands[0];
7347 mem = operands[1];
7348 oldval = operands[2];
7349 newval = operands[3];
7350 is_weak = (operands[4] != const0_rtx);
0462169c
SN
7351 scratch = operands[7];
7352 mode = GET_MODE (mem);
7353
7354 label1 = NULL_RTX;
7355 if (!is_weak)
7356 {
7357 label1 = gen_label_rtx ();
7358 emit_label (label1);
7359 }
7360 label2 = gen_label_rtx ();
7361
7362 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7363
7364 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7365 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7366 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7367 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7368 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7369
7370 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7371
7372 if (!is_weak)
7373 {
7374 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7375 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7376 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7377 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7378 }
7379 else
7380 {
7381 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7382 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7383 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7384 }
7385
7386 emit_label (label2);
7387}
7388
7389/* Split an atomic operation. */
7390
7391void
7392aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7393 rtx value, rtx model_rtx, rtx cond)
7394{
7395 enum machine_mode mode = GET_MODE (mem);
7396 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7397 rtx label, x;
7398
7399 label = gen_label_rtx ();
7400 emit_label (label);
7401
7402 if (new_out)
7403 new_out = gen_lowpart (wmode, new_out);
7404 if (old_out)
7405 old_out = gen_lowpart (wmode, old_out);
7406 else
7407 old_out = new_out;
7408 value = simplify_gen_subreg (wmode, value, mode, 0);
7409
7410 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7411
7412 switch (code)
7413 {
7414 case SET:
7415 new_out = value;
7416 break;
7417
7418 case NOT:
7419 x = gen_rtx_AND (wmode, old_out, value);
7420 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7421 x = gen_rtx_NOT (wmode, new_out);
7422 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7423 break;
7424
7425 case MINUS:
7426 if (CONST_INT_P (value))
7427 {
7428 value = GEN_INT (-INTVAL (value));
7429 code = PLUS;
7430 }
7431 /* Fall through. */
7432
7433 default:
7434 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7435 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7436 break;
7437 }
7438
7439 aarch64_emit_store_exclusive (mode, cond, mem,
7440 gen_lowpart (mode, new_out), model_rtx);
7441
7442 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7443 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7444 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7445 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7446}
7447
95ca411e
YZ
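/* Print the enabled architecture extensions (e.g. "+fp", "+simd") after the
   .arch or .cpu directive, followed by a newline.  */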
7448static void
7449aarch64_print_extension (void)
7450{
7451 const struct aarch64_option_extension *opt = NULL;
7452
7453 for (opt = all_extensions; opt->name != NULL; opt++)
7454 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7455 asm_fprintf (asm_out_file, "+%s", opt->name);
7456
7457 asm_fprintf (asm_out_file, "\n");
7458}
7459
43e9d192
IB
7460static void
7461aarch64_start_file (void)
7462{
7463 if (selected_arch)
95ca411e
YZ
7464 {
7465 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7466 aarch64_print_extension ();
7467 }
43e9d192 7468 else if (selected_cpu)
95ca411e 7469 {
682287fb
JG
7470 const char *truncated_name
7471 = aarch64_rewrite_selected_cpu (selected_cpu->name);
7472 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
95ca411e
YZ
7473 aarch64_print_extension ();
7474 }
43e9d192
IB
 7475  default_file_start ();
7476}
7477
7478/* Target hook for c_mode_for_suffix. */
7479static enum machine_mode
7480aarch64_c_mode_for_suffix (char suffix)
7481{
7482 if (suffix == 'q')
7483 return TFmode;
7484
7485 return VOIDmode;
7486}
7487
3520f7cc
JG
7488/* We can only represent floating point constants which will fit in
7489 "quarter-precision" values. These values are characterised by
 7490   "quarter-precision" values.  These values are characterised by
 7490   a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
 7491   by:
7492
7493 (-1)^s * (n/16) * 2^r
7494
7495 Where:
7496 's' is the sign bit.
7497 'n' is an integer in the range 16 <= n <= 31.
7498 'r' is an integer in the range -3 <= r <= 4. */
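/* For example (illustrative only): 0.125 = (16/16) * 2^-3 and
   1.5 = (24/16) * 2^0 fit this form and are representable, whereas values
   such as 0.1 or 100.0 are not.  */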
7499
7500/* Return true iff X can be represented by a quarter-precision
 7501   floating point immediate operand.  Note, we cannot represent 0.0.  */
7502bool
7503aarch64_float_const_representable_p (rtx x)
7504{
7505 /* This represents our current view of how many bits
7506 make up the mantissa. */
7507 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 7508 int exponent;
3520f7cc
JG
7509 unsigned HOST_WIDE_INT mantissa, mask;
7510 HOST_WIDE_INT m1, m2;
7511 REAL_VALUE_TYPE r, m;
7512
7513 if (!CONST_DOUBLE_P (x))
7514 return false;
7515
7516 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7517
7518 /* We cannot represent infinities, NaNs or +/-zero. We won't
7519 know if we have +zero until we analyse the mantissa, but we
7520 can reject the other invalid values. */
7521 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7522 || REAL_VALUE_MINUS_ZERO (r))
7523 return false;
7524
ba96cdfb 7525 /* Extract exponent. */
3520f7cc
JG
7526 r = real_value_abs (&r);
7527 exponent = REAL_EXP (&r);
7528
7529 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7530 highest (sign) bit, with a fixed binary point at bit point_pos.
7531 m1 holds the low part of the mantissa, m2 the high part.
7532 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7533 bits for the mantissa, this can fail (low bits will be lost). */
7534 real_ldexp (&m, &r, point_pos - exponent);
7535 REAL_VALUE_TO_INT (&m1, &m2, m);
7536
7537 /* If the low part of the mantissa has bits set we cannot represent
7538 the value. */
7539 if (m1 != 0)
7540 return false;
7541 /* We have rejected the lower HOST_WIDE_INT, so update our
7542 understanding of how many bits lie in the mantissa and
7543 look only at the high HOST_WIDE_INT. */
7544 mantissa = m2;
7545 point_pos -= HOST_BITS_PER_WIDE_INT;
7546
7547 /* We can only represent values with a mantissa of the form 1.xxxx. */
7548 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7549 if ((mantissa & mask) != 0)
7550 return false;
7551
7552 /* Having filtered unrepresentable values, we may now remove all
7553 but the highest 5 bits. */
7554 mantissa >>= point_pos - 5;
7555
7556 /* We cannot represent the value 0.0, so reject it. This is handled
7557 elsewhere. */
7558 if (mantissa == 0)
7559 return false;
7560
7561 /* Then, as bit 4 is always set, we can mask it off, leaving
7562 the mantissa in the range [0, 15]. */
7563 mantissa &= ~(1 << 4);
7564 gcc_assert (mantissa <= 15);
7565
7566 /* GCC internally does not use IEEE754-like encoding (where normalized
7567 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
7568 Our mantissa values are shifted 4 places to the left relative to
7569 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7570 by 5 places to correct for GCC's representation. */
7571 exponent = 5 - exponent;
7572
7573 return (exponent >= 0 && exponent <= 7);
7574}
7575
7576char*
81c2dfb9 7577aarch64_output_simd_mov_immediate (rtx const_vector,
3520f7cc
JG
7578 enum machine_mode mode,
7579 unsigned width)
7580{
3ea63f60 7581 bool is_valid;
3520f7cc 7582 static char templ[40];
3520f7cc 7583 const char *mnemonic;
e4f0f84d 7584 const char *shift_op;
3520f7cc 7585 unsigned int lane_count = 0;
81c2dfb9 7586 char element_char;
3520f7cc 7587
e4f0f84d 7588 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
48063b9d
IB
7589
 7590  /* This will return true to show that const_vector is legal for use as
 7591     an AdvSIMD MOVI (or, implicitly, MVNI) instruction immediate.  It will
 7592     also update INFO to show how the immediate should be generated.  */
81c2dfb9 7593 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
3520f7cc
JG
7594 gcc_assert (is_valid);
7595
81c2dfb9 7596 element_char = sizetochar (info.element_width);
48063b9d
IB
7597 lane_count = width / info.element_width;
7598
3520f7cc
JG
7599 mode = GET_MODE_INNER (mode);
7600 if (mode == SFmode || mode == DFmode)
7601 {
48063b9d
IB
7602 gcc_assert (info.shift == 0 && ! info.mvn);
7603 if (aarch64_float_const_zero_rtx_p (info.value))
7604 info.value = GEN_INT (0);
7605 else
7606 {
7607#define buf_size 20
7608 REAL_VALUE_TYPE r;
7609 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
7610 char float_buf[buf_size] = {'\0'};
7611 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
7612#undef buf_size
7613
7614 if (lane_count == 1)
7615 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
7616 else
7617 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 7618 lane_count, element_char, float_buf);
48063b9d
IB
7619 return templ;
7620 }
3520f7cc 7621 }
3520f7cc 7622
48063b9d 7623 mnemonic = info.mvn ? "mvni" : "movi";
e4f0f84d 7624 shift_op = info.msl ? "msl" : "lsl";
3520f7cc
JG
7625
7626 if (lane_count == 1)
48063b9d
IB
7627 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
7628 mnemonic, UINTVAL (info.value));
7629 else if (info.shift)
7630 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
e4f0f84d
TB
7631 ", %s %d", mnemonic, lane_count, element_char,
7632 UINTVAL (info.value), shift_op, info.shift);
3520f7cc 7633 else
48063b9d 7634 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
81c2dfb9 7635 mnemonic, lane_count, element_char, UINTVAL (info.value));
3520f7cc
JG
7636 return templ;
7637}
7638
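An illustrative, standalone rendering of how the template above is assembled; the concrete values (a 4-lane, 32-bit element immediate 0x1 with a left shift of 8, i.e. the "info.shift" branch) are hypothetical and chosen only for the example:

#include <stdio.h>

int
main (void)
{
  char templ[40];
  const char *mnemonic = "movi", *shift_op = "lsl";
  int lane_count = 4;
  char element_char = 's';
  unsigned long value = 0x1;
  int shift = 8;

  /* The "%%0" survives as the %0 operand placeholder that GCC's normal
     output machinery substitutes later.  */
  snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, 0x%lx, %s %d",
            mnemonic, lane_count, element_char, value, shift_op, shift);
  printf ("%s\n", templ);   /* prints: movi	%0.4s, 0x1, lsl 8 */
  return 0;
}
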
b7342d25
IB
7639char*
7640aarch64_output_scalar_simd_mov_immediate (rtx immediate,
7641 enum machine_mode mode)
7642{
7643 enum machine_mode vmode;
7644
7645 gcc_assert (!VECTOR_MODE_P (mode));
7646 vmode = aarch64_simd_container_mode (mode, 64);
7647 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
7648 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
7649}
7650
88b08073
JG
7651/* Split a combination of op[1] and op[2] into op[0] into separate moves for each half. */
7652
7653void
7654aarch64_split_combinev16qi (rtx operands[3])
7655{
7656 unsigned int dest = REGNO (operands[0]);
7657 unsigned int src1 = REGNO (operands[1]);
7658 unsigned int src2 = REGNO (operands[2]);
7659 enum machine_mode halfmode = GET_MODE (operands[1]);
7660 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7661 rtx destlo, desthi;
7662
7663 gcc_assert (halfmode == V16QImode);
7664
7665 if (src1 == dest && src2 == dest + halfregs)
7666 {
7667 /* No-op move. Can't split to nothing; emit something. */
7668 emit_note (NOTE_INSN_DELETED);
7669 return;
7670 }
7671
7672 /* Preserve register attributes for variable tracking. */
7673 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7674 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7675 GET_MODE_SIZE (halfmode));
7676
7677 /* Special case of reversed high/low parts. */
7678 if (reg_overlap_mentioned_p (operands[2], destlo)
7679 && reg_overlap_mentioned_p (operands[1], desthi))
7680 {
7681 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7682 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7683 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7684 }
7685 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7686 {
7687 /* Try to avoid unnecessary moves if part of the result
7688 is in the right place already. */
7689 if (src1 != dest)
7690 emit_move_insn (destlo, operands[1]);
7691 if (src2 != dest + halfregs)
7692 emit_move_insn (desthi, operands[2]);
7693 }
7694 else
7695 {
7696 if (src2 != dest + halfregs)
7697 emit_move_insn (desthi, operands[2]);
7698 if (src1 != dest)
7699 emit_move_insn (destlo, operands[1]);
7700 }
7701}
7702
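The reversed high/low branch above swaps the contents of operands[1] and operands[2] in place with three vector XORs, so no scratch register is needed. A standalone sketch of the same identity on plain integers (helper name hypothetical):

#include <assert.h>

static void
xor_swap (unsigned *a, unsigned *b)
{
  *a ^= *b;   /* a = a ^ b                 */
  *b ^= *a;   /* b = b ^ (a ^ b) = old a   */
  *a ^= *b;   /* a = (a ^ b) ^ old a = old b */
}

/* Usage: unsigned x = 1, y = 2; xor_swap (&x, &y);
   assert (x == 2 && y == 1);  */
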
7703/* vec_perm support. */
7704
7705#define MAX_VECT_LEN 16
7706
7707struct expand_vec_perm_d
7708{
7709 rtx target, op0, op1;
7710 unsigned char perm[MAX_VECT_LEN];
7711 enum machine_mode vmode;
7712 unsigned char nelt;
7713 bool one_vector_p;
7714 bool testing_p;
7715};
7716
7717/* Generate a variable permutation. */
7718
7719static void
7720aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7721{
7722 enum machine_mode vmode = GET_MODE (target);
7723 bool one_vector_p = rtx_equal_p (op0, op1);
7724
7725 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7726 gcc_checking_assert (GET_MODE (op0) == vmode);
7727 gcc_checking_assert (GET_MODE (op1) == vmode);
7728 gcc_checking_assert (GET_MODE (sel) == vmode);
7729 gcc_checking_assert (TARGET_SIMD);
7730
7731 if (one_vector_p)
7732 {
7733 if (vmode == V8QImode)
7734 {
7735 /* Expand the argument to a V16QI mode by duplicating it. */
7736 rtx pair = gen_reg_rtx (V16QImode);
7737 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7738 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7739 }
7740 else
7741 {
7742 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7743 }
7744 }
7745 else
7746 {
7747 rtx pair;
7748
7749 if (vmode == V8QImode)
7750 {
7751 pair = gen_reg_rtx (V16QImode);
7752 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7753 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7754 }
7755 else
7756 {
7757 pair = gen_reg_rtx (OImode);
7758 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7759 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7760 }
7761 }
7762}
7763
7764void
7765aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7766{
7767 enum machine_mode vmode = GET_MODE (target);
7768 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7769 bool one_vector_p = rtx_equal_p (op0, op1);
7770 rtx rmask[MAX_VECT_LEN], mask;
7771
7772 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7773
7774 /* The TBL instruction does not use a modulo index, so we must take care
7775 of that ourselves. */
7776 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7777 for (i = 0; i < nelt; ++i)
7778 rmask[i] = mask;
7779 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7780 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7781
7782 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7783}
7784
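A standalone sketch, not part of this file, of what the AND with the mask above achieves: TBL yields 0 for selector bytes outside the table, whereas the vec_perm semantics want the index taken modulo the number of input elements, so the selector is wrapped first (assuming, as here, that the element count is a power of two; the helper name is hypothetical):

#include <stddef.h>

static void
wrap_selector (unsigned char *sel, size_t nelt, int one_vector_p)
{
  unsigned char mask = one_vector_p ? nelt - 1 : 2 * nelt - 1;
  for (size_t i = 0; i < nelt; i++)
    sel[i] &= mask;   /* e.g. index 17 on a 16-lane single input becomes 1 */
}
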
cc4d934f
JG
7785/* Recognize patterns suitable for the TRN instructions. */
7786static bool
7787aarch64_evpc_trn (struct expand_vec_perm_d *d)
7788{
7789 unsigned int i, odd, mask, nelt = d->nelt;
7790 rtx out, in0, in1, x;
7791 rtx (*gen) (rtx, rtx, rtx);
7792 enum machine_mode vmode = d->vmode;
7793
7794 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7795 return false;
7796
7797 /* Note that these are little-endian tests.
7798 We correct for big-endian later. */
7799 if (d->perm[0] == 0)
7800 odd = 0;
7801 else if (d->perm[0] == 1)
7802 odd = 1;
7803 else
7804 return false;
7805 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7806
7807 for (i = 0; i < nelt; i += 2)
7808 {
7809 if (d->perm[i] != i + odd)
7810 return false;
7811 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7812 return false;
7813 }
7814
7815 /* Success! */
7816 if (d->testing_p)
7817 return true;
7818
7819 in0 = d->op0;
7820 in1 = d->op1;
7821 if (BYTES_BIG_ENDIAN)
7822 {
7823 x = in0, in0 = in1, in1 = x;
7824 odd = !odd;
7825 }
7826 out = d->target;
7827
7828 if (odd)
7829 {
7830 switch (vmode)
7831 {
7832 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7833 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7834 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7835 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7836 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7837 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7838 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7839 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7840 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7841 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7842 default:
7843 return false;
7844 }
7845 }
7846 else
7847 {
7848 switch (vmode)
7849 {
7850 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7851 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7852 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7853 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7854 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7855 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7856 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7857 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7858 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7859 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7860 default:
7861 return false;
7862 }
7863 }
7864
7865 emit_insn (gen (out, in0, in1));
7866 return true;
7867}
7868
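A standalone sketch of the index pattern the loop above matches, written out for a hypothetical 4-element permute of two distinct inputs: TRN1 corresponds to { 0, 4, 2, 6 } and TRN2 to { 1, 5, 3, 7 }, with even result lanes taking element i + odd from the first input and odd result lanes the corresponding element of the second (helper name hypothetical):

#include <stddef.h>

static void
trn_indices (unsigned char *perm, size_t nelt, int odd)
{
  for (size_t i = 0; i < nelt; i += 2)
    {
      perm[i] = i + odd;              /* from the first input  */
      perm[i + 1] = i + nelt + odd;   /* from the second input */
    }
}
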
7869/* Recognize patterns suitable for the UZP instructions. */
7870static bool
7871aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7872{
7873 unsigned int i, odd, mask, nelt = d->nelt;
7874 rtx out, in0, in1, x;
7875 rtx (*gen) (rtx, rtx, rtx);
7876 enum machine_mode vmode = d->vmode;
7877
7878 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7879 return false;
7880
7881 /* Note that these are little-endian tests.
7882 We correct for big-endian later. */
7883 if (d->perm[0] == 0)
7884 odd = 0;
7885 else if (d->perm[0] == 1)
7886 odd = 1;
7887 else
7888 return false;
7889 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7890
7891 for (i = 0; i < nelt; i++)
7892 {
7893 unsigned elt = (i * 2 + odd) & mask;
7894 if (d->perm[i] != elt)
7895 return false;
7896 }
7897
7898 /* Success! */
7899 if (d->testing_p)
7900 return true;
7901
7902 in0 = d->op0;
7903 in1 = d->op1;
7904 if (BYTES_BIG_ENDIAN)
7905 {
7906 x = in0, in0 = in1, in1 = x;
7907 odd = !odd;
7908 }
7909 out = d->target;
7910
7911 if (odd)
7912 {
7913 switch (vmode)
7914 {
7915 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7916 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7917 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7918 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7919 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7920 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7921 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7922 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7923 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7924 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7925 default:
7926 return false;
7927 }
7928 }
7929 else
7930 {
7931 switch (vmode)
7932 {
7933 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7934 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7935 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7936 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7937 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7938 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7939 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7940 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7941 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7942 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7943 default:
7944 return false;
7945 }
7946 }
7947
7948 emit_insn (gen (out, in0, in1));
7949 return true;
7950}
7951
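A standalone sketch of the index pattern matched above, for a hypothetical 4-element, two-input permute: UZP1 picks the even-numbered elements of the concatenated inputs ({ 0, 2, 4, 6 }) and UZP2 the odd-numbered ones ({ 1, 3, 5, 7 }); helper name hypothetical:

#include <stddef.h>

static void
uzp_indices (unsigned char *perm, size_t nelt, int odd)
{
  unsigned mask = 2 * nelt - 1;
  for (size_t i = 0; i < nelt; i++)
    perm[i] = (2 * i + odd) & mask;
}
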
7952/* Recognize patterns suitable for the ZIP instructions. */
7953static bool
7954aarch64_evpc_zip (struct expand_vec_perm_d *d)
7955{
7956 unsigned int i, high, mask, nelt = d->nelt;
7957 rtx out, in0, in1, x;
7958 rtx (*gen) (rtx, rtx, rtx);
7959 enum machine_mode vmode = d->vmode;
7960
7961 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7962 return false;
7963
7964 /* Note that these are little-endian tests.
7965 We correct for big-endian later. */
7966 high = nelt / 2;
7967 if (d->perm[0] == high)
7968 /* Do Nothing. */
7969 ;
7970 else if (d->perm[0] == 0)
7971 high = 0;
7972 else
7973 return false;
7974 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7975
7976 for (i = 0; i < nelt / 2; i++)
7977 {
7978 unsigned elt = (i + high) & mask;
7979 if (d->perm[i * 2] != elt)
7980 return false;
7981 elt = (elt + nelt) & mask;
7982 if (d->perm[i * 2 + 1] != elt)
7983 return false;
7984 }
7985
7986 /* Success! */
7987 if (d->testing_p)
7988 return true;
7989
7990 in0 = d->op0;
7991 in1 = d->op1;
7992 if (BYTES_BIG_ENDIAN)
7993 {
7994 x = in0, in0 = in1, in1 = x;
7995 high = !high;
7996 }
7997 out = d->target;
7998
7999 if (high)
8000 {
8001 switch (vmode)
8002 {
8003 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
8004 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
8005 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
8006 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
8007 case V4SImode: gen = gen_aarch64_zip2v4si; break;
8008 case V2SImode: gen = gen_aarch64_zip2v2si; break;
8009 case V2DImode: gen = gen_aarch64_zip2v2di; break;
8010 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
8011 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
8012 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
8013 default:
8014 return false;
8015 }
8016 }
8017 else
8018 {
8019 switch (vmode)
8020 {
8021 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
8022 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
8023 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
8024 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
8025 case V4SImode: gen = gen_aarch64_zip1v4si; break;
8026 case V2SImode: gen = gen_aarch64_zip1v2si; break;
8027 case V2DImode: gen = gen_aarch64_zip1v2di; break;
8028 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
8029 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
8030 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
8031 default:
8032 return false;
8033 }
8034 }
8035
8036 emit_insn (gen (out, in0, in1));
8037 return true;
8038}
8039
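A standalone sketch of the index pattern matched above, for a hypothetical 4-element, two-input permute: ZIP1 interleaves the low halves of the inputs ({ 0, 4, 1, 5 }) and ZIP2 the high halves ({ 2, 6, 3, 7 }); helper name hypothetical:

#include <stddef.h>

static void
zip_indices (unsigned char *perm, size_t nelt, int high)
{
  unsigned mask = 2 * nelt - 1;
  for (size_t i = 0; i < nelt / 2; i++)
    {
      perm[2 * i] = (i + high) & mask;
      perm[2 * i + 1] = (i + high + nelt) & mask;
    }
}
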
91bd4114
JG
8040static bool
8041aarch64_evpc_dup (struct expand_vec_perm_d *d)
8042{
8043 rtx (*gen) (rtx, rtx, rtx);
8044 rtx out = d->target;
8045 rtx in0;
8046 enum machine_mode vmode = d->vmode;
8047 unsigned int i, elt, nelt = d->nelt;
8048 rtx lane;
8049
8050 /* TODO: This may not be big-endian safe. */
8051 if (BYTES_BIG_ENDIAN)
8052 return false;
8053
8054 elt = d->perm[0];
8055 for (i = 1; i < nelt; i++)
8056 {
8057 if (elt != d->perm[i])
8058 return false;
8059 }
8060
8061 /* The generic preparation in aarch64_expand_vec_perm_const_1
8062 swaps the operand order and the permute indices if it finds
8063 d->perm[0] to be in the second operand. Thus, we can always
8064 use d->op0 and need not do any extra arithmetic to get the
8065 correct lane number. */
8066 in0 = d->op0;
8067 lane = GEN_INT (elt);
8068
8069 switch (vmode)
8070 {
8071 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
8072 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
8073 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
8074 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
8075 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
8076 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
8077 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
8078 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
8079 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
8080 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
8081 default:
8082 return false;
8083 }
8084
8085 emit_insn (gen (out, in0, lane));
8086 return true;
8087}
8088
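A standalone sketch of the condition checked above: a permutation qualifies for DUP exactly when every index is the same, e.g. { 3, 3, 3, 3 } on a V4SI input can be emitted as "dup v0.4s, v1.s[3]" (helper name hypothetical):

#include <stdbool.h>
#include <stddef.h>

static bool
dup_perm_p (const unsigned char *perm, size_t nelt)
{
  for (size_t i = 1; i < nelt; i++)
    if (perm[i] != perm[0])
      return false;
  return true;
}
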
88b08073
JG
8089static bool
8090aarch64_evpc_tbl (struct expand_vec_perm_d *d)
8091{
8092 rtx rperm[MAX_VECT_LEN], sel;
8093 enum machine_mode vmode = d->vmode;
8094 unsigned int i, nelt = d->nelt;
8095
8096 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
8097 numbering of elements for big-endian, we must reverse the order. */
8098 if (BYTES_BIG_ENDIAN)
8099 return false;
8100
8101 if (d->testing_p)
8102 return true;
8103
 8104 /* Generic code will try constant permutation twice: once with the
 8105 original mode and again with the elements lowered to QImode.
 8106 So wait and don't do the selector expansion ourselves. */
8107 if (vmode != V8QImode && vmode != V16QImode)
8108 return false;
8109
8110 for (i = 0; i < nelt; ++i)
8111 rperm[i] = GEN_INT (d->perm[i]);
8112 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
8113 sel = force_reg (vmode, sel);
8114
8115 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
8116 return true;
8117}
8118
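A standalone sketch, not part of this file, of what the emitted TBL does, emulated byte by byte for a single 16-byte table: selector bytes outside the table produce 0, which is why the selectors built here are kept in range (or masked by the variable-permute path earlier); helper name hypothetical:

#include <stdint.h>
#include <stddef.h>

static void
tbl1_emulate (uint8_t *dst, const uint8_t *table, const uint8_t *sel)
{
  for (size_t i = 0; i < 16; i++)
    dst[i] = sel[i] < 16 ? table[sel[i]] : 0;
}
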
8119static bool
8120aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
8121{
8122 /* The pattern matching functions above are written to look for a small
8123 number to begin the sequence (0, 1, N/2). If we begin with an index
8124 from the second operand, we can swap the operands. */
8125 if (d->perm[0] >= d->nelt)
8126 {
8127 unsigned i, nelt = d->nelt;
8128 rtx x;
8129
8130 for (i = 0; i < nelt; ++i)
8131 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
8132
8133 x = d->op0;
8134 d->op0 = d->op1;
8135 d->op1 = x;
8136 }
8137
8138 if (TARGET_SIMD)
cc4d934f
JG
8139 {
8140 if (aarch64_evpc_zip (d))
8141 return true;
8142 else if (aarch64_evpc_uzp (d))
8143 return true;
8144 else if (aarch64_evpc_trn (d))
8145 return true;
91bd4114
JG
8146 else if (aarch64_evpc_dup (d))
8147 return true;
cc4d934f
JG
8148 return aarch64_evpc_tbl (d);
8149 }
88b08073
JG
8150 return false;
8151}
8152
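A standalone sketch of the normalisation performed above: when the first index selects from the second operand, every index is rotated by nelt (modulo 2*nelt) and the caller swaps the operand pointers, so the pattern matchers only ever see sequences that start in the first operand. For example { 4, 0, 6, 2 } on 4-element vectors becomes { 0, 4, 2, 6 }, the TRN1 pattern, with the inputs exchanged (helper name hypothetical):

#include <stddef.h>

static void
normalise_perm (unsigned char *perm, size_t nelt)
{
  for (size_t i = 0; i < nelt; i++)
    perm[i] = (perm[i] + nelt) & (2 * nelt - 1);
}
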
8153/* Expand a vec_perm_const pattern. */
8154
8155bool
8156aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
8157{
8158 struct expand_vec_perm_d d;
8159 int i, nelt, which;
8160
8161 d.target = target;
8162 d.op0 = op0;
8163 d.op1 = op1;
8164
8165 d.vmode = GET_MODE (target);
8166 gcc_assert (VECTOR_MODE_P (d.vmode));
8167 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8168 d.testing_p = false;
8169
8170 for (i = which = 0; i < nelt; ++i)
8171 {
8172 rtx e = XVECEXP (sel, 0, i);
8173 int ei = INTVAL (e) & (2 * nelt - 1);
8174 which |= (ei < nelt ? 1 : 2);
8175 d.perm[i] = ei;
8176 }
8177
8178 switch (which)
8179 {
8180 default:
8181 gcc_unreachable ();
8182
8183 case 3:
8184 d.one_vector_p = false;
8185 if (!rtx_equal_p (op0, op1))
8186 break;
8187
8188 /* The elements of PERM do not suggest that only the first operand
8189 is used, but both operands are identical. Allow easier matching
8190 of the permutation by folding the permutation into the single
8191 input vector. */
8192 /* Fall Through. */
8193 case 2:
8194 for (i = 0; i < nelt; ++i)
8195 d.perm[i] &= nelt - 1;
8196 d.op0 = op1;
8197 d.one_vector_p = true;
8198 break;
8199
8200 case 1:
8201 d.op1 = op0;
8202 d.one_vector_p = true;
8203 break;
8204 }
8205
8206 return aarch64_expand_vec_perm_const_1 (&d);
8207}
8208
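A standalone sketch of the classification above: bit 0 of WHICH records that some index selects from the first operand and bit 1 that some index selects from the second, so 1 and 2 collapse to a one-vector permute while 3 stays a genuine two-vector permute unless both operands are the same register (helper name hypothetical):

#include <stddef.h>

static int
classify_perm (const unsigned char *perm, size_t nelt)
{
  int which = 0;
  for (size_t i = 0; i < nelt; i++)
    which |= (perm[i] & (2 * nelt - 1)) < nelt ? 1 : 2;
  return which;   /* e.g. { 0, 1, 2, 3 } -> 1, { 4, 5, 6, 7 } -> 2 (nelt == 4) */
}
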
8209static bool
8210aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8211 const unsigned char *sel)
8212{
8213 struct expand_vec_perm_d d;
8214 unsigned int i, nelt, which;
8215 bool ret;
8216
8217 d.vmode = vmode;
8218 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8219 d.testing_p = true;
8220 memcpy (d.perm, sel, nelt);
8221
8222 /* Calculate whether all elements are in one vector. */
8223 for (i = which = 0; i < nelt; ++i)
8224 {
8225 unsigned char e = d.perm[i];
8226 gcc_assert (e < 2 * nelt);
8227 which |= (e < nelt ? 1 : 2);
8228 }
8229
8230 /* If all elements are from the second vector, reindex as if from the
8231 first vector. */
8232 if (which == 2)
8233 for (i = 0; i < nelt; ++i)
8234 d.perm[i] -= nelt;
8235
8236 /* Check whether the mask can be applied to a single vector. */
8237 d.one_vector_p = (which != 3);
8238
8239 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8240 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8241 if (!d.one_vector_p)
8242 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8243
8244 start_sequence ();
8245 ret = aarch64_expand_vec_perm_const_1 (&d);
8246 end_sequence ();
8247
8248 return ret;
8249}
8250
43e9d192
IB
8251#undef TARGET_ADDRESS_COST
8252#define TARGET_ADDRESS_COST aarch64_address_cost
8253
8254/* This hook determines whether unnamed bitfields affect the alignment
8255 of the containing structure. The hook returns true if the structure
8256 should inherit the alignment requirements of an unnamed bitfield's
8257 type. */
8258#undef TARGET_ALIGN_ANON_BITFIELD
8259#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
8260
8261#undef TARGET_ASM_ALIGNED_DI_OP
8262#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8263
8264#undef TARGET_ASM_ALIGNED_HI_OP
8265#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8266
8267#undef TARGET_ASM_ALIGNED_SI_OP
8268#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8269
8270#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8271#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8272 hook_bool_const_tree_hwi_hwi_const_tree_true
8273
8274#undef TARGET_ASM_FILE_START
8275#define TARGET_ASM_FILE_START aarch64_start_file
8276
8277#undef TARGET_ASM_OUTPUT_MI_THUNK
8278#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8279
8280#undef TARGET_ASM_SELECT_RTX_SECTION
8281#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8282
8283#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8284#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8285
8286#undef TARGET_BUILD_BUILTIN_VA_LIST
8287#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8288
8289#undef TARGET_CALLEE_COPIES
8290#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8291
8292#undef TARGET_CAN_ELIMINATE
8293#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8294
8295#undef TARGET_CANNOT_FORCE_CONST_MEM
8296#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
8297
8298#undef TARGET_CONDITIONAL_REGISTER_USAGE
8299#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
8300
8301/* Only the least significant bit is used for initialization guard
8302 variables. */
8303#undef TARGET_CXX_GUARD_MASK_BIT
8304#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
8305
8306#undef TARGET_C_MODE_FOR_SUFFIX
8307#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
8308
8309#ifdef TARGET_BIG_ENDIAN_DEFAULT
8310#undef TARGET_DEFAULT_TARGET_FLAGS
8311#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
8312#endif
8313
8314#undef TARGET_CLASS_MAX_NREGS
8315#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
8316
119103ca
JG
8317#undef TARGET_BUILTIN_DECL
8318#define TARGET_BUILTIN_DECL aarch64_builtin_decl
8319
43e9d192
IB
8320#undef TARGET_EXPAND_BUILTIN
8321#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
8322
8323#undef TARGET_EXPAND_BUILTIN_VA_START
8324#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
8325
9697e620
JG
8326#undef TARGET_FOLD_BUILTIN
8327#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
8328
43e9d192
IB
8329#undef TARGET_FUNCTION_ARG
8330#define TARGET_FUNCTION_ARG aarch64_function_arg
8331
8332#undef TARGET_FUNCTION_ARG_ADVANCE
8333#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
8334
8335#undef TARGET_FUNCTION_ARG_BOUNDARY
8336#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
8337
8338#undef TARGET_FUNCTION_OK_FOR_SIBCALL
8339#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
8340
8341#undef TARGET_FUNCTION_VALUE
8342#define TARGET_FUNCTION_VALUE aarch64_function_value
8343
8344#undef TARGET_FUNCTION_VALUE_REGNO_P
8345#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
8346
8347#undef TARGET_FRAME_POINTER_REQUIRED
8348#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
8349
0ac198d3
JG
8350#undef TARGET_GIMPLE_FOLD_BUILTIN
8351#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
8352
43e9d192
IB
8353#undef TARGET_GIMPLIFY_VA_ARG_EXPR
8354#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
8355
8356#undef TARGET_INIT_BUILTINS
8357#define TARGET_INIT_BUILTINS aarch64_init_builtins
8358
8359#undef TARGET_LEGITIMATE_ADDRESS_P
8360#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
8361
8362#undef TARGET_LEGITIMATE_CONSTANT_P
8363#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
8364
8365#undef TARGET_LIBGCC_CMP_RETURN_MODE
8366#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
8367
38e8f663
YR
8368#undef TARGET_LRA_P
8369#define TARGET_LRA_P aarch64_lra_p
8370
ac2b960f
YZ
8371#undef TARGET_MANGLE_TYPE
8372#define TARGET_MANGLE_TYPE aarch64_mangle_type
8373
43e9d192
IB
8374#undef TARGET_MEMORY_MOVE_COST
8375#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
8376
8377#undef TARGET_MUST_PASS_IN_STACK
8378#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8379
8380/* This target hook should return true if accesses to volatile bitfields
8381 should use the narrowest mode possible. It should return false if these
8382 accesses should use the bitfield container type. */
8383#undef TARGET_NARROW_VOLATILE_BITFIELD
8384#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
8385
8386#undef TARGET_OPTION_OVERRIDE
8387#define TARGET_OPTION_OVERRIDE aarch64_override_options
8388
8389#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
8390#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
8391 aarch64_override_options_after_change
8392
8393#undef TARGET_PASS_BY_REFERENCE
8394#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
8395
8396#undef TARGET_PREFERRED_RELOAD_CLASS
8397#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
8398
8399#undef TARGET_SECONDARY_RELOAD
8400#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
8401
8402#undef TARGET_SHIFT_TRUNCATION_MASK
8403#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
8404
8405#undef TARGET_SETUP_INCOMING_VARARGS
8406#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
8407
8408#undef TARGET_STRUCT_VALUE_RTX
8409#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
8410
8411#undef TARGET_REGISTER_MOVE_COST
8412#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
8413
8414#undef TARGET_RETURN_IN_MEMORY
8415#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
8416
8417#undef TARGET_RETURN_IN_MSB
8418#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
8419
8420#undef TARGET_RTX_COSTS
8421#define TARGET_RTX_COSTS aarch64_rtx_costs
8422
d126a4ae
AP
8423#undef TARGET_SCHED_ISSUE_RATE
8424#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
8425
43e9d192
IB
8426#undef TARGET_TRAMPOLINE_INIT
8427#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
8428
8429#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
8430#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
8431
8432#undef TARGET_VECTOR_MODE_SUPPORTED_P
8433#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
8434
8435#undef TARGET_ARRAY_MODE_SUPPORTED_P
8436#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
8437
8990e73a
TB
8438#undef TARGET_VECTORIZE_ADD_STMT_COST
8439#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
8440
8441#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
8442#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
8443 aarch64_builtin_vectorization_cost
8444
43e9d192
IB
8445#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
8446#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
8447
42fc9a7f
JG
8448#undef TARGET_VECTORIZE_BUILTINS
8449#define TARGET_VECTORIZE_BUILTINS
8450
8451#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
8452#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
8453 aarch64_builtin_vectorized_function
8454
3b357264
JG
8455#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
8456#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
8457 aarch64_autovectorize_vector_sizes
8458
43e9d192
IB
8459/* Section anchor support. */
8460
8461#undef TARGET_MIN_ANCHOR_OFFSET
8462#define TARGET_MIN_ANCHOR_OFFSET -256
8463
8464/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
8465 byte offset; we can do much more for larger data types, but have no way
8466 to determine the size of the access. We assume accesses are aligned. */
8467#undef TARGET_MAX_ANCHOR_OFFSET
8468#define TARGET_MAX_ANCHOR_OFFSET 4095
8469
db0253a4
TB
8470#undef TARGET_VECTOR_ALIGNMENT
8471#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
8472
8473#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
8474#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
8475 aarch64_simd_vector_alignment_reachable
8476
88b08073
JG
8477/* vec_perm support. */
8478
8479#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
8480#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
8481 aarch64_vectorize_vec_perm_const_ok
8482
70f09188 8483
706b2314 8484#undef TARGET_FIXED_CONDITION_CODE_REGS
70f09188
AP
8485#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
8486
43e9d192
IB
8487struct gcc_target targetm = TARGET_INITIALIZER;
8488
8489#include "gt-aarch64.h"