]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/aarch64/aarch64.c
[AArch64] 64-bit float vreinterpret implemention
[thirdparty/gcc.git] / gcc / config / aarch64 / aarch64.c
CommitLineData
bdb7bf8a 1/* Machine description for AArch64 architecture.
23a5b65a 2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
43e9d192
IB
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
25#include "insn-codes.h"
26#include "rtl.h"
27#include "insn-attr.h"
28#include "tree.h"
d8a2d370
DN
29#include "stringpool.h"
30#include "stor-layout.h"
31#include "calls.h"
32#include "varasm.h"
43e9d192
IB
33#include "regs.h"
34#include "df.h"
35#include "hard-reg-set.h"
36#include "output.h"
37#include "expr.h"
38#include "reload.h"
39#include "toplev.h"
40#include "target.h"
41#include "target-def.h"
42#include "targhooks.h"
43#include "ggc.h"
44#include "function.h"
45#include "tm_p.h"
46#include "recog.h"
47#include "langhooks.h"
48#include "diagnostic-core.h"
2fb9a547
AM
49#include "pointer-set.h"
50#include "hash-table.h"
51#include "vec.h"
52#include "basic-block.h"
53#include "tree-ssa-alias.h"
54#include "internal-fn.h"
55#include "gimple-fold.h"
56#include "tree-eh.h"
57#include "gimple-expr.h"
58#include "is-a.h"
18f429e2 59#include "gimple.h"
45b0be94 60#include "gimplify.h"
43e9d192
IB
61#include "optabs.h"
62#include "dwarf2.h"
8990e73a
TB
63#include "cfgloop.h"
64#include "tree-vectorizer.h"
73250c4c 65#include "config/arm/aarch-cost-tables.h"
43e9d192 66
28514dda
YZ
67/* Defined for convenience. */
68#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
69
43e9d192
IB
70/* Classifies an address.
71
72 ADDRESS_REG_IMM
73 A simple base register plus immediate offset.
74
75 ADDRESS_REG_WB
76 A base register indexed by immediate offset with writeback.
77
78 ADDRESS_REG_REG
79 A base register indexed by (optionally scaled) register.
80
81 ADDRESS_REG_UXTW
82 A base register indexed by (optionally scaled) zero-extended register.
83
84 ADDRESS_REG_SXTW
85 A base register indexed by (optionally scaled) sign-extended register.
86
87 ADDRESS_LO_SUM
88 A LO_SUM rtx with a base register and "LO12" symbol relocation.
89
90 ADDRESS_SYMBOLIC:
91 A constant symbolic address, in pc-relative literal pool. */
92
93enum aarch64_address_type {
94 ADDRESS_REG_IMM,
95 ADDRESS_REG_WB,
96 ADDRESS_REG_REG,
97 ADDRESS_REG_UXTW,
98 ADDRESS_REG_SXTW,
99 ADDRESS_LO_SUM,
100 ADDRESS_SYMBOLIC
101};
102
103struct aarch64_address_info {
104 enum aarch64_address_type type;
105 rtx base;
106 rtx offset;
107 int shift;
108 enum aarch64_symbol_type symbol_type;
109};
110
48063b9d
IB
111struct simd_immediate_info
112{
113 rtx value;
114 int shift;
115 int element_width;
48063b9d 116 bool mvn;
e4f0f84d 117 bool msl;
48063b9d
IB
118};
119
43e9d192
IB
120/* The current code model. */
121enum aarch64_code_model aarch64_cmodel;
122
123#ifdef HAVE_AS_TLS
124#undef TARGET_HAVE_TLS
125#define TARGET_HAVE_TLS 1
126#endif
127
38e8f663 128static bool aarch64_lra_p (void);
43e9d192
IB
129static bool aarch64_composite_type_p (const_tree, enum machine_mode);
130static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
131 const_tree,
132 enum machine_mode *, int *,
133 bool *);
134static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
135static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
43e9d192 136static void aarch64_override_options_after_change (void);
43e9d192
IB
137static bool aarch64_vector_mode_supported_p (enum machine_mode);
138static unsigned bit_count (unsigned HOST_WIDE_INT);
139static bool aarch64_const_vec_all_same_int_p (rtx,
140 HOST_WIDE_INT, HOST_WIDE_INT);
141
88b08073
JG
142static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
143 const unsigned char *sel);
144
43e9d192 145/* The processor for which instructions should be scheduled. */
02fdbd5b 146enum aarch64_processor aarch64_tune = cortexa53;
43e9d192
IB
147
148/* The current tuning set. */
149const struct tune_params *aarch64_tune_params;
150
151/* Mask to specify which instructions we are allowed to generate. */
152unsigned long aarch64_isa_flags = 0;
153
154/* Mask to specify which instruction scheduling options should be used. */
155unsigned long aarch64_tune_flags = 0;
156
157/* Tuning parameters. */
158
159#if HAVE_DESIGNATED_INITIALIZERS
160#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
161#else
162#define NAMED_PARAM(NAME, VAL) (VAL)
163#endif
164
165#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
166__extension__
167#endif
43e9d192
IB
168
169#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
170__extension__
171#endif
172static const struct cpu_addrcost_table generic_addrcost_table =
173{
174 NAMED_PARAM (pre_modify, 0),
175 NAMED_PARAM (post_modify, 0),
176 NAMED_PARAM (register_offset, 0),
177 NAMED_PARAM (register_extend, 0),
178 NAMED_PARAM (imm_offset, 0)
179};
180
181#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
182__extension__
183#endif
184static const struct cpu_regmove_cost generic_regmove_cost =
185{
186 NAMED_PARAM (GP2GP, 1),
187 NAMED_PARAM (GP2FP, 2),
188 NAMED_PARAM (FP2GP, 2),
189 /* We currently do not provide direct support for TFmode Q->Q move.
190 Therefore we need to raise the cost above 2 in order to have
191 reload handle the situation. */
192 NAMED_PARAM (FP2FP, 4)
193};
194
8990e73a
TB
195/* Generic costs for vector insn classes. */
196#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
197__extension__
198#endif
199static const struct cpu_vector_cost generic_vector_cost =
200{
201 NAMED_PARAM (scalar_stmt_cost, 1),
202 NAMED_PARAM (scalar_load_cost, 1),
203 NAMED_PARAM (scalar_store_cost, 1),
204 NAMED_PARAM (vec_stmt_cost, 1),
205 NAMED_PARAM (vec_to_scalar_cost, 1),
206 NAMED_PARAM (scalar_to_vec_cost, 1),
207 NAMED_PARAM (vec_align_load_cost, 1),
208 NAMED_PARAM (vec_unalign_load_cost, 1),
209 NAMED_PARAM (vec_unalign_store_cost, 1),
210 NAMED_PARAM (vec_store_cost, 1),
211 NAMED_PARAM (cond_taken_branch_cost, 3),
212 NAMED_PARAM (cond_not_taken_branch_cost, 1)
213};
214
43e9d192
IB
215#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
216__extension__
217#endif
218static const struct tune_params generic_tunings =
219{
4e2cd668 220 &cortexa57_extra_costs,
43e9d192
IB
221 &generic_addrcost_table,
222 &generic_regmove_cost,
8990e73a 223 &generic_vector_cost,
d126a4ae
AP
224 NAMED_PARAM (memmov_cost, 4),
225 NAMED_PARAM (issue_rate, 2)
43e9d192
IB
226};
227
984239ad
KT
228static const struct tune_params cortexa53_tunings =
229{
230 &cortexa53_extra_costs,
231 &generic_addrcost_table,
232 &generic_regmove_cost,
233 &generic_vector_cost,
d126a4ae
AP
234 NAMED_PARAM (memmov_cost, 4),
235 NAMED_PARAM (issue_rate, 2)
984239ad
KT
236};
237
4fd92af6
KT
238static const struct tune_params cortexa57_tunings =
239{
240 &cortexa57_extra_costs,
241 &generic_addrcost_table,
242 &generic_regmove_cost,
243 &generic_vector_cost,
244 NAMED_PARAM (memmov_cost, 4),
245 NAMED_PARAM (issue_rate, 3)
246};
247
43e9d192
IB
248/* A processor implementing AArch64. */
249struct processor
250{
251 const char *const name;
252 enum aarch64_processor core;
253 const char *arch;
254 const unsigned long flags;
255 const struct tune_params *const tune;
256};
257
258/* Processor cores implementing AArch64. */
259static const struct processor all_cores[] =
260{
192ed1dd 261#define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
43e9d192
IB
262 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
263#include "aarch64-cores.def"
264#undef AARCH64_CORE
02fdbd5b 265 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
43e9d192
IB
266 {NULL, aarch64_none, NULL, 0, NULL}
267};
268
269/* Architectures implementing AArch64. */
270static const struct processor all_architectures[] =
271{
272#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
273 {NAME, CORE, #ARCH, FLAGS, NULL},
274#include "aarch64-arches.def"
275#undef AARCH64_ARCH
43e9d192
IB
276 {NULL, aarch64_none, NULL, 0, NULL}
277};
278
279/* Target specification. These are populated as commandline arguments
280 are processed, or NULL if not specified. */
281static const struct processor *selected_arch;
282static const struct processor *selected_cpu;
283static const struct processor *selected_tune;
284
285#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
286
287/* An ISA extension in the co-processor and main instruction set space. */
288struct aarch64_option_extension
289{
290 const char *const name;
291 const unsigned long flags_on;
292 const unsigned long flags_off;
293};
294
295/* ISA extensions in AArch64. */
296static const struct aarch64_option_extension all_extensions[] =
297{
298#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
299 {NAME, FLAGS_ON, FLAGS_OFF},
300#include "aarch64-option-extensions.def"
301#undef AARCH64_OPT_EXTENSION
302 {NULL, 0, 0}
303};
304
305/* Used to track the size of an address when generating a pre/post
306 increment address. */
307static enum machine_mode aarch64_memory_reference_mode;
308
309/* Used to force GTY into this file. */
310static GTY(()) int gty_dummy;
311
312/* A table of valid AArch64 "bitmask immediate" values for
313 logical instructions. */
314
315#define AARCH64_NUM_BITMASKS 5334
316static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
317
43e9d192
IB
318typedef enum aarch64_cond_code
319{
320 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
321 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
322 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
323}
324aarch64_cc;
325
326#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
327
328/* The condition codes of the processor, and the inverse function. */
329static const char * const aarch64_condition_codes[] =
330{
331 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
332 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
333};
334
335/* Provide a mapping from gcc register numbers to dwarf register numbers. */
336unsigned
337aarch64_dbx_register_number (unsigned regno)
338{
339 if (GP_REGNUM_P (regno))
340 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
341 else if (regno == SP_REGNUM)
342 return AARCH64_DWARF_SP;
343 else if (FP_REGNUM_P (regno))
344 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
345
346 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
347 equivalent DWARF register. */
348 return DWARF_FRAME_REGISTERS;
349}
350
351/* Return TRUE if MODE is any of the large INT modes. */
352static bool
353aarch64_vect_struct_mode_p (enum machine_mode mode)
354{
355 return mode == OImode || mode == CImode || mode == XImode;
356}
357
358/* Return TRUE if MODE is any of the vector modes. */
359static bool
360aarch64_vector_mode_p (enum machine_mode mode)
361{
362 return aarch64_vector_mode_supported_p (mode)
363 || aarch64_vect_struct_mode_p (mode);
364}
365
366/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
367static bool
368aarch64_array_mode_supported_p (enum machine_mode mode,
369 unsigned HOST_WIDE_INT nelems)
370{
371 if (TARGET_SIMD
372 && AARCH64_VALID_SIMD_QREG_MODE (mode)
373 && (nelems >= 2 && nelems <= 4))
374 return true;
375
376 return false;
377}
378
379/* Implement HARD_REGNO_NREGS. */
380
381int
382aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
383{
384 switch (aarch64_regno_regclass (regno))
385 {
386 case FP_REGS:
387 case FP_LO_REGS:
388 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
389 default:
390 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
391 }
392 gcc_unreachable ();
393}
394
395/* Implement HARD_REGNO_MODE_OK. */
396
397int
398aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
399{
400 if (GET_MODE_CLASS (mode) == MODE_CC)
401 return regno == CC_REGNUM;
402
9259db42
YZ
403 if (regno == SP_REGNUM)
404 /* The purpose of comparing with ptr_mode is to support the
405 global register variable associated with the stack pointer
406 register via the syntax of asm ("wsp") in ILP32. */
407 return mode == Pmode || mode == ptr_mode;
408
409 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
43e9d192
IB
410 return mode == Pmode;
411
412 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
413 return 1;
414
415 if (FP_REGNUM_P (regno))
416 {
417 if (aarch64_vect_struct_mode_p (mode))
418 return
419 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
420 else
421 return 1;
422 }
423
424 return 0;
425}
426
427/* Return true if calls to DECL should be treated as
428 long-calls (ie called via a register). */
429static bool
430aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
431{
432 return false;
433}
434
435/* Return true if calls to symbol-ref SYM should be treated as
436 long-calls (ie called via a register). */
437bool
438aarch64_is_long_call_p (rtx sym)
439{
440 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
441}
442
443/* Return true if the offsets to a zero/sign-extract operation
444 represent an expression that matches an extend operation. The
445 operands represent the paramters from
446
447 (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
448bool
449aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
450 rtx extract_imm)
451{
452 HOST_WIDE_INT mult_val, extract_val;
453
454 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
455 return false;
456
457 mult_val = INTVAL (mult_imm);
458 extract_val = INTVAL (extract_imm);
459
460 if (extract_val > 8
461 && extract_val < GET_MODE_BITSIZE (mode)
462 && exact_log2 (extract_val & ~7) > 0
463 && (extract_val & 7) <= 4
464 && mult_val == (1 << (extract_val & 7)))
465 return true;
466
467 return false;
468}
469
470/* Emit an insn that's a simple single-set. Both the operands must be
471 known to be valid. */
472inline static rtx
473emit_set_insn (rtx x, rtx y)
474{
475 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
476}
477
478/* X and Y are two things to compare using CODE. Emit the compare insn and
479 return the rtx for register 0 in the proper mode. */
480rtx
481aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
482{
483 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
484 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
485
486 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
487 return cc_reg;
488}
489
490/* Build the SYMBOL_REF for __tls_get_addr. */
491
492static GTY(()) rtx tls_get_addr_libfunc;
493
494rtx
495aarch64_tls_get_addr (void)
496{
497 if (!tls_get_addr_libfunc)
498 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
499 return tls_get_addr_libfunc;
500}
501
502/* Return the TLS model to use for ADDR. */
503
504static enum tls_model
505tls_symbolic_operand_type (rtx addr)
506{
507 enum tls_model tls_kind = TLS_MODEL_NONE;
508 rtx sym, addend;
509
510 if (GET_CODE (addr) == CONST)
511 {
512 split_const (addr, &sym, &addend);
513 if (GET_CODE (sym) == SYMBOL_REF)
514 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
515 }
516 else if (GET_CODE (addr) == SYMBOL_REF)
517 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
518
519 return tls_kind;
520}
521
522/* We'll allow lo_sum's in addresses in our legitimate addresses
523 so that combine would take care of combining addresses where
524 necessary, but for generation purposes, we'll generate the address
525 as :
526 RTL Absolute
527 tmp = hi (symbol_ref); adrp x1, foo
528 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
529 nop
530
531 PIC TLS
532 adrp x1, :got:foo adrp tmp, :tlsgd:foo
533 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
534 bl __tls_get_addr
535 nop
536
537 Load TLS symbol, depending on TLS mechanism and TLS access model.
538
539 Global Dynamic - Traditional TLS:
540 adrp tmp, :tlsgd:imm
541 add dest, tmp, #:tlsgd_lo12:imm
542 bl __tls_get_addr
543
544 Global Dynamic - TLS Descriptors:
545 adrp dest, :tlsdesc:imm
546 ldr tmp, [dest, #:tlsdesc_lo12:imm]
547 add dest, dest, #:tlsdesc_lo12:imm
548 blr tmp
549 mrs tp, tpidr_el0
550 add dest, dest, tp
551
552 Initial Exec:
553 mrs tp, tpidr_el0
554 adrp tmp, :gottprel:imm
555 ldr dest, [tmp, #:gottprel_lo12:imm]
556 add dest, dest, tp
557
558 Local Exec:
559 mrs tp, tpidr_el0
560 add t0, tp, #:tprel_hi12:imm
561 add t0, #:tprel_lo12_nc:imm
562*/
563
564static void
565aarch64_load_symref_appropriately (rtx dest, rtx imm,
566 enum aarch64_symbol_type type)
567{
568 switch (type)
569 {
570 case SYMBOL_SMALL_ABSOLUTE:
571 {
28514dda 572 /* In ILP32, the mode of dest can be either SImode or DImode. */
43e9d192 573 rtx tmp_reg = dest;
28514dda
YZ
574 enum machine_mode mode = GET_MODE (dest);
575
576 gcc_assert (mode == Pmode || mode == ptr_mode);
577
43e9d192 578 if (can_create_pseudo_p ())
28514dda 579 tmp_reg = gen_reg_rtx (mode);
43e9d192 580
28514dda 581 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
43e9d192
IB
582 emit_insn (gen_add_losym (dest, tmp_reg, imm));
583 return;
584 }
585
a5350ddc
CSS
586 case SYMBOL_TINY_ABSOLUTE:
587 emit_insn (gen_rtx_SET (Pmode, dest, imm));
588 return;
589
43e9d192
IB
590 case SYMBOL_SMALL_GOT:
591 {
28514dda
YZ
592 /* In ILP32, the mode of dest can be either SImode or DImode,
593 while the got entry is always of SImode size. The mode of
594 dest depends on how dest is used: if dest is assigned to a
595 pointer (e.g. in the memory), it has SImode; it may have
596 DImode if dest is dereferenced to access the memeory.
597 This is why we have to handle three different ldr_got_small
598 patterns here (two patterns for ILP32). */
43e9d192 599 rtx tmp_reg = dest;
28514dda
YZ
600 enum machine_mode mode = GET_MODE (dest);
601
43e9d192 602 if (can_create_pseudo_p ())
28514dda
YZ
603 tmp_reg = gen_reg_rtx (mode);
604
605 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
606 if (mode == ptr_mode)
607 {
608 if (mode == DImode)
609 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
610 else
611 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
612 }
613 else
614 {
615 gcc_assert (mode == Pmode);
616 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
617 }
618
43e9d192
IB
619 return;
620 }
621
622 case SYMBOL_SMALL_TLSGD:
623 {
624 rtx insns;
625 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
626
627 start_sequence ();
628 emit_call_insn (gen_tlsgd_small (result, imm));
629 insns = get_insns ();
630 end_sequence ();
631
632 RTL_CONST_CALL_P (insns) = 1;
633 emit_libcall_block (insns, dest, result, imm);
634 return;
635 }
636
637 case SYMBOL_SMALL_TLSDESC:
638 {
639 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
640 rtx tp;
641
642 emit_insn (gen_tlsdesc_small (imm));
643 tp = aarch64_load_tp (NULL);
644 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
645 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
646 return;
647 }
648
649 case SYMBOL_SMALL_GOTTPREL:
650 {
651 rtx tmp_reg = gen_reg_rtx (Pmode);
652 rtx tp = aarch64_load_tp (NULL);
653 emit_insn (gen_tlsie_small (tmp_reg, imm));
654 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
655 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
656 return;
657 }
658
659 case SYMBOL_SMALL_TPREL:
660 {
661 rtx tp = aarch64_load_tp (NULL);
662 emit_insn (gen_tlsle_small (dest, tp, imm));
663 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
664 return;
665 }
666
87dd8ab0
MS
667 case SYMBOL_TINY_GOT:
668 emit_insn (gen_ldr_got_tiny (dest, imm));
669 return;
670
43e9d192
IB
671 default:
672 gcc_unreachable ();
673 }
674}
675
676/* Emit a move from SRC to DEST. Assume that the move expanders can
677 handle all moves if !can_create_pseudo_p (). The distinction is
678 important because, unlike emit_move_insn, the move expanders know
679 how to force Pmode objects into the constant pool even when the
680 constant pool address is not itself legitimate. */
681static rtx
682aarch64_emit_move (rtx dest, rtx src)
683{
684 return (can_create_pseudo_p ()
685 ? emit_move_insn (dest, src)
686 : emit_move_insn_1 (dest, src));
687}
688
030d03b8
RE
689/* Split a 128-bit move operation into two 64-bit move operations,
690 taking care to handle partial overlap of register to register
691 copies. Special cases are needed when moving between GP regs and
692 FP regs. SRC can be a register, constant or memory; DST a register
693 or memory. If either operand is memory it must not have any side
694 effects. */
43e9d192
IB
695void
696aarch64_split_128bit_move (rtx dst, rtx src)
697{
030d03b8
RE
698 rtx dst_lo, dst_hi;
699 rtx src_lo, src_hi;
43e9d192 700
030d03b8 701 enum machine_mode mode = GET_MODE (dst);
12dc6974 702
030d03b8
RE
703 gcc_assert (mode == TImode || mode == TFmode);
704 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
705 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
43e9d192
IB
706
707 if (REG_P (dst) && REG_P (src))
708 {
030d03b8
RE
709 int src_regno = REGNO (src);
710 int dst_regno = REGNO (dst);
43e9d192 711
030d03b8 712 /* Handle FP <-> GP regs. */
43e9d192
IB
713 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
714 {
030d03b8
RE
715 src_lo = gen_lowpart (word_mode, src);
716 src_hi = gen_highpart (word_mode, src);
717
718 if (mode == TImode)
719 {
720 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
721 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
722 }
723 else
724 {
725 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
726 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
727 }
728 return;
43e9d192
IB
729 }
730 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
731 {
030d03b8
RE
732 dst_lo = gen_lowpart (word_mode, dst);
733 dst_hi = gen_highpart (word_mode, dst);
734
735 if (mode == TImode)
736 {
737 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
738 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
739 }
740 else
741 {
742 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
743 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
744 }
745 return;
43e9d192 746 }
43e9d192
IB
747 }
748
030d03b8
RE
749 dst_lo = gen_lowpart (word_mode, dst);
750 dst_hi = gen_highpart (word_mode, dst);
751 src_lo = gen_lowpart (word_mode, src);
752 src_hi = gen_highpart_mode (word_mode, mode, src);
753
754 /* At most one pairing may overlap. */
755 if (reg_overlap_mentioned_p (dst_lo, src_hi))
756 {
757 aarch64_emit_move (dst_hi, src_hi);
758 aarch64_emit_move (dst_lo, src_lo);
759 }
760 else
761 {
762 aarch64_emit_move (dst_lo, src_lo);
763 aarch64_emit_move (dst_hi, src_hi);
764 }
43e9d192
IB
765}
766
767bool
768aarch64_split_128bit_move_p (rtx dst, rtx src)
769{
770 return (! REG_P (src)
771 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
772}
773
8b033a8a
SN
774/* Split a complex SIMD combine. */
775
776void
777aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
778{
779 enum machine_mode src_mode = GET_MODE (src1);
780 enum machine_mode dst_mode = GET_MODE (dst);
781
782 gcc_assert (VECTOR_MODE_P (dst_mode));
783
784 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
785 {
786 rtx (*gen) (rtx, rtx, rtx);
787
788 switch (src_mode)
789 {
790 case V8QImode:
791 gen = gen_aarch64_simd_combinev8qi;
792 break;
793 case V4HImode:
794 gen = gen_aarch64_simd_combinev4hi;
795 break;
796 case V2SImode:
797 gen = gen_aarch64_simd_combinev2si;
798 break;
799 case V2SFmode:
800 gen = gen_aarch64_simd_combinev2sf;
801 break;
802 case DImode:
803 gen = gen_aarch64_simd_combinedi;
804 break;
805 case DFmode:
806 gen = gen_aarch64_simd_combinedf;
807 break;
808 default:
809 gcc_unreachable ();
810 }
811
812 emit_insn (gen (dst, src1, src2));
813 return;
814 }
815}
816
fd4842cd
SN
817/* Split a complex SIMD move. */
818
819void
820aarch64_split_simd_move (rtx dst, rtx src)
821{
822 enum machine_mode src_mode = GET_MODE (src);
823 enum machine_mode dst_mode = GET_MODE (dst);
824
825 gcc_assert (VECTOR_MODE_P (dst_mode));
826
827 if (REG_P (dst) && REG_P (src))
828 {
c59b7e28
SN
829 rtx (*gen) (rtx, rtx);
830
fd4842cd
SN
831 gcc_assert (VECTOR_MODE_P (src_mode));
832
833 switch (src_mode)
834 {
835 case V16QImode:
c59b7e28 836 gen = gen_aarch64_split_simd_movv16qi;
fd4842cd
SN
837 break;
838 case V8HImode:
c59b7e28 839 gen = gen_aarch64_split_simd_movv8hi;
fd4842cd
SN
840 break;
841 case V4SImode:
c59b7e28 842 gen = gen_aarch64_split_simd_movv4si;
fd4842cd
SN
843 break;
844 case V2DImode:
c59b7e28 845 gen = gen_aarch64_split_simd_movv2di;
fd4842cd
SN
846 break;
847 case V4SFmode:
c59b7e28 848 gen = gen_aarch64_split_simd_movv4sf;
fd4842cd
SN
849 break;
850 case V2DFmode:
c59b7e28 851 gen = gen_aarch64_split_simd_movv2df;
fd4842cd
SN
852 break;
853 default:
854 gcc_unreachable ();
855 }
c59b7e28
SN
856
857 emit_insn (gen (dst, src));
fd4842cd
SN
858 return;
859 }
860}
861
43e9d192 862static rtx
e18b4a81 863aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
43e9d192
IB
864{
865 if (can_create_pseudo_p ())
e18b4a81 866 return force_reg (mode, value);
43e9d192
IB
867 else
868 {
869 x = aarch64_emit_move (x, value);
870 return x;
871 }
872}
873
874
875static rtx
876aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
877{
9c023bf0 878 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
43e9d192
IB
879 {
880 rtx high;
881 /* Load the full offset into a register. This
882 might be improvable in the future. */
883 high = GEN_INT (offset);
884 offset = 0;
e18b4a81
YZ
885 high = aarch64_force_temporary (mode, temp, high);
886 reg = aarch64_force_temporary (mode, temp,
887 gen_rtx_PLUS (mode, high, reg));
43e9d192
IB
888 }
889 return plus_constant (mode, reg, offset);
890}
891
892void
893aarch64_expand_mov_immediate (rtx dest, rtx imm)
894{
895 enum machine_mode mode = GET_MODE (dest);
896 unsigned HOST_WIDE_INT mask;
897 int i;
898 bool first;
899 unsigned HOST_WIDE_INT val;
900 bool subtargets;
901 rtx subtarget;
902 int one_match, zero_match;
903
904 gcc_assert (mode == SImode || mode == DImode);
905
906 /* Check on what type of symbol it is. */
907 if (GET_CODE (imm) == SYMBOL_REF
908 || GET_CODE (imm) == LABEL_REF
909 || GET_CODE (imm) == CONST)
910 {
911 rtx mem, base, offset;
912 enum aarch64_symbol_type sty;
913
914 /* If we have (const (plus symbol offset)), separate out the offset
915 before we start classifying the symbol. */
916 split_const (imm, &base, &offset);
917
918 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
919 switch (sty)
920 {
921 case SYMBOL_FORCE_TO_MEM:
922 if (offset != const0_rtx
923 && targetm.cannot_force_const_mem (mode, imm))
924 {
aef66c94 925 gcc_assert (can_create_pseudo_p ());
e18b4a81 926 base = aarch64_force_temporary (mode, dest, base);
43e9d192
IB
927 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
928 aarch64_emit_move (dest, base);
929 return;
930 }
28514dda 931 mem = force_const_mem (ptr_mode, imm);
43e9d192 932 gcc_assert (mem);
28514dda
YZ
933 if (mode != ptr_mode)
934 mem = gen_rtx_ZERO_EXTEND (mode, mem);
43e9d192
IB
935 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
936 return;
937
938 case SYMBOL_SMALL_TLSGD:
939 case SYMBOL_SMALL_TLSDESC:
940 case SYMBOL_SMALL_GOTTPREL:
941 case SYMBOL_SMALL_GOT:
87dd8ab0 942 case SYMBOL_TINY_GOT:
43e9d192
IB
943 if (offset != const0_rtx)
944 {
945 gcc_assert(can_create_pseudo_p ());
e18b4a81 946 base = aarch64_force_temporary (mode, dest, base);
43e9d192
IB
947 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
948 aarch64_emit_move (dest, base);
949 return;
950 }
951 /* FALLTHRU */
952
953 case SYMBOL_SMALL_TPREL:
954 case SYMBOL_SMALL_ABSOLUTE:
a5350ddc 955 case SYMBOL_TINY_ABSOLUTE:
43e9d192
IB
956 aarch64_load_symref_appropriately (dest, imm, sty);
957 return;
958
959 default:
960 gcc_unreachable ();
961 }
962 }
963
964 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
965 {
966 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
967 return;
968 }
969
970 if (!CONST_INT_P (imm))
971 {
972 if (GET_CODE (imm) == HIGH)
973 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
974 else
975 {
976 rtx mem = force_const_mem (mode, imm);
977 gcc_assert (mem);
978 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
979 }
980
981 return;
982 }
983
984 if (mode == SImode)
985 {
986 /* We know we can't do this in 1 insn, and we must be able to do it
987 in two; so don't mess around looking for sequences that don't buy
988 us anything. */
989 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
990 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
991 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
992 return;
993 }
994
995 /* Remaining cases are all for DImode. */
996
997 val = INTVAL (imm);
998 subtargets = optimize && can_create_pseudo_p ();
999
1000 one_match = 0;
1001 zero_match = 0;
1002 mask = 0xffff;
1003
1004 for (i = 0; i < 64; i += 16, mask <<= 16)
1005 {
1006 if ((val & mask) == 0)
1007 zero_match++;
1008 else if ((val & mask) == mask)
1009 one_match++;
1010 }
1011
1012 if (one_match == 2)
1013 {
1014 mask = 0xffff;
1015 for (i = 0; i < 64; i += 16, mask <<= 16)
1016 {
1017 if ((val & mask) != mask)
1018 {
1019 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1020 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1021 GEN_INT ((val >> i) & 0xffff)));
1022 return;
1023 }
1024 }
1025 gcc_unreachable ();
1026 }
1027
1028 if (zero_match == 2)
1029 goto simple_sequence;
1030
1031 mask = 0x0ffff0000UL;
1032 for (i = 16; i < 64; i += 16, mask <<= 16)
1033 {
1034 HOST_WIDE_INT comp = mask & ~(mask - 1);
1035
1036 if (aarch64_uimm12_shift (val - (val & mask)))
1037 {
1038 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1039
1040 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1041 emit_insn (gen_adddi3 (dest, subtarget,
1042 GEN_INT (val - (val & mask))));
1043 return;
1044 }
1045 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1046 {
1047 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1048
1049 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1050 GEN_INT ((val + comp) & mask)));
1051 emit_insn (gen_adddi3 (dest, subtarget,
1052 GEN_INT (val - ((val + comp) & mask))));
1053 return;
1054 }
1055 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1056 {
1057 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1058
1059 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1060 GEN_INT ((val - comp) | ~mask)));
1061 emit_insn (gen_adddi3 (dest, subtarget,
1062 GEN_INT (val - ((val - comp) | ~mask))));
1063 return;
1064 }
1065 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1066 {
1067 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1068
1069 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1070 GEN_INT (val | ~mask)));
1071 emit_insn (gen_adddi3 (dest, subtarget,
1072 GEN_INT (val - (val | ~mask))));
1073 return;
1074 }
1075 }
1076
1077 /* See if we can do it by arithmetically combining two
1078 immediates. */
1079 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1080 {
1081 int j;
1082 mask = 0xffff;
1083
1084 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1085 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1086 {
1087 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1088 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1089 GEN_INT (aarch64_bitmasks[i])));
1090 emit_insn (gen_adddi3 (dest, subtarget,
1091 GEN_INT (val - aarch64_bitmasks[i])));
1092 return;
1093 }
1094
1095 for (j = 0; j < 64; j += 16, mask <<= 16)
1096 {
1097 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1098 {
1099 emit_insn (gen_rtx_SET (VOIDmode, dest,
1100 GEN_INT (aarch64_bitmasks[i])));
1101 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1102 GEN_INT ((val >> j) & 0xffff)));
1103 return;
1104 }
1105 }
1106 }
1107
1108 /* See if we can do it by logically combining two immediates. */
1109 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1110 {
1111 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1112 {
1113 int j;
1114
1115 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1116 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1117 {
1118 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1119 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1120 GEN_INT (aarch64_bitmasks[i])));
1121 emit_insn (gen_iordi3 (dest, subtarget,
1122 GEN_INT (aarch64_bitmasks[j])));
1123 return;
1124 }
1125 }
1126 else if ((val & aarch64_bitmasks[i]) == val)
1127 {
1128 int j;
1129
1130 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1131 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1132 {
1133
1134 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1135 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1136 GEN_INT (aarch64_bitmasks[j])));
1137 emit_insn (gen_anddi3 (dest, subtarget,
1138 GEN_INT (aarch64_bitmasks[i])));
1139 return;
1140 }
1141 }
1142 }
1143
1144 simple_sequence:
1145 first = true;
1146 mask = 0xffff;
1147 for (i = 0; i < 64; i += 16, mask <<= 16)
1148 {
1149 if ((val & mask) != 0)
1150 {
1151 if (first)
1152 {
1153 emit_insn (gen_rtx_SET (VOIDmode, dest,
1154 GEN_INT (val & mask)));
1155 first = false;
1156 }
1157 else
1158 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1159 GEN_INT ((val >> i) & 0xffff)));
1160 }
1161 }
1162}
1163
1164static bool
1165aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1166{
1167 /* Indirect calls are not currently supported. */
1168 if (decl == NULL)
1169 return false;
1170
1171 /* Cannot tail-call to long-calls, since these are outside of the
1172 range of a branch instruction (we could handle this if we added
1173 support for indirect tail-calls. */
1174 if (aarch64_decl_is_long_call_p (decl))
1175 return false;
1176
1177 return true;
1178}
1179
1180/* Implement TARGET_PASS_BY_REFERENCE. */
1181
1182static bool
1183aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1184 enum machine_mode mode,
1185 const_tree type,
1186 bool named ATTRIBUTE_UNUSED)
1187{
1188 HOST_WIDE_INT size;
1189 enum machine_mode dummymode;
1190 int nregs;
1191
1192 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1193 size = (mode == BLKmode && type)
1194 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1195
aadc1c43
MHD
1196 /* Aggregates are passed by reference based on their size. */
1197 if (type && AGGREGATE_TYPE_P (type))
43e9d192 1198 {
aadc1c43 1199 size = int_size_in_bytes (type);
43e9d192
IB
1200 }
1201
1202 /* Variable sized arguments are always returned by reference. */
1203 if (size < 0)
1204 return true;
1205
1206 /* Can this be a candidate to be passed in fp/simd register(s)? */
1207 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1208 &dummymode, &nregs,
1209 NULL))
1210 return false;
1211
1212 /* Arguments which are variable sized or larger than 2 registers are
1213 passed by reference unless they are a homogenous floating point
1214 aggregate. */
1215 return size > 2 * UNITS_PER_WORD;
1216}
1217
1218/* Return TRUE if VALTYPE is padded to its least significant bits. */
1219static bool
1220aarch64_return_in_msb (const_tree valtype)
1221{
1222 enum machine_mode dummy_mode;
1223 int dummy_int;
1224
1225 /* Never happens in little-endian mode. */
1226 if (!BYTES_BIG_ENDIAN)
1227 return false;
1228
1229 /* Only composite types smaller than or equal to 16 bytes can
1230 be potentially returned in registers. */
1231 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1232 || int_size_in_bytes (valtype) <= 0
1233 || int_size_in_bytes (valtype) > 16)
1234 return false;
1235
1236 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1237 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1238 is always passed/returned in the least significant bits of fp/simd
1239 register(s). */
1240 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1241 &dummy_mode, &dummy_int, NULL))
1242 return false;
1243
1244 return true;
1245}
1246
1247/* Implement TARGET_FUNCTION_VALUE.
1248 Define how to find the value returned by a function. */
1249
1250static rtx
1251aarch64_function_value (const_tree type, const_tree func,
1252 bool outgoing ATTRIBUTE_UNUSED)
1253{
1254 enum machine_mode mode;
1255 int unsignedp;
1256 int count;
1257 enum machine_mode ag_mode;
1258
1259 mode = TYPE_MODE (type);
1260 if (INTEGRAL_TYPE_P (type))
1261 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1262
1263 if (aarch64_return_in_msb (type))
1264 {
1265 HOST_WIDE_INT size = int_size_in_bytes (type);
1266
1267 if (size % UNITS_PER_WORD != 0)
1268 {
1269 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1270 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1271 }
1272 }
1273
1274 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1275 &ag_mode, &count, NULL))
1276 {
1277 if (!aarch64_composite_type_p (type, mode))
1278 {
1279 gcc_assert (count == 1 && mode == ag_mode);
1280 return gen_rtx_REG (mode, V0_REGNUM);
1281 }
1282 else
1283 {
1284 int i;
1285 rtx par;
1286
1287 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1288 for (i = 0; i < count; i++)
1289 {
1290 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1291 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1292 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1293 XVECEXP (par, 0, i) = tmp;
1294 }
1295 return par;
1296 }
1297 }
1298 else
1299 return gen_rtx_REG (mode, R0_REGNUM);
1300}
1301
1302/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1303 Return true if REGNO is the number of a hard register in which the values
1304 of called function may come back. */
1305
1306static bool
1307aarch64_function_value_regno_p (const unsigned int regno)
1308{
1309 /* Maximum of 16 bytes can be returned in the general registers. Examples
1310 of 16-byte return values are: 128-bit integers and 16-byte small
1311 structures (excluding homogeneous floating-point aggregates). */
1312 if (regno == R0_REGNUM || regno == R1_REGNUM)
1313 return true;
1314
1315 /* Up to four fp/simd registers can return a function value, e.g. a
1316 homogeneous floating-point aggregate having four members. */
1317 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1318 return !TARGET_GENERAL_REGS_ONLY;
1319
1320 return false;
1321}
1322
1323/* Implement TARGET_RETURN_IN_MEMORY.
1324
1325 If the type T of the result of a function is such that
1326 void func (T arg)
1327 would require that arg be passed as a value in a register (or set of
1328 registers) according to the parameter passing rules, then the result
1329 is returned in the same registers as would be used for such an
1330 argument. */
1331
1332static bool
1333aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1334{
1335 HOST_WIDE_INT size;
1336 enum machine_mode ag_mode;
1337 int count;
1338
1339 if (!AGGREGATE_TYPE_P (type)
1340 && TREE_CODE (type) != COMPLEX_TYPE
1341 && TREE_CODE (type) != VECTOR_TYPE)
1342 /* Simple scalar types always returned in registers. */
1343 return false;
1344
1345 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1346 type,
1347 &ag_mode,
1348 &count,
1349 NULL))
1350 return false;
1351
1352 /* Types larger than 2 registers returned in memory. */
1353 size = int_size_in_bytes (type);
1354 return (size < 0 || size > 2 * UNITS_PER_WORD);
1355}
1356
1357static bool
1358aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1359 const_tree type, int *nregs)
1360{
1361 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1362 return aarch64_vfp_is_call_or_return_candidate (mode,
1363 type,
1364 &pcum->aapcs_vfp_rmode,
1365 nregs,
1366 NULL);
1367}
1368
1369/* Given MODE and TYPE of a function argument, return the alignment in
1370 bits. The idea is to suppress any stronger alignment requested by
1371 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1372 This is a helper function for local use only. */
1373
1374static unsigned int
1375aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1376{
1377 unsigned int alignment;
1378
1379 if (type)
1380 {
1381 if (!integer_zerop (TYPE_SIZE (type)))
1382 {
1383 if (TYPE_MODE (type) == mode)
1384 alignment = TYPE_ALIGN (type);
1385 else
1386 alignment = GET_MODE_ALIGNMENT (mode);
1387 }
1388 else
1389 alignment = 0;
1390 }
1391 else
1392 alignment = GET_MODE_ALIGNMENT (mode);
1393
1394 return alignment;
1395}
1396
1397/* Layout a function argument according to the AAPCS64 rules. The rule
1398 numbers refer to the rule numbers in the AAPCS64. */
1399
1400static void
1401aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1402 const_tree type,
1403 bool named ATTRIBUTE_UNUSED)
1404{
1405 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1406 int ncrn, nvrn, nregs;
1407 bool allocate_ncrn, allocate_nvrn;
1408
1409 /* We need to do this once per argument. */
1410 if (pcum->aapcs_arg_processed)
1411 return;
1412
1413 pcum->aapcs_arg_processed = true;
1414
1415 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1416 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1417 mode,
1418 type,
1419 &nregs);
1420
1421 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1422 The following code thus handles passing by SIMD/FP registers first. */
1423
1424 nvrn = pcum->aapcs_nvrn;
1425
1426 /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
1427 and homogenous short-vector aggregates (HVA). */
1428 if (allocate_nvrn)
1429 {
1430 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1431 {
1432 pcum->aapcs_nextnvrn = nvrn + nregs;
1433 if (!aarch64_composite_type_p (type, mode))
1434 {
1435 gcc_assert (nregs == 1);
1436 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1437 }
1438 else
1439 {
1440 rtx par;
1441 int i;
1442 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1443 for (i = 0; i < nregs; i++)
1444 {
1445 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1446 V0_REGNUM + nvrn + i);
1447 tmp = gen_rtx_EXPR_LIST
1448 (VOIDmode, tmp,
1449 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1450 XVECEXP (par, 0, i) = tmp;
1451 }
1452 pcum->aapcs_reg = par;
1453 }
1454 return;
1455 }
1456 else
1457 {
1458 /* C.3 NSRN is set to 8. */
1459 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1460 goto on_stack;
1461 }
1462 }
1463
1464 ncrn = pcum->aapcs_ncrn;
1465 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1466 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1467
1468
1469 /* C6 - C9. though the sign and zero extension semantics are
1470 handled elsewhere. This is the case where the argument fits
1471 entirely general registers. */
1472 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1473 {
1474 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1475
1476 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1477
1478 /* C.8 if the argument has an alignment of 16 then the NGRN is
1479 rounded up to the next even number. */
1480 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1481 {
1482 ++ncrn;
1483 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1484 }
1485 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1486 A reg is still generated for it, but the caller should be smart
1487 enough not to use it. */
1488 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1489 {
1490 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1491 }
1492 else
1493 {
1494 rtx par;
1495 int i;
1496
1497 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1498 for (i = 0; i < nregs; i++)
1499 {
1500 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1501 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1502 GEN_INT (i * UNITS_PER_WORD));
1503 XVECEXP (par, 0, i) = tmp;
1504 }
1505 pcum->aapcs_reg = par;
1506 }
1507
1508 pcum->aapcs_nextncrn = ncrn + nregs;
1509 return;
1510 }
1511
1512 /* C.11 */
1513 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1514
1515 /* The argument is passed on stack; record the needed number of words for
1516 this argument (we can re-use NREGS) and align the total size if
1517 necessary. */
1518on_stack:
1519 pcum->aapcs_stack_words = nregs;
1520 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1521 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1522 16 / UNITS_PER_WORD) + 1;
1523 return;
1524}
1525
1526/* Implement TARGET_FUNCTION_ARG. */
1527
1528static rtx
1529aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1530 const_tree type, bool named)
1531{
1532 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1533 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1534
1535 if (mode == VOIDmode)
1536 return NULL_RTX;
1537
1538 aarch64_layout_arg (pcum_v, mode, type, named);
1539 return pcum->aapcs_reg;
1540}
1541
1542void
1543aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1544 const_tree fntype ATTRIBUTE_UNUSED,
1545 rtx libname ATTRIBUTE_UNUSED,
1546 const_tree fndecl ATTRIBUTE_UNUSED,
1547 unsigned n_named ATTRIBUTE_UNUSED)
1548{
1549 pcum->aapcs_ncrn = 0;
1550 pcum->aapcs_nvrn = 0;
1551 pcum->aapcs_nextncrn = 0;
1552 pcum->aapcs_nextnvrn = 0;
1553 pcum->pcs_variant = ARM_PCS_AAPCS64;
1554 pcum->aapcs_reg = NULL_RTX;
1555 pcum->aapcs_arg_processed = false;
1556 pcum->aapcs_stack_words = 0;
1557 pcum->aapcs_stack_size = 0;
1558
1559 return;
1560}
1561
1562static void
1563aarch64_function_arg_advance (cumulative_args_t pcum_v,
1564 enum machine_mode mode,
1565 const_tree type,
1566 bool named)
1567{
1568 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1569 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1570 {
1571 aarch64_layout_arg (pcum_v, mode, type, named);
1572 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1573 != (pcum->aapcs_stack_words != 0));
1574 pcum->aapcs_arg_processed = false;
1575 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1576 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1577 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1578 pcum->aapcs_stack_words = 0;
1579 pcum->aapcs_reg = NULL_RTX;
1580 }
1581}
1582
1583bool
1584aarch64_function_arg_regno_p (unsigned regno)
1585{
1586 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1587 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1588}
1589
1590/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1591 PARM_BOUNDARY bits of alignment, but will be given anything up
1592 to STACK_BOUNDARY bits if the type requires it. This makes sure
1593 that both before and after the layout of each argument, the Next
1594 Stacked Argument Address (NSAA) will have a minimum alignment of
1595 8 bytes. */
1596
1597static unsigned int
1598aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1599{
1600 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1601
1602 if (alignment < PARM_BOUNDARY)
1603 alignment = PARM_BOUNDARY;
1604 if (alignment > STACK_BOUNDARY)
1605 alignment = STACK_BOUNDARY;
1606 return alignment;
1607}
1608
1609/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1610
1611 Return true if an argument passed on the stack should be padded upwards,
1612 i.e. if the least-significant byte of the stack slot has useful data.
1613
1614 Small aggregate types are placed in the lowest memory address.
1615
1616 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1617
1618bool
1619aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1620{
1621 /* On little-endian targets, the least significant byte of every stack
1622 argument is passed at the lowest byte address of the stack slot. */
1623 if (!BYTES_BIG_ENDIAN)
1624 return true;
1625
00edcfbe 1626 /* Otherwise, integral, floating-point and pointer types are padded downward:
43e9d192
IB
1627 the least significant byte of a stack argument is passed at the highest
1628 byte address of the stack slot. */
1629 if (type
00edcfbe
YZ
1630 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1631 || POINTER_TYPE_P (type))
43e9d192
IB
1632 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1633 return false;
1634
1635 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1636 return true;
1637}
1638
1639/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1640
1641 It specifies padding for the last (may also be the only)
1642 element of a block move between registers and memory. If
1643 assuming the block is in the memory, padding upward means that
1644 the last element is padded after its highest significant byte,
1645 while in downward padding, the last element is padded at the
1646 its least significant byte side.
1647
1648 Small aggregates and small complex types are always padded
1649 upwards.
1650
1651 We don't need to worry about homogeneous floating-point or
1652 short-vector aggregates; their move is not affected by the
1653 padding direction determined here. Regardless of endianness,
1654 each element of such an aggregate is put in the least
1655 significant bits of a fp/simd register.
1656
1657 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1658 register has useful data, and return the opposite if the most
1659 significant byte does. */
1660
1661bool
1662aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1663 bool first ATTRIBUTE_UNUSED)
1664{
1665
1666 /* Small composite types are always padded upward. */
1667 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1668 {
1669 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1670 : GET_MODE_SIZE (mode));
1671 if (size < 2 * UNITS_PER_WORD)
1672 return true;
1673 }
1674
1675 /* Otherwise, use the default padding. */
1676 return !BYTES_BIG_ENDIAN;
1677}
1678
1679static enum machine_mode
1680aarch64_libgcc_cmp_return_mode (void)
1681{
1682 return SImode;
1683}
1684
1685static bool
1686aarch64_frame_pointer_required (void)
1687{
1688 /* If the function contains dynamic stack allocations, we need to
1689 use the frame pointer to access the static parts of the frame. */
1690 if (cfun->calls_alloca)
1691 return true;
1692
0b7f8166
MS
1693 /* In aarch64_override_options_after_change
1694 flag_omit_leaf_frame_pointer turns off the frame pointer by
1695 default. Turn it back on now if we've not got a leaf
1696 function. */
1697 if (flag_omit_leaf_frame_pointer
1698 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1699 return true;
43e9d192 1700
0b7f8166 1701 return false;
43e9d192
IB
1702}
1703
1704/* Mark the registers that need to be saved by the callee and calculate
1705 the size of the callee-saved registers area and frame record (both FP
1706 and LR may be omitted). */
1707static void
1708aarch64_layout_frame (void)
1709{
1710 HOST_WIDE_INT offset = 0;
1711 int regno;
1712
1713 if (reload_completed && cfun->machine->frame.laid_out)
1714 return;
1715
43e9d192
IB
1716 /* First mark all the registers that really need to be saved... */
1717 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1718 cfun->machine->frame.reg_offset[regno] = -1;
1719
1720 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1721 cfun->machine->frame.reg_offset[regno] = -1;
1722
1723 /* ... that includes the eh data registers (if needed)... */
1724 if (crtl->calls_eh_return)
1725 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1726 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1727
1728 /* ... and any callee saved register that dataflow says is live. */
1729 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1730 if (df_regs_ever_live_p (regno)
1731 && !call_used_regs[regno])
1732 cfun->machine->frame.reg_offset[regno] = 0;
1733
1734 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1735 if (df_regs_ever_live_p (regno)
1736 && !call_used_regs[regno])
1737 cfun->machine->frame.reg_offset[regno] = 0;
1738
1739 if (frame_pointer_needed)
1740 {
1741 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1742 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1743 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1744 }
1745
1746 /* Now assign stack slots for them. */
1747 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1748 if (cfun->machine->frame.reg_offset[regno] != -1)
1749 {
1750 cfun->machine->frame.reg_offset[regno] = offset;
1751 offset += UNITS_PER_WORD;
1752 }
1753
1754 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1755 if (cfun->machine->frame.reg_offset[regno] != -1)
1756 {
1757 cfun->machine->frame.reg_offset[regno] = offset;
1758 offset += UNITS_PER_WORD;
1759 }
1760
1761 if (frame_pointer_needed)
1762 {
1763 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1764 offset += UNITS_PER_WORD;
43e9d192
IB
1765 }
1766
1767 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1768 {
1769 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1770 offset += UNITS_PER_WORD;
43e9d192
IB
1771 }
1772
1773 cfun->machine->frame.padding0 =
1774 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1775 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1776
1777 cfun->machine->frame.saved_regs_size = offset;
1778 cfun->machine->frame.laid_out = true;
1779}
1780
1781/* Make the last instruction frame-related and note that it performs
1782 the operation described by FRAME_PATTERN. */
1783
1784static void
1785aarch64_set_frame_expr (rtx frame_pattern)
1786{
1787 rtx insn;
1788
1789 insn = get_last_insn ();
1790 RTX_FRAME_RELATED_P (insn) = 1;
1791 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1792 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1793 frame_pattern,
1794 REG_NOTES (insn));
1795}
1796
1797static bool
1798aarch64_register_saved_on_entry (int regno)
1799{
1800 return cfun->machine->frame.reg_offset[regno] != -1;
1801}
1802
1803
1804static void
1805aarch64_save_or_restore_fprs (int start_offset, int increment,
1806 bool restore, rtx base_rtx)
1807
1808{
1809 unsigned regno;
1810 unsigned regno2;
1811 rtx insn;
e0f396bc
MS
1812 rtx (*gen_mem_ref)(enum machine_mode, rtx)
1813 = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
43e9d192
IB
1814
1815
1816 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1817 {
1818 if (aarch64_register_saved_on_entry (regno))
1819 {
1820 rtx mem;
1821 mem = gen_mem_ref (DFmode,
1822 plus_constant (Pmode,
1823 base_rtx,
1824 start_offset));
1825
1826 for (regno2 = regno + 1;
1827 regno2 <= V31_REGNUM
1828 && !aarch64_register_saved_on_entry (regno2);
1829 regno2++)
1830 {
1831 /* Empty loop. */
1832 }
1833 if (regno2 <= V31_REGNUM &&
1834 aarch64_register_saved_on_entry (regno2))
1835 {
1836 rtx mem2;
1837 /* Next highest register to be saved. */
1838 mem2 = gen_mem_ref (DFmode,
1839 plus_constant
1840 (Pmode,
1841 base_rtx,
1842 start_offset + increment));
1843 if (restore == false)
1844 {
1845 insn = emit_insn
1846 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1847 mem2, gen_rtx_REG (DFmode, regno2)));
1848
1849 }
1850 else
1851 {
1852 insn = emit_insn
1853 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1854 gen_rtx_REG (DFmode, regno2), mem2));
1855
e0f396bc
MS
1856 add_reg_note (insn, REG_CFA_RESTORE,
1857 gen_rtx_REG (DFmode, regno));
1858 add_reg_note (insn, REG_CFA_RESTORE,
1859 gen_rtx_REG (DFmode, regno2));
43e9d192
IB
1860 }
1861
1862 /* The first part of a frame-related parallel insn
1863 is always assumed to be relevant to the frame
1864 calculations; subsequent parts, are only
1865 frame-related if explicitly marked. */
e0f396bc 1866 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
43e9d192
IB
1867 regno = regno2;
1868 start_offset += increment * 2;
1869 }
1870 else
1871 {
1872 if (restore == false)
1873 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1874 else
1875 {
1876 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
e0f396bc
MS
1877 add_reg_note (insn, REG_CFA_RESTORE,
1878 gen_rtx_REG (DImode, regno));
43e9d192
IB
1879 }
1880 start_offset += increment;
1881 }
1882 RTX_FRAME_RELATED_P (insn) = 1;
1883 }
1884 }
1885
1886}
1887
1888
1889/* offset from the stack pointer of where the saves and
1890 restore's have to happen. */
1891static void
1892aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1893 bool restore)
1894{
1895 rtx insn;
1896 rtx base_rtx = stack_pointer_rtx;
1897 HOST_WIDE_INT start_offset = offset;
1898 HOST_WIDE_INT increment = UNITS_PER_WORD;
1899 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1900 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1901 unsigned regno;
1902 unsigned regno2;
1903
1904 for (regno = R0_REGNUM; regno <= limit; regno++)
1905 {
1906 if (aarch64_register_saved_on_entry (regno))
1907 {
1908 rtx mem;
1909 mem = gen_mem_ref (Pmode,
1910 plus_constant (Pmode,
1911 base_rtx,
1912 start_offset));
1913
1914 for (regno2 = regno + 1;
1915 regno2 <= limit
1916 && !aarch64_register_saved_on_entry (regno2);
1917 regno2++)
1918 {
1919 /* Empty loop. */
1920 }
1921 if (regno2 <= limit &&
1922 aarch64_register_saved_on_entry (regno2))
1923 {
1924 rtx mem2;
1925 /* Next highest register to be saved. */
1926 mem2 = gen_mem_ref (Pmode,
1927 plus_constant
1928 (Pmode,
1929 base_rtx,
1930 start_offset + increment));
1931 if (restore == false)
1932 {
1933 insn = emit_insn
1934 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1935 mem2, gen_rtx_REG (DImode, regno2)));
1936
1937 }
1938 else
1939 {
1940 insn = emit_insn
1941 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1942 gen_rtx_REG (DImode, regno2), mem2));
1943
1944 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1945 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1946 }
1947
1948 /* The first part of a frame-related parallel insn
1949 is always assumed to be relevant to the frame
1950 calculations; subsequent parts, are only
1951 frame-related if explicitly marked. */
1952 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1953 1)) = 1;
1954 regno = regno2;
1955 start_offset += increment * 2;
1956 }
1957 else
1958 {
1959 if (restore == false)
1960 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1961 else
1962 {
1963 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1964 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1965 }
1966 start_offset += increment;
1967 }
1968 RTX_FRAME_RELATED_P (insn) = 1;
1969 }
1970 }
1971
1972 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1973
1974}
1975
1976/* AArch64 stack frames generated by this compiler look like:
1977
1978 +-------------------------------+
1979 | |
1980 | incoming stack arguments |
1981 | |
1982 +-------------------------------+ <-- arg_pointer_rtx
1983 | |
1984 | callee-allocated save area |
1985 | for register varargs |
1986 | |
43e9d192
IB
1987 +-------------------------------+ <-- frame_pointer_rtx
1988 | |
454fdba9 1989 | local variables |
43e9d192
IB
1990 | |
1991 +-------------------------------+
454fdba9
RL
1992 | padding0 | \
1993 +-------------------------------+ |
1994 | | |
1995 | | |
1996 | callee-saved registers | | frame.saved_regs_size
1997 | | |
1998 +-------------------------------+ |
1999 | LR' | |
2000 +-------------------------------+ |
2001 | FP' | /
43e9d192
IB
2002 P +-------------------------------+ <-- hard_frame_pointer_rtx
2003 | dynamic allocation |
2004 +-------------------------------+
2005 | |
2006 | outgoing stack arguments |
2007 | |
2008 +-------------------------------+ <-- stack_pointer_rtx
2009
2010 Dynamic stack allocations such as alloca insert data at point P.
2011 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2012 hard_frame_pointer_rtx unchanged. */
2013
2014/* Generate the prologue instructions for entry into a function.
2015 Establish the stack frame by decreasing the stack pointer with a
2016 properly calculated size and, if necessary, create a frame record
2017 filled with the values of LR and previous frame pointer. The
6991c977 2018 current FP is also set up if it is in use. */
43e9d192
IB
2019
2020void
2021aarch64_expand_prologue (void)
2022{
2023 /* sub sp, sp, #<frame_size>
2024 stp {fp, lr}, [sp, #<frame_size> - 16]
2025 add fp, sp, #<frame_size> - hardfp_offset
2026 stp {cs_reg}, [fp, #-16] etc.
2027
2028 sub sp, sp, <final_adjustment_if_any>
2029 */
2030 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2031 HOST_WIDE_INT frame_size, offset;
2032 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2033 rtx insn;
2034
2035 aarch64_layout_frame ();
2036 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2037 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2038 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2039 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2040 + crtl->outgoing_args_size);
2041 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2042 STACK_BOUNDARY / BITS_PER_UNIT);
2043
2044 if (flag_stack_usage_info)
2045 current_function_static_stack_size = frame_size;
2046
2047 fp_offset = (offset
2048 - original_frame_size
2049 - cfun->machine->frame.saved_regs_size);
2050
44c0e7b9 2051 /* Store pairs and load pairs have a range only -512 to 504. */
43e9d192
IB
2052 if (offset >= 512)
2053 {
2054 /* When the frame has a large size, an initial decrease is done on
2055 the stack pointer to jump over the callee-allocated save area for
2056 register varargs, the local variable area and/or the callee-saved
2057 register area. This will allow the pre-index write-back
2058 store pair instructions to be used for setting up the stack frame
2059 efficiently. */
2060 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2061 if (offset >= 512)
2062 offset = cfun->machine->frame.saved_regs_size;
2063
2064 frame_size -= (offset + crtl->outgoing_args_size);
2065 fp_offset = 0;
2066
2067 if (frame_size >= 0x1000000)
2068 {
2069 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2070 emit_move_insn (op0, GEN_INT (-frame_size));
2071 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2072 aarch64_set_frame_expr (gen_rtx_SET
2073 (Pmode, stack_pointer_rtx,
f6fe771a
RL
2074 plus_constant (Pmode,
2075 stack_pointer_rtx,
2076 -frame_size)));
43e9d192
IB
2077 }
2078 else if (frame_size > 0)
2079 {
2080 if ((frame_size & 0xfff) != frame_size)
2081 {
2082 insn = emit_insn (gen_add2_insn
2083 (stack_pointer_rtx,
2084 GEN_INT (-(frame_size
2085 & ~(HOST_WIDE_INT)0xfff))));
2086 RTX_FRAME_RELATED_P (insn) = 1;
2087 }
2088 if ((frame_size & 0xfff) != 0)
2089 {
2090 insn = emit_insn (gen_add2_insn
2091 (stack_pointer_rtx,
2092 GEN_INT (-(frame_size
2093 & (HOST_WIDE_INT)0xfff))));
2094 RTX_FRAME_RELATED_P (insn) = 1;
2095 }
2096 }
2097 }
2098 else
2099 frame_size = -1;
2100
2101 if (offset > 0)
2102 {
2103 /* Save the frame pointer and lr if the frame pointer is needed
2104 first. Make the frame pointer point to the location of the
2105 old frame pointer on the stack. */
2106 if (frame_pointer_needed)
2107 {
2108 rtx mem_fp, mem_lr;
2109
2110 if (fp_offset)
2111 {
2112 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2113 GEN_INT (-offset)));
2114 RTX_FRAME_RELATED_P (insn) = 1;
2115 aarch64_set_frame_expr (gen_rtx_SET
2116 (Pmode, stack_pointer_rtx,
2117 gen_rtx_MINUS (Pmode,
2118 stack_pointer_rtx,
2119 GEN_INT (offset))));
2120 mem_fp = gen_frame_mem (DImode,
2121 plus_constant (Pmode,
2122 stack_pointer_rtx,
2123 fp_offset));
2124 mem_lr = gen_frame_mem (DImode,
2125 plus_constant (Pmode,
2126 stack_pointer_rtx,
2127 fp_offset
2128 + UNITS_PER_WORD));
2129 insn = emit_insn (gen_store_pairdi (mem_fp,
2130 hard_frame_pointer_rtx,
2131 mem_lr,
2132 gen_rtx_REG (DImode,
2133 LR_REGNUM)));
2134 }
2135 else
2136 {
2137 insn = emit_insn (gen_storewb_pairdi_di
2138 (stack_pointer_rtx, stack_pointer_rtx,
2139 hard_frame_pointer_rtx,
2140 gen_rtx_REG (DImode, LR_REGNUM),
2141 GEN_INT (-offset),
2142 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2143 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2144 }
2145
2146 /* The first part of a frame-related parallel insn is always
2147 assumed to be relevant to the frame calculations;
2148 subsequent parts, are only frame-related if explicitly
2149 marked. */
2150 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2151 RTX_FRAME_RELATED_P (insn) = 1;
2152
2153 /* Set up frame pointer to point to the location of the
2154 previous frame pointer on the stack. */
2155 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2156 stack_pointer_rtx,
2157 GEN_INT (fp_offset)));
2158 aarch64_set_frame_expr (gen_rtx_SET
2159 (Pmode, hard_frame_pointer_rtx,
f6fe771a
RL
2160 plus_constant (Pmode,
2161 stack_pointer_rtx,
2162 fp_offset)));
43e9d192
IB
2163 RTX_FRAME_RELATED_P (insn) = 1;
2164 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2165 hard_frame_pointer_rtx));
2166 }
2167 else
2168 {
2169 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2170 GEN_INT (-offset)));
2171 RTX_FRAME_RELATED_P (insn) = 1;
2172 }
2173
2174 aarch64_save_or_restore_callee_save_registers
2175 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2176 }
2177
2178 /* when offset >= 512,
2179 sub sp, sp, #<outgoing_args_size> */
2180 if (frame_size > -1)
2181 {
2182 if (crtl->outgoing_args_size > 0)
2183 {
2184 insn = emit_insn (gen_add2_insn
2185 (stack_pointer_rtx,
2186 GEN_INT (- crtl->outgoing_args_size)));
2187 RTX_FRAME_RELATED_P (insn) = 1;
2188 }
2189 }
2190}
2191
2192/* Generate the epilogue instructions for returning from a function. */
2193void
2194aarch64_expand_epilogue (bool for_sibcall)
2195{
2196 HOST_WIDE_INT original_frame_size, frame_size, offset;
2197 HOST_WIDE_INT fp_offset;
2198 rtx insn;
44c0e7b9 2199 rtx cfa_reg;
43e9d192
IB
2200
2201 aarch64_layout_frame ();
2202 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2203 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2204 + crtl->outgoing_args_size);
2205 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2206 STACK_BOUNDARY / BITS_PER_UNIT);
2207
2208 fp_offset = (offset
2209 - original_frame_size
2210 - cfun->machine->frame.saved_regs_size);
2211
44c0e7b9
YZ
2212 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2213
2214 /* Store pairs and load pairs have a range only -512 to 504. */
43e9d192
IB
2215 if (offset >= 512)
2216 {
2217 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2218 if (offset >= 512)
2219 offset = cfun->machine->frame.saved_regs_size;
2220
2221 frame_size -= (offset + crtl->outgoing_args_size);
2222 fp_offset = 0;
2223 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2224 {
2225 insn = emit_insn (gen_add2_insn
2226 (stack_pointer_rtx,
2227 GEN_INT (crtl->outgoing_args_size)));
2228 RTX_FRAME_RELATED_P (insn) = 1;
2229 }
2230 }
2231 else
2232 frame_size = -1;
2233
2234 /* If there were outgoing arguments or we've done dynamic stack
2235 allocation, then restore the stack pointer from the frame
2236 pointer. This is at most one insn and more efficient than using
2237 GCC's internal mechanism. */
2238 if (frame_pointer_needed
2239 && (crtl->outgoing_args_size || cfun->calls_alloca))
2240 {
2241 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2242 hard_frame_pointer_rtx,
2243 GEN_INT (- fp_offset)));
2244 RTX_FRAME_RELATED_P (insn) = 1;
44c0e7b9
YZ
2245 /* As SP is set to (FP - fp_offset), according to the rules in
2246 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2247 from the value of SP from now on. */
2248 cfa_reg = stack_pointer_rtx;
43e9d192
IB
2249 }
2250
2251 aarch64_save_or_restore_callee_save_registers
2252 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2253
2254 /* Restore the frame pointer and lr if the frame pointer is needed. */
2255 if (offset > 0)
2256 {
2257 if (frame_pointer_needed)
2258 {
2259 rtx mem_fp, mem_lr;
2260
2261 if (fp_offset)
2262 {
2263 mem_fp = gen_frame_mem (DImode,
2264 plus_constant (Pmode,
2265 stack_pointer_rtx,
2266 fp_offset));
2267 mem_lr = gen_frame_mem (DImode,
2268 plus_constant (Pmode,
2269 stack_pointer_rtx,
2270 fp_offset
2271 + UNITS_PER_WORD));
2272 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2273 mem_fp,
2274 gen_rtx_REG (DImode,
2275 LR_REGNUM),
2276 mem_lr));
2277 }
2278 else
2279 {
2280 insn = emit_insn (gen_loadwb_pairdi_di
2281 (stack_pointer_rtx,
2282 stack_pointer_rtx,
2283 hard_frame_pointer_rtx,
2284 gen_rtx_REG (DImode, LR_REGNUM),
2285 GEN_INT (offset),
2286 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2287 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
44c0e7b9
YZ
2288 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2289 (gen_rtx_SET (Pmode, stack_pointer_rtx,
dc2d3c67
YZ
2290 plus_constant (Pmode, cfa_reg,
2291 offset))));
43e9d192
IB
2292 }
2293
2294 /* The first part of a frame-related parallel insn
2295 is always assumed to be relevant to the frame
2296 calculations; subsequent parts, are only
2297 frame-related if explicitly marked. */
2298 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2299 RTX_FRAME_RELATED_P (insn) = 1;
2300 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2301 add_reg_note (insn, REG_CFA_RESTORE,
2302 gen_rtx_REG (DImode, LR_REGNUM));
2303
2304 if (fp_offset)
2305 {
2306 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2307 GEN_INT (offset)));
2308 RTX_FRAME_RELATED_P (insn) = 1;
2309 }
2310 }
43e9d192
IB
2311 else
2312 {
2313 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2314 GEN_INT (offset)));
2315 RTX_FRAME_RELATED_P (insn) = 1;
2316 }
2317 }
2318
2319 /* Stack adjustment for exception handler. */
2320 if (crtl->calls_eh_return)
2321 {
2322 /* We need to unwind the stack by the offset computed by
2323 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2324 based on SP. Ideally we would update the SP and define the
2325 CFA along the lines of:
2326
2327 SP = SP + EH_RETURN_STACKADJ_RTX
2328 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2329
2330 However the dwarf emitter only understands a constant
2331 register offset.
2332
631b20a7 2333 The solution chosen here is to use the otherwise unused IP0
43e9d192
IB
2334 as a temporary register to hold the current SP value. The
2335 CFA is described using IP0 then SP is modified. */
2336
2337 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2338
2339 insn = emit_move_insn (ip0, stack_pointer_rtx);
2340 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2341 RTX_FRAME_RELATED_P (insn) = 1;
2342
2343 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2344
2345 /* Ensure the assignment to IP0 does not get optimized away. */
2346 emit_use (ip0);
2347 }
2348
2349 if (frame_size > -1)
2350 {
2351 if (frame_size >= 0x1000000)
2352 {
2353 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2354 emit_move_insn (op0, GEN_INT (frame_size));
2355 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2356 aarch64_set_frame_expr (gen_rtx_SET
2357 (Pmode, stack_pointer_rtx,
f6fe771a
RL
2358 plus_constant (Pmode,
2359 stack_pointer_rtx,
2360 frame_size)));
43e9d192
IB
2361 }
2362 else if (frame_size > 0)
2363 {
2364 if ((frame_size & 0xfff) != 0)
2365 {
2366 insn = emit_insn (gen_add2_insn
2367 (stack_pointer_rtx,
2368 GEN_INT ((frame_size
2369 & (HOST_WIDE_INT) 0xfff))));
2370 RTX_FRAME_RELATED_P (insn) = 1;
2371 }
2372 if ((frame_size & 0xfff) != frame_size)
2373 {
2374 insn = emit_insn (gen_add2_insn
2375 (stack_pointer_rtx,
2376 GEN_INT ((frame_size
2377 & ~ (HOST_WIDE_INT) 0xfff))));
2378 RTX_FRAME_RELATED_P (insn) = 1;
2379 }
2380 }
2381
f6fe771a
RL
2382 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2383 plus_constant (Pmode,
2384 stack_pointer_rtx,
2385 offset)));
43e9d192
IB
2386 }
2387
2388 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2389 if (!for_sibcall)
2390 emit_jump_insn (ret_rtx);
2391}
2392
2393/* Return the place to copy the exception unwinding return address to.
2394 This will probably be a stack slot, but could (in theory be the
2395 return register). */
2396rtx
2397aarch64_final_eh_return_addr (void)
2398{
2399 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2400 aarch64_layout_frame ();
2401 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2402 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2403 + crtl->outgoing_args_size);
2404 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2405 STACK_BOUNDARY / BITS_PER_UNIT);
2406 fp_offset = offset
2407 - original_frame_size
2408 - cfun->machine->frame.saved_regs_size;
2409
2410 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2411 return gen_rtx_REG (DImode, LR_REGNUM);
2412
2413 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2414 result in a store to save LR introduced by builtin_eh_return () being
2415 incorrectly deleted because the alias is not detected.
2416 So in the calculation of the address to copy the exception unwinding
2417 return address to, we note 2 cases.
2418 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2419 we return a SP-relative location since all the addresses are SP-relative
2420 in this case. This prevents the store from being optimized away.
2421 If the fp_offset is not 0, then the addresses will be FP-relative and
2422 therefore we return a FP-relative location. */
2423
2424 if (frame_pointer_needed)
2425 {
2426 if (fp_offset)
2427 return gen_frame_mem (DImode,
2428 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2429 else
2430 return gen_frame_mem (DImode,
2431 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2432 }
2433
2434 /* If FP is not needed, we calculate the location of LR, which would be
2435 at the top of the saved registers block. */
2436
2437 return gen_frame_mem (DImode,
2438 plus_constant (Pmode,
2439 stack_pointer_rtx,
2440 fp_offset
2441 + cfun->machine->frame.saved_regs_size
2442 - 2 * UNITS_PER_WORD));
2443}
2444
2445/* Output code to build up a constant in a register. */
2446static void
d9600ae5 2447aarch64_build_constant (int regnum, HOST_WIDE_INT val)
43e9d192
IB
2448{
2449 if (aarch64_bitmask_imm (val, DImode))
d9600ae5 2450 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
43e9d192
IB
2451 else
2452 {
2453 int i;
2454 int ncount = 0;
2455 int zcount = 0;
2456 HOST_WIDE_INT valp = val >> 16;
2457 HOST_WIDE_INT valm;
2458 HOST_WIDE_INT tval;
2459
2460 for (i = 16; i < 64; i += 16)
2461 {
2462 valm = (valp & 0xffff);
2463
2464 if (valm != 0)
2465 ++ zcount;
2466
2467 if (valm != 0xffff)
2468 ++ ncount;
2469
2470 valp >>= 16;
2471 }
2472
2473 /* zcount contains the number of additional MOVK instructions
2474 required if the constant is built up with an initial MOVZ instruction,
2475 while ncount is the number of MOVK instructions required if starting
2476 with a MOVN instruction. Choose the sequence that yields the fewest
2477 number of instructions, preferring MOVZ instructions when they are both
2478 the same. */
2479 if (ncount < zcount)
2480 {
d9600ae5 2481 emit_move_insn (gen_rtx_REG (Pmode, regnum),
d103f29b 2482 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
43e9d192
IB
2483 tval = 0xffff;
2484 }
2485 else
2486 {
d9600ae5
SN
2487 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2488 GEN_INT (val & 0xffff));
43e9d192
IB
2489 tval = 0;
2490 }
2491
2492 val >>= 16;
2493
2494 for (i = 16; i < 64; i += 16)
2495 {
2496 if ((val & 0xffff) != tval)
d9600ae5
SN
2497 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2498 GEN_INT (i), GEN_INT (val & 0xffff)));
43e9d192
IB
2499 val >>= 16;
2500 }
2501 }
2502}
2503
2504static void
d9600ae5 2505aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
43e9d192
IB
2506{
2507 HOST_WIDE_INT mdelta = delta;
d9600ae5
SN
2508 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2509 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
43e9d192
IB
2510
2511 if (mdelta < 0)
2512 mdelta = -mdelta;
2513
2514 if (mdelta >= 4096 * 4096)
2515 {
d9600ae5
SN
2516 aarch64_build_constant (scratchreg, delta);
2517 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
43e9d192
IB
2518 }
2519 else if (mdelta > 0)
2520 {
43e9d192 2521 if (mdelta >= 4096)
d9600ae5
SN
2522 {
2523 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2524 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2525 if (delta < 0)
2526 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2527 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2528 else
2529 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2530 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2531 }
43e9d192 2532 if (mdelta % 4096 != 0)
d9600ae5
SN
2533 {
2534 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2535 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2536 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2537 }
43e9d192
IB
2538 }
2539}
2540
2541/* Output code to add DELTA to the first argument, and then jump
2542 to FUNCTION. Used for C++ multiple inheritance. */
2543static void
2544aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2545 HOST_WIDE_INT delta,
2546 HOST_WIDE_INT vcall_offset,
2547 tree function)
2548{
2549 /* The this pointer is always in x0. Note that this differs from
2550 Arm where the this pointer maybe bumped to r1 if r0 is required
2551 to return a pointer to an aggregate. On AArch64 a result value
2552 pointer will be in x8. */
2553 int this_regno = R0_REGNUM;
75f1d6fc 2554 rtx this_rtx, temp0, temp1, addr, insn, funexp;
43e9d192 2555
75f1d6fc
SN
2556 reload_completed = 1;
2557 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192
IB
2558
2559 if (vcall_offset == 0)
d9600ae5 2560 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
2561 else
2562 {
28514dda 2563 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 2564
75f1d6fc
SN
2565 this_rtx = gen_rtx_REG (Pmode, this_regno);
2566 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2567 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
43e9d192 2568
75f1d6fc
SN
2569 addr = this_rtx;
2570 if (delta != 0)
2571 {
2572 if (delta >= -256 && delta < 256)
2573 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2574 plus_constant (Pmode, this_rtx, delta));
2575 else
d9600ae5 2576 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
2577 }
2578
28514dda
YZ
2579 if (Pmode == ptr_mode)
2580 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2581 else
2582 aarch64_emit_move (temp0,
2583 gen_rtx_ZERO_EXTEND (Pmode,
2584 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 2585
28514dda 2586 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 2587 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
2588 else
2589 {
d9600ae5 2590 aarch64_build_constant (IP1_REGNUM, vcall_offset);
75f1d6fc 2591 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
2592 }
2593
28514dda
YZ
2594 if (Pmode == ptr_mode)
2595 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2596 else
2597 aarch64_emit_move (temp1,
2598 gen_rtx_SIGN_EXTEND (Pmode,
2599 gen_rtx_MEM (ptr_mode, addr)));
2600
75f1d6fc 2601 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
2602 }
2603
75f1d6fc
SN
2604 /* Generate a tail call to the target function. */
2605 if (!TREE_USED (function))
2606 {
2607 assemble_external (function);
2608 TREE_USED (function) = 1;
2609 }
2610 funexp = XEXP (DECL_RTL (function), 0);
2611 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2612 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2613 SIBLING_CALL_P (insn) = 1;
2614
2615 insn = get_insns ();
2616 shorten_branches (insn);
2617 final_start_function (insn, file, 1);
2618 final (insn, file, 1);
43e9d192 2619 final_end_function ();
75f1d6fc
SN
2620
2621 /* Stop pretending to be a post-reload pass. */
2622 reload_completed = 0;
43e9d192
IB
2623}
2624
43e9d192
IB
2625static int
2626aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2627{
2628 if (GET_CODE (*x) == SYMBOL_REF)
2629 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2630
2631 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2632 TLS offsets, not real symbol references. */
2633 if (GET_CODE (*x) == UNSPEC
2634 && XINT (*x, 1) == UNSPEC_TLS)
2635 return -1;
2636
2637 return 0;
2638}
2639
2640static bool
2641aarch64_tls_referenced_p (rtx x)
2642{
2643 if (!TARGET_HAVE_TLS)
2644 return false;
2645
2646 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2647}
2648
2649
2650static int
2651aarch64_bitmasks_cmp (const void *i1, const void *i2)
2652{
2653 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2654 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2655
2656 if (*imm1 < *imm2)
2657 return -1;
2658 if (*imm1 > *imm2)
2659 return +1;
2660 return 0;
2661}
2662
2663
2664static void
2665aarch64_build_bitmask_table (void)
2666{
2667 unsigned HOST_WIDE_INT mask, imm;
2668 unsigned int log_e, e, s, r;
2669 unsigned int nimms = 0;
2670
2671 for (log_e = 1; log_e <= 6; log_e++)
2672 {
2673 e = 1 << log_e;
2674 if (e == 64)
2675 mask = ~(HOST_WIDE_INT) 0;
2676 else
2677 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2678 for (s = 1; s < e; s++)
2679 {
2680 for (r = 0; r < e; r++)
2681 {
2682 /* set s consecutive bits to 1 (s < 64) */
2683 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2684 /* rotate right by r */
2685 if (r != 0)
2686 imm = ((imm >> r) | (imm << (e - r))) & mask;
2687 /* replicate the constant depending on SIMD size */
2688 switch (log_e) {
2689 case 1: imm |= (imm << 2);
2690 case 2: imm |= (imm << 4);
2691 case 3: imm |= (imm << 8);
2692 case 4: imm |= (imm << 16);
2693 case 5: imm |= (imm << 32);
2694 case 6:
2695 break;
2696 default:
2697 gcc_unreachable ();
2698 }
2699 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2700 aarch64_bitmasks[nimms++] = imm;
2701 }
2702 }
2703 }
2704
2705 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2706 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2707 aarch64_bitmasks_cmp);
2708}
2709
2710
2711/* Return true if val can be encoded as a 12-bit unsigned immediate with
2712 a left shift of 0 or 12 bits. */
2713bool
2714aarch64_uimm12_shift (HOST_WIDE_INT val)
2715{
2716 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2717 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2718 );
2719}
2720
2721
2722/* Return true if val is an immediate that can be loaded into a
2723 register by a MOVZ instruction. */
2724static bool
2725aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2726{
2727 if (GET_MODE_SIZE (mode) > 4)
2728 {
2729 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2730 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2731 return 1;
2732 }
2733 else
2734 {
2735 /* Ignore sign extension. */
2736 val &= (HOST_WIDE_INT) 0xffffffff;
2737 }
2738 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2739 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2740}
2741
2742
2743/* Return true if val is a valid bitmask immediate. */
2744bool
2745aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2746{
2747 if (GET_MODE_SIZE (mode) < 8)
2748 {
2749 /* Replicate bit pattern. */
2750 val &= (HOST_WIDE_INT) 0xffffffff;
2751 val |= val << 32;
2752 }
2753 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2754 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2755}
2756
2757
2758/* Return true if val is an immediate that can be loaded into a
2759 register in a single instruction. */
2760bool
2761aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2762{
2763 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2764 return 1;
2765 return aarch64_bitmask_imm (val, mode);
2766}
2767
2768static bool
2769aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2770{
2771 rtx base, offset;
7eda14e1 2772
43e9d192
IB
2773 if (GET_CODE (x) == HIGH)
2774 return true;
2775
2776 split_const (x, &base, &offset);
2777 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda
YZ
2778 {
2779 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2780 != SYMBOL_FORCE_TO_MEM)
2781 return true;
2782 else
2783 /* Avoid generating a 64-bit relocation in ILP32; leave
2784 to aarch64_expand_mov_immediate to handle it properly. */
2785 return mode != ptr_mode;
2786 }
43e9d192
IB
2787
2788 return aarch64_tls_referenced_p (x);
2789}
2790
2791/* Return true if register REGNO is a valid index register.
2792 STRICT_P is true if REG_OK_STRICT is in effect. */
2793
2794bool
2795aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2796{
2797 if (!HARD_REGISTER_NUM_P (regno))
2798 {
2799 if (!strict_p)
2800 return true;
2801
2802 if (!reg_renumber)
2803 return false;
2804
2805 regno = reg_renumber[regno];
2806 }
2807 return GP_REGNUM_P (regno);
2808}
2809
2810/* Return true if register REGNO is a valid base register for mode MODE.
2811 STRICT_P is true if REG_OK_STRICT is in effect. */
2812
2813bool
2814aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2815{
2816 if (!HARD_REGISTER_NUM_P (regno))
2817 {
2818 if (!strict_p)
2819 return true;
2820
2821 if (!reg_renumber)
2822 return false;
2823
2824 regno = reg_renumber[regno];
2825 }
2826
2827 /* The fake registers will be eliminated to either the stack or
2828 hard frame pointer, both of which are usually valid base registers.
2829 Reload deals with the cases where the eliminated form isn't valid. */
2830 return (GP_REGNUM_P (regno)
2831 || regno == SP_REGNUM
2832 || regno == FRAME_POINTER_REGNUM
2833 || regno == ARG_POINTER_REGNUM);
2834}
2835
2836/* Return true if X is a valid base register for mode MODE.
2837 STRICT_P is true if REG_OK_STRICT is in effect. */
2838
2839static bool
2840aarch64_base_register_rtx_p (rtx x, bool strict_p)
2841{
2842 if (!strict_p && GET_CODE (x) == SUBREG)
2843 x = SUBREG_REG (x);
2844
2845 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2846}
2847
2848/* Return true if address offset is a valid index. If it is, fill in INFO
2849 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2850
2851static bool
2852aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2853 enum machine_mode mode, bool strict_p)
2854{
2855 enum aarch64_address_type type;
2856 rtx index;
2857 int shift;
2858
2859 /* (reg:P) */
2860 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2861 && GET_MODE (x) == Pmode)
2862 {
2863 type = ADDRESS_REG_REG;
2864 index = x;
2865 shift = 0;
2866 }
2867 /* (sign_extend:DI (reg:SI)) */
2868 else if ((GET_CODE (x) == SIGN_EXTEND
2869 || GET_CODE (x) == ZERO_EXTEND)
2870 && GET_MODE (x) == DImode
2871 && GET_MODE (XEXP (x, 0)) == SImode)
2872 {
2873 type = (GET_CODE (x) == SIGN_EXTEND)
2874 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2875 index = XEXP (x, 0);
2876 shift = 0;
2877 }
2878 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2879 else if (GET_CODE (x) == MULT
2880 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2881 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2882 && GET_MODE (XEXP (x, 0)) == DImode
2883 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2884 && CONST_INT_P (XEXP (x, 1)))
2885 {
2886 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2887 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2888 index = XEXP (XEXP (x, 0), 0);
2889 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2890 }
2891 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2892 else if (GET_CODE (x) == ASHIFT
2893 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2894 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2895 && GET_MODE (XEXP (x, 0)) == DImode
2896 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2897 && CONST_INT_P (XEXP (x, 1)))
2898 {
2899 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2900 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2901 index = XEXP (XEXP (x, 0), 0);
2902 shift = INTVAL (XEXP (x, 1));
2903 }
2904 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2905 else if ((GET_CODE (x) == SIGN_EXTRACT
2906 || GET_CODE (x) == ZERO_EXTRACT)
2907 && GET_MODE (x) == DImode
2908 && GET_CODE (XEXP (x, 0)) == MULT
2909 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2910 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2911 {
2912 type = (GET_CODE (x) == SIGN_EXTRACT)
2913 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2914 index = XEXP (XEXP (x, 0), 0);
2915 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2916 if (INTVAL (XEXP (x, 1)) != 32 + shift
2917 || INTVAL (XEXP (x, 2)) != 0)
2918 shift = -1;
2919 }
2920 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2921 (const_int 0xffffffff<<shift)) */
2922 else if (GET_CODE (x) == AND
2923 && GET_MODE (x) == DImode
2924 && GET_CODE (XEXP (x, 0)) == MULT
2925 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2926 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2927 && CONST_INT_P (XEXP (x, 1)))
2928 {
2929 type = ADDRESS_REG_UXTW;
2930 index = XEXP (XEXP (x, 0), 0);
2931 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2932 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2933 shift = -1;
2934 }
2935 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2936 else if ((GET_CODE (x) == SIGN_EXTRACT
2937 || GET_CODE (x) == ZERO_EXTRACT)
2938 && GET_MODE (x) == DImode
2939 && GET_CODE (XEXP (x, 0)) == ASHIFT
2940 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2941 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2942 {
2943 type = (GET_CODE (x) == SIGN_EXTRACT)
2944 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2945 index = XEXP (XEXP (x, 0), 0);
2946 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2947 if (INTVAL (XEXP (x, 1)) != 32 + shift
2948 || INTVAL (XEXP (x, 2)) != 0)
2949 shift = -1;
2950 }
2951 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2952 (const_int 0xffffffff<<shift)) */
2953 else if (GET_CODE (x) == AND
2954 && GET_MODE (x) == DImode
2955 && GET_CODE (XEXP (x, 0)) == ASHIFT
2956 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2957 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2958 && CONST_INT_P (XEXP (x, 1)))
2959 {
2960 type = ADDRESS_REG_UXTW;
2961 index = XEXP (XEXP (x, 0), 0);
2962 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2963 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2964 shift = -1;
2965 }
2966 /* (mult:P (reg:P) (const_int scale)) */
2967 else if (GET_CODE (x) == MULT
2968 && GET_MODE (x) == Pmode
2969 && GET_MODE (XEXP (x, 0)) == Pmode
2970 && CONST_INT_P (XEXP (x, 1)))
2971 {
2972 type = ADDRESS_REG_REG;
2973 index = XEXP (x, 0);
2974 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2975 }
2976 /* (ashift:P (reg:P) (const_int shift)) */
2977 else if (GET_CODE (x) == ASHIFT
2978 && GET_MODE (x) == Pmode
2979 && GET_MODE (XEXP (x, 0)) == Pmode
2980 && CONST_INT_P (XEXP (x, 1)))
2981 {
2982 type = ADDRESS_REG_REG;
2983 index = XEXP (x, 0);
2984 shift = INTVAL (XEXP (x, 1));
2985 }
2986 else
2987 return false;
2988
2989 if (GET_CODE (index) == SUBREG)
2990 index = SUBREG_REG (index);
2991
2992 if ((shift == 0 ||
2993 (shift > 0 && shift <= 3
2994 && (1 << shift) == GET_MODE_SIZE (mode)))
2995 && REG_P (index)
2996 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2997 {
2998 info->type = type;
2999 info->offset = index;
3000 info->shift = shift;
3001 return true;
3002 }
3003
3004 return false;
3005}
3006
3007static inline bool
3008offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3009{
3010 return (offset >= -64 * GET_MODE_SIZE (mode)
3011 && offset < 64 * GET_MODE_SIZE (mode)
3012 && offset % GET_MODE_SIZE (mode) == 0);
3013}
3014
3015static inline bool
3016offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3017 HOST_WIDE_INT offset)
3018{
3019 return offset >= -256 && offset < 256;
3020}
3021
3022static inline bool
3023offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3024{
3025 return (offset >= 0
3026 && offset < 4096 * GET_MODE_SIZE (mode)
3027 && offset % GET_MODE_SIZE (mode) == 0);
3028}
3029
3030/* Return true if X is a valid address for machine mode MODE. If it is,
3031 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3032 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3033
3034static bool
3035aarch64_classify_address (struct aarch64_address_info *info,
3036 rtx x, enum machine_mode mode,
3037 RTX_CODE outer_code, bool strict_p)
3038{
3039 enum rtx_code code = GET_CODE (x);
3040 rtx op0, op1;
3041 bool allow_reg_index_p =
3042 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
3043
3044 /* Don't support anything other than POST_INC or REG addressing for
3045 AdvSIMD. */
3046 if (aarch64_vector_mode_p (mode)
3047 && (code != POST_INC && code != REG))
3048 return false;
3049
3050 switch (code)
3051 {
3052 case REG:
3053 case SUBREG:
3054 info->type = ADDRESS_REG_IMM;
3055 info->base = x;
3056 info->offset = const0_rtx;
3057 return aarch64_base_register_rtx_p (x, strict_p);
3058
3059 case PLUS:
3060 op0 = XEXP (x, 0);
3061 op1 = XEXP (x, 1);
3062 if (GET_MODE_SIZE (mode) != 0
3063 && CONST_INT_P (op1)
3064 && aarch64_base_register_rtx_p (op0, strict_p))
3065 {
3066 HOST_WIDE_INT offset = INTVAL (op1);
3067
3068 info->type = ADDRESS_REG_IMM;
3069 info->base = op0;
3070 info->offset = op1;
3071
3072 /* TImode and TFmode values are allowed in both pairs of X
3073 registers and individual Q registers. The available
3074 address modes are:
3075 X,X: 7-bit signed scaled offset
3076 Q: 9-bit signed offset
3077 We conservatively require an offset representable in either mode.
3078 */
3079 if (mode == TImode || mode == TFmode)
3080 return (offset_7bit_signed_scaled_p (mode, offset)
3081 && offset_9bit_signed_unscaled_p (mode, offset));
3082
3083 if (outer_code == PARALLEL)
3084 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3085 && offset_7bit_signed_scaled_p (mode, offset));
3086 else
3087 return (offset_9bit_signed_unscaled_p (mode, offset)
3088 || offset_12bit_unsigned_scaled_p (mode, offset));
3089 }
3090
3091 if (allow_reg_index_p)
3092 {
3093 /* Look for base + (scaled/extended) index register. */
3094 if (aarch64_base_register_rtx_p (op0, strict_p)
3095 && aarch64_classify_index (info, op1, mode, strict_p))
3096 {
3097 info->base = op0;
3098 return true;
3099 }
3100 if (aarch64_base_register_rtx_p (op1, strict_p)
3101 && aarch64_classify_index (info, op0, mode, strict_p))
3102 {
3103 info->base = op1;
3104 return true;
3105 }
3106 }
3107
3108 return false;
3109
3110 case POST_INC:
3111 case POST_DEC:
3112 case PRE_INC:
3113 case PRE_DEC:
3114 info->type = ADDRESS_REG_WB;
3115 info->base = XEXP (x, 0);
3116 info->offset = NULL_RTX;
3117 return aarch64_base_register_rtx_p (info->base, strict_p);
3118
3119 case POST_MODIFY:
3120 case PRE_MODIFY:
3121 info->type = ADDRESS_REG_WB;
3122 info->base = XEXP (x, 0);
3123 if (GET_CODE (XEXP (x, 1)) == PLUS
3124 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3125 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3126 && aarch64_base_register_rtx_p (info->base, strict_p))
3127 {
3128 HOST_WIDE_INT offset;
3129 info->offset = XEXP (XEXP (x, 1), 1);
3130 offset = INTVAL (info->offset);
3131
3132 /* TImode and TFmode values are allowed in both pairs of X
3133 registers and individual Q registers. The available
3134 address modes are:
3135 X,X: 7-bit signed scaled offset
3136 Q: 9-bit signed offset
3137 We conservatively require an offset representable in either mode.
3138 */
3139 if (mode == TImode || mode == TFmode)
3140 return (offset_7bit_signed_scaled_p (mode, offset)
3141 && offset_9bit_signed_unscaled_p (mode, offset));
3142
3143 if (outer_code == PARALLEL)
3144 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3145 && offset_7bit_signed_scaled_p (mode, offset));
3146 else
3147 return offset_9bit_signed_unscaled_p (mode, offset);
3148 }
3149 return false;
3150
3151 case CONST:
3152 case SYMBOL_REF:
3153 case LABEL_REF:
79517551
SN
3154 /* load literal: pc-relative constant pool entry. Only supported
3155 for SI mode or larger. */
43e9d192 3156 info->type = ADDRESS_SYMBOLIC;
79517551 3157 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
43e9d192
IB
3158 {
3159 rtx sym, addend;
3160
3161 split_const (x, &sym, &addend);
3162 return (GET_CODE (sym) == LABEL_REF
3163 || (GET_CODE (sym) == SYMBOL_REF
3164 && CONSTANT_POOL_ADDRESS_P (sym)));
3165 }
3166 return false;
3167
3168 case LO_SUM:
3169 info->type = ADDRESS_LO_SUM;
3170 info->base = XEXP (x, 0);
3171 info->offset = XEXP (x, 1);
3172 if (allow_reg_index_p
3173 && aarch64_base_register_rtx_p (info->base, strict_p))
3174 {
3175 rtx sym, offs;
3176 split_const (info->offset, &sym, &offs);
3177 if (GET_CODE (sym) == SYMBOL_REF
3178 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3179 == SYMBOL_SMALL_ABSOLUTE))
3180 {
3181 /* The symbol and offset must be aligned to the access size. */
3182 unsigned int align;
3183 unsigned int ref_size;
3184
3185 if (CONSTANT_POOL_ADDRESS_P (sym))
3186 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3187 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3188 {
3189 tree exp = SYMBOL_REF_DECL (sym);
3190 align = TYPE_ALIGN (TREE_TYPE (exp));
3191 align = CONSTANT_ALIGNMENT (exp, align);
3192 }
3193 else if (SYMBOL_REF_DECL (sym))
3194 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
3195 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3196 && SYMBOL_REF_BLOCK (sym) != NULL)
3197 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
3198 else
3199 align = BITS_PER_UNIT;
3200
3201 ref_size = GET_MODE_SIZE (mode);
3202 if (ref_size == 0)
3203 ref_size = GET_MODE_SIZE (DImode);
3204
3205 return ((INTVAL (offs) & (ref_size - 1)) == 0
3206 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3207 }
3208 }
3209 return false;
3210
3211 default:
3212 return false;
3213 }
3214}
3215
3216bool
3217aarch64_symbolic_address_p (rtx x)
3218{
3219 rtx offset;
3220
3221 split_const (x, &x, &offset);
3222 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3223}
3224
3225/* Classify the base of symbolic expression X, given that X appears in
3226 context CONTEXT. */
da4f13a4
MS
3227
3228enum aarch64_symbol_type
3229aarch64_classify_symbolic_expression (rtx x,
3230 enum aarch64_symbol_context context)
43e9d192
IB
3231{
3232 rtx offset;
da4f13a4 3233
43e9d192
IB
3234 split_const (x, &x, &offset);
3235 return aarch64_classify_symbol (x, context);
3236}
3237
3238
3239/* Return TRUE if X is a legitimate address for accessing memory in
3240 mode MODE. */
3241static bool
3242aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3243{
3244 struct aarch64_address_info addr;
3245
3246 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3247}
3248
3249/* Return TRUE if X is a legitimate address for accessing memory in
3250 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3251 pair operation. */
3252bool
3253aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
aef66c94 3254 RTX_CODE outer_code, bool strict_p)
43e9d192
IB
3255{
3256 struct aarch64_address_info addr;
3257
3258 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3259}
3260
3261/* Return TRUE if rtx X is immediate constant 0.0 */
3262bool
3520f7cc 3263aarch64_float_const_zero_rtx_p (rtx x)
43e9d192
IB
3264{
3265 REAL_VALUE_TYPE r;
3266
3267 if (GET_MODE (x) == VOIDmode)
3268 return false;
3269
3270 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3271 if (REAL_VALUE_MINUS_ZERO (r))
3272 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3273 return REAL_VALUES_EQUAL (r, dconst0);
3274}
3275
70f09188
AP
3276/* Return the fixed registers used for condition codes. */
3277
3278static bool
3279aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3280{
3281 *p1 = CC_REGNUM;
3282 *p2 = INVALID_REGNUM;
3283 return true;
3284}
3285
43e9d192
IB
3286enum machine_mode
3287aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3288{
3289 /* All floating point compares return CCFP if it is an equality
3290 comparison, and CCFPE otherwise. */
3291 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3292 {
3293 switch (code)
3294 {
3295 case EQ:
3296 case NE:
3297 case UNORDERED:
3298 case ORDERED:
3299 case UNLT:
3300 case UNLE:
3301 case UNGT:
3302 case UNGE:
3303 case UNEQ:
3304 case LTGT:
3305 return CCFPmode;
3306
3307 case LT:
3308 case LE:
3309 case GT:
3310 case GE:
3311 return CCFPEmode;
3312
3313 default:
3314 gcc_unreachable ();
3315 }
3316 }
3317
3318 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3319 && y == const0_rtx
3320 && (code == EQ || code == NE || code == LT || code == GE)
b056c910
N
3321 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3322 || GET_CODE (x) == NEG))
43e9d192
IB
3323 return CC_NZmode;
3324
1c992d1e 3325 /* A compare with a shifted operand. Because of canonicalization,
43e9d192
IB
3326 the comparison will have to be swapped when we emit the assembly
3327 code. */
3328 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3329 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3330 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3331 || GET_CODE (x) == LSHIFTRT
1c992d1e 3332 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
43e9d192
IB
3333 return CC_SWPmode;
3334
1c992d1e
RE
3335 /* Similarly for a negated operand, but we can only do this for
3336 equalities. */
3337 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3338 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3339 && (code == EQ || code == NE)
3340 && GET_CODE (x) == NEG)
3341 return CC_Zmode;
3342
43e9d192
IB
3343 /* A compare of a mode narrower than SI mode against zero can be done
3344 by extending the value in the comparison. */
3345 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3346 && y == const0_rtx)
3347 /* Only use sign-extension if we really need it. */
3348 return ((code == GT || code == GE || code == LE || code == LT)
3349 ? CC_SESWPmode : CC_ZESWPmode);
3350
3351 /* For everything else, return CCmode. */
3352 return CCmode;
3353}
3354
3355static unsigned
3356aarch64_get_condition_code (rtx x)
3357{
3358 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3359 enum rtx_code comp_code = GET_CODE (x);
3360
3361 if (GET_MODE_CLASS (mode) != MODE_CC)
3362 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3363
3364 switch (mode)
3365 {
3366 case CCFPmode:
3367 case CCFPEmode:
3368 switch (comp_code)
3369 {
3370 case GE: return AARCH64_GE;
3371 case GT: return AARCH64_GT;
3372 case LE: return AARCH64_LS;
3373 case LT: return AARCH64_MI;
3374 case NE: return AARCH64_NE;
3375 case EQ: return AARCH64_EQ;
3376 case ORDERED: return AARCH64_VC;
3377 case UNORDERED: return AARCH64_VS;
3378 case UNLT: return AARCH64_LT;
3379 case UNLE: return AARCH64_LE;
3380 case UNGT: return AARCH64_HI;
3381 case UNGE: return AARCH64_PL;
3382 default: gcc_unreachable ();
3383 }
3384 break;
3385
3386 case CCmode:
3387 switch (comp_code)
3388 {
3389 case NE: return AARCH64_NE;
3390 case EQ: return AARCH64_EQ;
3391 case GE: return AARCH64_GE;
3392 case GT: return AARCH64_GT;
3393 case LE: return AARCH64_LE;
3394 case LT: return AARCH64_LT;
3395 case GEU: return AARCH64_CS;
3396 case GTU: return AARCH64_HI;
3397 case LEU: return AARCH64_LS;
3398 case LTU: return AARCH64_CC;
3399 default: gcc_unreachable ();
3400 }
3401 break;
3402
3403 case CC_SWPmode:
3404 case CC_ZESWPmode:
3405 case CC_SESWPmode:
3406 switch (comp_code)
3407 {
3408 case NE: return AARCH64_NE;
3409 case EQ: return AARCH64_EQ;
3410 case GE: return AARCH64_LE;
3411 case GT: return AARCH64_LT;
3412 case LE: return AARCH64_GE;
3413 case LT: return AARCH64_GT;
3414 case GEU: return AARCH64_LS;
3415 case GTU: return AARCH64_CC;
3416 case LEU: return AARCH64_CS;
3417 case LTU: return AARCH64_HI;
3418 default: gcc_unreachable ();
3419 }
3420 break;
3421
3422 case CC_NZmode:
3423 switch (comp_code)
3424 {
3425 case NE: return AARCH64_NE;
3426 case EQ: return AARCH64_EQ;
3427 case GE: return AARCH64_PL;
3428 case LT: return AARCH64_MI;
3429 default: gcc_unreachable ();
3430 }
3431 break;
3432
1c992d1e
RE
3433 case CC_Zmode:
3434 switch (comp_code)
3435 {
3436 case NE: return AARCH64_NE;
3437 case EQ: return AARCH64_EQ;
3438 default: gcc_unreachable ();
3439 }
3440 break;
3441
43e9d192
IB
3442 default:
3443 gcc_unreachable ();
3444 break;
3445 }
3446}
3447
3448static unsigned
3449bit_count (unsigned HOST_WIDE_INT value)
3450{
3451 unsigned count = 0;
3452
3453 while (value)
3454 {
3455 count++;
3456 value &= value - 1;
3457 }
3458
3459 return count;
3460}
3461
3462void
3463aarch64_print_operand (FILE *f, rtx x, char code)
3464{
3465 switch (code)
3466 {
f541a481
KT
3467 /* An integer or symbol address without a preceding # sign. */
3468 case 'c':
3469 switch (GET_CODE (x))
3470 {
3471 case CONST_INT:
3472 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3473 break;
3474
3475 case SYMBOL_REF:
3476 output_addr_const (f, x);
3477 break;
3478
3479 case CONST:
3480 if (GET_CODE (XEXP (x, 0)) == PLUS
3481 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3482 {
3483 output_addr_const (f, x);
3484 break;
3485 }
3486 /* Fall through. */
3487
3488 default:
3489 output_operand_lossage ("Unsupported operand for code '%c'", code);
3490 }
3491 break;
3492
43e9d192
IB
3493 case 'e':
3494 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3495 {
3496 int n;
3497
3498 if (GET_CODE (x) != CONST_INT
3499 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3500 {
3501 output_operand_lossage ("invalid operand for '%%%c'", code);
3502 return;
3503 }
3504
3505 switch (n)
3506 {
3507 case 3:
3508 fputc ('b', f);
3509 break;
3510 case 4:
3511 fputc ('h', f);
3512 break;
3513 case 5:
3514 fputc ('w', f);
3515 break;
3516 default:
3517 output_operand_lossage ("invalid operand for '%%%c'", code);
3518 return;
3519 }
3520 }
3521 break;
3522
3523 case 'p':
3524 {
3525 int n;
3526
3527 /* Print N such that 2^N == X. */
3528 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3529 {
3530 output_operand_lossage ("invalid operand for '%%%c'", code);
3531 return;
3532 }
3533
3534 asm_fprintf (f, "%d", n);
3535 }
3536 break;
3537
3538 case 'P':
3539 /* Print the number of non-zero bits in X (a const_int). */
3540 if (GET_CODE (x) != CONST_INT)
3541 {
3542 output_operand_lossage ("invalid operand for '%%%c'", code);
3543 return;
3544 }
3545
3546 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3547 break;
3548
3549 case 'H':
3550 /* Print the higher numbered register of a pair (TImode) of regs. */
3551 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3552 {
3553 output_operand_lossage ("invalid operand for '%%%c'", code);
3554 return;
3555 }
3556
01a3a324 3557 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
3558 break;
3559
43e9d192
IB
3560 case 'm':
3561 /* Print a condition (eq, ne, etc). */
3562
3563 /* CONST_TRUE_RTX means always -- that's the default. */
3564 if (x == const_true_rtx)
3565 return;
3566
3567 if (!COMPARISON_P (x))
3568 {
3569 output_operand_lossage ("invalid operand for '%%%c'", code);
3570 return;
3571 }
3572
3573 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3574 break;
3575
3576 case 'M':
3577 /* Print the inverse of a condition (eq <-> ne, etc). */
3578
3579 /* CONST_TRUE_RTX means never -- that's the default. */
3580 if (x == const_true_rtx)
3581 {
3582 fputs ("nv", f);
3583 return;
3584 }
3585
3586 if (!COMPARISON_P (x))
3587 {
3588 output_operand_lossage ("invalid operand for '%%%c'", code);
3589 return;
3590 }
3591
3592 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3593 (aarch64_get_condition_code (x))], f);
3594 break;
3595
3596 case 'b':
3597 case 'h':
3598 case 's':
3599 case 'd':
3600 case 'q':
3601 /* Print a scalar FP/SIMD register name. */
3602 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3603 {
3604 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3605 return;
3606 }
50ce6f88 3607 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
3608 break;
3609
3610 case 'S':
3611 case 'T':
3612 case 'U':
3613 case 'V':
3614 /* Print the first FP/SIMD register name in a list. */
3615 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3616 {
3617 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3618 return;
3619 }
50ce6f88 3620 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
3621 break;
3622
a05c0ddf 3623 case 'X':
50d38551 3624 /* Print bottom 16 bits of integer constant in hex. */
a05c0ddf
IB
3625 if (GET_CODE (x) != CONST_INT)
3626 {
3627 output_operand_lossage ("invalid operand for '%%%c'", code);
3628 return;
3629 }
50d38551 3630 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
3631 break;
3632
43e9d192
IB
3633 case 'w':
3634 case 'x':
3635 /* Print a general register name or the zero register (32-bit or
3636 64-bit). */
3520f7cc
JG
3637 if (x == const0_rtx
3638 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 3639 {
50ce6f88 3640 asm_fprintf (f, "%czr", code);
43e9d192
IB
3641 break;
3642 }
3643
3644 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3645 {
50ce6f88 3646 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
3647 break;
3648 }
3649
3650 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3651 {
50ce6f88 3652 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
3653 break;
3654 }
3655
3656 /* Fall through */
3657
3658 case 0:
3659 /* Print a normal operand, if it's a general register, then we
3660 assume DImode. */
3661 if (x == NULL)
3662 {
3663 output_operand_lossage ("missing operand");
3664 return;
3665 }
3666
3667 switch (GET_CODE (x))
3668 {
3669 case REG:
01a3a324 3670 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
3671 break;
3672
3673 case MEM:
3674 aarch64_memory_reference_mode = GET_MODE (x);
3675 output_address (XEXP (x, 0));
3676 break;
3677
3678 case LABEL_REF:
3679 case SYMBOL_REF:
3680 output_addr_const (asm_out_file, x);
3681 break;
3682
3683 case CONST_INT:
3684 asm_fprintf (f, "%wd", INTVAL (x));
3685 break;
3686
3687 case CONST_VECTOR:
3520f7cc
JG
3688 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3689 {
3690 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3691 HOST_WIDE_INT_MIN,
3692 HOST_WIDE_INT_MAX));
3693 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3694 }
3695 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3696 {
3697 fputc ('0', f);
3698 }
3699 else
3700 gcc_unreachable ();
43e9d192
IB
3701 break;
3702
3520f7cc
JG
3703 case CONST_DOUBLE:
3704 /* CONST_DOUBLE can represent a double-width integer.
3705 In this case, the mode of x is VOIDmode. */
3706 if (GET_MODE (x) == VOIDmode)
3707 ; /* Do Nothing. */
3708 else if (aarch64_float_const_zero_rtx_p (x))
3709 {
3710 fputc ('0', f);
3711 break;
3712 }
3713 else if (aarch64_float_const_representable_p (x))
3714 {
3715#define buf_size 20
3716 char float_buf[buf_size] = {'\0'};
3717 REAL_VALUE_TYPE r;
3718 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3719 real_to_decimal_for_mode (float_buf, &r,
3720 buf_size, buf_size,
3721 1, GET_MODE (x));
3722 asm_fprintf (asm_out_file, "%s", float_buf);
3723 break;
3724#undef buf_size
3725 }
3726 output_operand_lossage ("invalid constant");
3727 return;
43e9d192
IB
3728 default:
3729 output_operand_lossage ("invalid operand");
3730 return;
3731 }
3732 break;
3733
3734 case 'A':
3735 if (GET_CODE (x) == HIGH)
3736 x = XEXP (x, 0);
3737
3738 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3739 {
3740 case SYMBOL_SMALL_GOT:
3741 asm_fprintf (asm_out_file, ":got:");
3742 break;
3743
3744 case SYMBOL_SMALL_TLSGD:
3745 asm_fprintf (asm_out_file, ":tlsgd:");
3746 break;
3747
3748 case SYMBOL_SMALL_TLSDESC:
3749 asm_fprintf (asm_out_file, ":tlsdesc:");
3750 break;
3751
3752 case SYMBOL_SMALL_GOTTPREL:
3753 asm_fprintf (asm_out_file, ":gottprel:");
3754 break;
3755
3756 case SYMBOL_SMALL_TPREL:
3757 asm_fprintf (asm_out_file, ":tprel:");
3758 break;
3759
87dd8ab0
MS
3760 case SYMBOL_TINY_GOT:
3761 gcc_unreachable ();
3762 break;
3763
43e9d192
IB
3764 default:
3765 break;
3766 }
3767 output_addr_const (asm_out_file, x);
3768 break;
3769
3770 case 'L':
3771 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3772 {
3773 case SYMBOL_SMALL_GOT:
3774 asm_fprintf (asm_out_file, ":lo12:");
3775 break;
3776
3777 case SYMBOL_SMALL_TLSGD:
3778 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3779 break;
3780
3781 case SYMBOL_SMALL_TLSDESC:
3782 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3783 break;
3784
3785 case SYMBOL_SMALL_GOTTPREL:
3786 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3787 break;
3788
3789 case SYMBOL_SMALL_TPREL:
3790 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3791 break;
3792
87dd8ab0
MS
3793 case SYMBOL_TINY_GOT:
3794 asm_fprintf (asm_out_file, ":got:");
3795 break;
3796
43e9d192
IB
3797 default:
3798 break;
3799 }
3800 output_addr_const (asm_out_file, x);
3801 break;
3802
3803 case 'G':
3804
3805 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3806 {
3807 case SYMBOL_SMALL_TPREL:
3808 asm_fprintf (asm_out_file, ":tprel_hi12:");
3809 break;
3810 default:
3811 break;
3812 }
3813 output_addr_const (asm_out_file, x);
3814 break;
3815
3816 default:
3817 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3818 return;
3819 }
3820}
3821
3822void
3823aarch64_print_operand_address (FILE *f, rtx x)
3824{
3825 struct aarch64_address_info addr;
3826
3827 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3828 MEM, true))
3829 switch (addr.type)
3830 {
3831 case ADDRESS_REG_IMM:
3832 if (addr.offset == const0_rtx)
01a3a324 3833 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43e9d192 3834 else
16a3246f 3835 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
43e9d192
IB
3836 INTVAL (addr.offset));
3837 return;
3838
3839 case ADDRESS_REG_REG:
3840 if (addr.shift == 0)
16a3246f 3841 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 3842 reg_names [REGNO (addr.offset)]);
43e9d192 3843 else
16a3246f 3844 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 3845 reg_names [REGNO (addr.offset)], addr.shift);
43e9d192
IB
3846 return;
3847
3848 case ADDRESS_REG_UXTW:
3849 if (addr.shift == 0)
16a3246f 3850 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
3851 REGNO (addr.offset) - R0_REGNUM);
3852 else
16a3246f 3853 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
3854 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3855 return;
3856
3857 case ADDRESS_REG_SXTW:
3858 if (addr.shift == 0)
16a3246f 3859 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
3860 REGNO (addr.offset) - R0_REGNUM);
3861 else
16a3246f 3862 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
3863 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3864 return;
3865
3866 case ADDRESS_REG_WB:
3867 switch (GET_CODE (x))
3868 {
3869 case PRE_INC:
16a3246f 3870 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
3871 GET_MODE_SIZE (aarch64_memory_reference_mode));
3872 return;
3873 case POST_INC:
16a3246f 3874 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
43e9d192
IB
3875 GET_MODE_SIZE (aarch64_memory_reference_mode));
3876 return;
3877 case PRE_DEC:
16a3246f 3878 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
3879 GET_MODE_SIZE (aarch64_memory_reference_mode));
3880 return;
3881 case POST_DEC:
16a3246f 3882 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
43e9d192
IB
3883 GET_MODE_SIZE (aarch64_memory_reference_mode));
3884 return;
3885 case PRE_MODIFY:
16a3246f 3886 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
43e9d192
IB
3887 INTVAL (addr.offset));
3888 return;
3889 case POST_MODIFY:
16a3246f 3890 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
43e9d192
IB
3891 INTVAL (addr.offset));
3892 return;
3893 default:
3894 break;
3895 }
3896 break;
3897
3898 case ADDRESS_LO_SUM:
16a3246f 3899 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
3900 output_addr_const (f, addr.offset);
3901 asm_fprintf (f, "]");
3902 return;
3903
3904 case ADDRESS_SYMBOLIC:
3905 break;
3906 }
3907
3908 output_addr_const (f, x);
3909}
3910
43e9d192
IB
3911bool
3912aarch64_label_mentioned_p (rtx x)
3913{
3914 const char *fmt;
3915 int i;
3916
3917 if (GET_CODE (x) == LABEL_REF)
3918 return true;
3919
3920 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3921 referencing instruction, but they are constant offsets, not
3922 symbols. */
3923 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3924 return false;
3925
3926 fmt = GET_RTX_FORMAT (GET_CODE (x));
3927 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3928 {
3929 if (fmt[i] == 'E')
3930 {
3931 int j;
3932
3933 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3934 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3935 return 1;
3936 }
3937 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3938 return 1;
3939 }
3940
3941 return 0;
3942}
3943
3944/* Implement REGNO_REG_CLASS. */
3945
3946enum reg_class
3947aarch64_regno_regclass (unsigned regno)
3948{
3949 if (GP_REGNUM_P (regno))
3950 return CORE_REGS;
3951
3952 if (regno == SP_REGNUM)
3953 return STACK_REG;
3954
3955 if (regno == FRAME_POINTER_REGNUM
3956 || regno == ARG_POINTER_REGNUM)
f24bb080 3957 return POINTER_REGS;
43e9d192
IB
3958
3959 if (FP_REGNUM_P (regno))
3960 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3961
3962 return NO_REGS;
3963}
3964
3965/* Try a machine-dependent way of reloading an illegitimate address
3966 operand. If we find one, push the reload and return the new rtx. */
3967
3968rtx
3969aarch64_legitimize_reload_address (rtx *x_p,
3970 enum machine_mode mode,
3971 int opnum, int type,
3972 int ind_levels ATTRIBUTE_UNUSED)
3973{
3974 rtx x = *x_p;
3975
3976 /* Do not allow mem (plus (reg, const)) if vector mode. */
3977 if (aarch64_vector_mode_p (mode)
3978 && GET_CODE (x) == PLUS
3979 && REG_P (XEXP (x, 0))
3980 && CONST_INT_P (XEXP (x, 1)))
3981 {
3982 rtx orig_rtx = x;
3983 x = copy_rtx (x);
3984 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3985 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3986 opnum, (enum reload_type) type);
3987 return x;
3988 }
3989
3990 /* We must recognize output that we have already generated ourselves. */
3991 if (GET_CODE (x) == PLUS
3992 && GET_CODE (XEXP (x, 0)) == PLUS
3993 && REG_P (XEXP (XEXP (x, 0), 0))
3994 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3995 && CONST_INT_P (XEXP (x, 1)))
3996 {
3997 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3998 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3999 opnum, (enum reload_type) type);
4000 return x;
4001 }
4002
4003 /* We wish to handle large displacements off a base register by splitting
4004 the addend across an add and the mem insn. This can cut the number of
4005 extra insns needed from 3 to 1. It is only useful for load/store of a
4006 single register with 12 bit offset field. */
4007 if (GET_CODE (x) == PLUS
4008 && REG_P (XEXP (x, 0))
4009 && CONST_INT_P (XEXP (x, 1))
4010 && HARD_REGISTER_P (XEXP (x, 0))
4011 && mode != TImode
4012 && mode != TFmode
4013 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4014 {
4015 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4016 HOST_WIDE_INT low = val & 0xfff;
4017 HOST_WIDE_INT high = val - low;
4018 HOST_WIDE_INT offs;
4019 rtx cst;
28514dda
YZ
4020 enum machine_mode xmode = GET_MODE (x);
4021
4022 /* In ILP32, xmode can be either DImode or SImode. */
4023 gcc_assert (xmode == DImode || xmode == SImode);
43e9d192
IB
4024
4025 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4026 BLKmode alignment. */
4027 if (GET_MODE_SIZE (mode) == 0)
4028 return NULL_RTX;
4029
4030 offs = low % GET_MODE_SIZE (mode);
4031
4032 /* Align misaligned offset by adjusting high part to compensate. */
4033 if (offs != 0)
4034 {
4035 if (aarch64_uimm12_shift (high + offs))
4036 {
4037 /* Align down. */
4038 low = low - offs;
4039 high = high + offs;
4040 }
4041 else
4042 {
4043 /* Align up. */
4044 offs = GET_MODE_SIZE (mode) - offs;
4045 low = low + offs;
4046 high = high + (low & 0x1000) - offs;
4047 low &= 0xfff;
4048 }
4049 }
4050
4051 /* Check for overflow. */
4052 if (high + low != val)
4053 return NULL_RTX;
4054
4055 cst = GEN_INT (high);
4056 if (!aarch64_uimm12_shift (high))
28514dda 4057 cst = force_const_mem (xmode, cst);
43e9d192
IB
4058
4059 /* Reload high part into base reg, leaving the low part
acf67b8e
KT
4060 in the mem instruction.
4061 Note that replacing this gen_rtx_PLUS with plus_constant is
4062 wrong in this case because we rely on the
4063 (plus (plus reg c1) c2) structure being preserved so that
4064 XEXP (*p, 0) in push_reload below uses the correct term. */
4d0e5b54
JG
4065 x = gen_rtx_PLUS (xmode,
4066 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4067 GEN_INT (low));
43e9d192
IB
4068
4069 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
28514dda 4070 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
43e9d192
IB
4071 opnum, (enum reload_type) type);
4072 return x;
4073 }
4074
4075 return NULL_RTX;
4076}
4077
4078
4079static reg_class_t
4080aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4081 reg_class_t rclass,
4082 enum machine_mode mode,
4083 secondary_reload_info *sri)
4084{
43e9d192
IB
4085 /* Without the TARGET_SIMD instructions we cannot move a Q register
4086 to a Q register directly. We need a scratch. */
4087 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4088 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4089 && reg_class_subset_p (rclass, FP_REGS))
4090 {
4091 if (mode == TFmode)
4092 sri->icode = CODE_FOR_aarch64_reload_movtf;
4093 else if (mode == TImode)
4094 sri->icode = CODE_FOR_aarch64_reload_movti;
4095 return NO_REGS;
4096 }
4097
4098 /* A TFmode or TImode memory access should be handled via an FP_REGS
4099 because AArch64 has richer addressing modes for LDR/STR instructions
4100 than LDP/STP instructions. */
4101 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4102 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4103 return FP_REGS;
4104
4105 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4106 return CORE_REGS;
4107
4108 return NO_REGS;
4109}
4110
4111static bool
4112aarch64_can_eliminate (const int from, const int to)
4113{
4114 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4115 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4116
4117 if (frame_pointer_needed)
4118 {
4119 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4120 return true;
4121 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4122 return false;
4123 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4124 && !cfun->calls_alloca)
4125 return true;
4126 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4127 return true;
0b7f8166
MS
4128
4129 return false;
43e9d192 4130 }
777e6976 4131
43e9d192
IB
4132 return true;
4133}
4134
4135HOST_WIDE_INT
4136aarch64_initial_elimination_offset (unsigned from, unsigned to)
4137{
4138 HOST_WIDE_INT frame_size;
4139 HOST_WIDE_INT offset;
4140
4141 aarch64_layout_frame ();
4142 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4143 + crtl->outgoing_args_size
4144 + cfun->machine->saved_varargs_size);
4145
78c29983
MS
4146 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4147 offset = frame_size;
4148
4149 if (to == HARD_FRAME_POINTER_REGNUM)
4150 {
4151 if (from == ARG_POINTER_REGNUM)
4152 return offset - crtl->outgoing_args_size;
4153
4154 if (from == FRAME_POINTER_REGNUM)
4155 return cfun->machine->frame.saved_regs_size + get_frame_size ();
4156 }
4157
4158 if (to == STACK_POINTER_REGNUM)
4159 {
4160 if (from == FRAME_POINTER_REGNUM)
4161 {
4162 HOST_WIDE_INT elim = crtl->outgoing_args_size
4163 + cfun->machine->frame.saved_regs_size
53e5ace2 4164 + get_frame_size ();
78c29983
MS
4165 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4166 return elim;
4167 }
4168 }
4169
4170 return offset;
43e9d192
IB
4171}
4172
4173
4174/* Implement RETURN_ADDR_RTX. We do not support moving back to a
4175 previous frame. */
4176
4177rtx
4178aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4179{
4180 if (count != 0)
4181 return const0_rtx;
4182 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4183}
4184
4185
4186static void
4187aarch64_asm_trampoline_template (FILE *f)
4188{
28514dda
YZ
4189 if (TARGET_ILP32)
4190 {
4191 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4192 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4193 }
4194 else
4195 {
4196 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4197 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4198 }
01a3a324 4199 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 4200 assemble_aligned_integer (4, const0_rtx);
28514dda
YZ
4201 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4202 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
4203}
4204
4205static void
4206aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4207{
4208 rtx fnaddr, mem, a_tramp;
28514dda 4209 const int tramp_code_sz = 16;
43e9d192
IB
4210
4211 /* Don't need to copy the trailing D-words, we fill those in below. */
4212 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
4213 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4214 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 4215 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
4216 if (GET_MODE (fnaddr) != ptr_mode)
4217 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
4218 emit_move_insn (mem, fnaddr);
4219
28514dda 4220 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
4221 emit_move_insn (mem, chain_value);
4222
4223 /* XXX We should really define a "clear_cache" pattern and use
4224 gen_clear_cache(). */
4225 a_tramp = XEXP (m_tramp, 0);
4226 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
28514dda
YZ
4227 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4228 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4229 ptr_mode);
43e9d192
IB
4230}
4231
4232static unsigned char
4233aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4234{
4235 switch (regclass)
4236 {
4237 case CORE_REGS:
4238 case POINTER_REGS:
4239 case GENERAL_REGS:
4240 case ALL_REGS:
4241 case FP_REGS:
4242 case FP_LO_REGS:
4243 return
4244 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
aef66c94 4245 (GET_MODE_SIZE (mode) + 7) / 8;
43e9d192
IB
4246 case STACK_REG:
4247 return 1;
4248
4249 case NO_REGS:
4250 return 0;
4251
4252 default:
4253 break;
4254 }
4255 gcc_unreachable ();
4256}
4257
4258static reg_class_t
78d8b9f0 4259aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 4260{
51bb310d 4261 if (regclass == POINTER_REGS)
78d8b9f0
IB
4262 return GENERAL_REGS;
4263
51bb310d
MS
4264 if (regclass == STACK_REG)
4265 {
4266 if (REG_P(x)
4267 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4268 return regclass;
4269
4270 return NO_REGS;
4271 }
4272
78d8b9f0
IB
4273 /* If it's an integer immediate that MOVI can't handle, then
4274 FP_REGS is not an option, so we return NO_REGS instead. */
4275 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4276 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4277 return NO_REGS;
4278
27bd251b
IB
4279 /* Register eliminiation can result in a request for
4280 SP+constant->FP_REGS. We cannot support such operations which
4281 use SP as source and an FP_REG as destination, so reject out
4282 right now. */
4283 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4284 {
4285 rtx lhs = XEXP (x, 0);
4286
4287 /* Look through a possible SUBREG introduced by ILP32. */
4288 if (GET_CODE (lhs) == SUBREG)
4289 lhs = SUBREG_REG (lhs);
4290
4291 gcc_assert (REG_P (lhs));
4292 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4293 POINTER_REGS));
4294 return NO_REGS;
4295 }
4296
78d8b9f0 4297 return regclass;
43e9d192
IB
4298}
4299
4300void
4301aarch64_asm_output_labelref (FILE* f, const char *name)
4302{
4303 asm_fprintf (f, "%U%s", name);
4304}
4305
4306static void
4307aarch64_elf_asm_constructor (rtx symbol, int priority)
4308{
4309 if (priority == DEFAULT_INIT_PRIORITY)
4310 default_ctor_section_asm_out_constructor (symbol, priority);
4311 else
4312 {
4313 section *s;
4314 char buf[18];
4315 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4316 s = get_section (buf, SECTION_WRITE, NULL);
4317 switch_to_section (s);
4318 assemble_align (POINTER_SIZE);
28514dda 4319 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
4320 }
4321}
4322
4323static void
4324aarch64_elf_asm_destructor (rtx symbol, int priority)
4325{
4326 if (priority == DEFAULT_INIT_PRIORITY)
4327 default_dtor_section_asm_out_destructor (symbol, priority);
4328 else
4329 {
4330 section *s;
4331 char buf[18];
4332 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4333 s = get_section (buf, SECTION_WRITE, NULL);
4334 switch_to_section (s);
4335 assemble_align (POINTER_SIZE);
28514dda 4336 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
4337 }
4338}
4339
4340const char*
4341aarch64_output_casesi (rtx *operands)
4342{
4343 char buf[100];
4344 char label[100];
592a16fc 4345 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
43e9d192
IB
4346 int index;
4347 static const char *const patterns[4][2] =
4348 {
4349 {
4350 "ldrb\t%w3, [%0,%w1,uxtw]",
4351 "add\t%3, %4, %w3, sxtb #2"
4352 },
4353 {
4354 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4355 "add\t%3, %4, %w3, sxth #2"
4356 },
4357 {
4358 "ldr\t%w3, [%0,%w1,uxtw #2]",
4359 "add\t%3, %4, %w3, sxtw #2"
4360 },
4361 /* We assume that DImode is only generated when not optimizing and
4362 that we don't really need 64-bit address offsets. That would
4363 imply an object file with 8GB of code in a single function! */
4364 {
4365 "ldr\t%w3, [%0,%w1,uxtw #2]",
4366 "add\t%3, %4, %w3, sxtw #2"
4367 }
4368 };
4369
4370 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4371
4372 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4373
4374 gcc_assert (index >= 0 && index <= 3);
4375
4376 /* Need to implement table size reduction, by chaning the code below. */
4377 output_asm_insn (patterns[index][0], operands);
4378 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4379 snprintf (buf, sizeof (buf),
4380 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4381 output_asm_insn (buf, operands);
4382 output_asm_insn (patterns[index][1], operands);
4383 output_asm_insn ("br\t%3", operands);
4384 assemble_label (asm_out_file, label);
4385 return "";
4386}
4387
4388
4389/* Return size in bits of an arithmetic operand which is shifted/scaled and
4390 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4391 operator. */
4392
4393int
4394aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4395{
4396 if (shift >= 0 && shift <= 3)
4397 {
4398 int size;
4399 for (size = 8; size <= 32; size *= 2)
4400 {
4401 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4402 if (mask == bits << shift)
4403 return size;
4404 }
4405 }
4406 return 0;
4407}
4408
4409static bool
4410aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4411 const_rtx x ATTRIBUTE_UNUSED)
4412{
4413 /* We can't use blocks for constants when we're using a per-function
4414 constant pool. */
4415 return false;
4416}
4417
4418static section *
4419aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4420 rtx x ATTRIBUTE_UNUSED,
4421 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4422{
4423 /* Force all constant pool entries into the current function section. */
4424 return function_section (current_function_decl);
4425}
4426
4427
4428/* Costs. */
4429
4430/* Helper function for rtx cost calculation. Strip a shift expression
4431 from X. Returns the inner operand if successful, or the original
4432 expression on failure. */
4433static rtx
4434aarch64_strip_shift (rtx x)
4435{
4436 rtx op = x;
4437
4438 if ((GET_CODE (op) == ASHIFT
4439 || GET_CODE (op) == ASHIFTRT
4440 || GET_CODE (op) == LSHIFTRT)
4441 && CONST_INT_P (XEXP (op, 1)))
4442 return XEXP (op, 0);
4443
4444 if (GET_CODE (op) == MULT
4445 && CONST_INT_P (XEXP (op, 1))
4446 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4447 return XEXP (op, 0);
4448
4449 return x;
4450}
4451
4452/* Helper function for rtx cost calculation. Strip a shift or extend
4453 expression from X. Returns the inner operand if successful, or the
4454 original expression on failure. We deal with a number of possible
4455 canonicalization variations here. */
4456static rtx
4457aarch64_strip_shift_or_extend (rtx x)
4458{
4459 rtx op = x;
4460
4461 /* Zero and sign extraction of a widened value. */
4462 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4463 && XEXP (op, 2) == const0_rtx
4464 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4465 XEXP (op, 1)))
4466 return XEXP (XEXP (op, 0), 0);
4467
4468 /* It can also be represented (for zero-extend) as an AND with an
4469 immediate. */
4470 if (GET_CODE (op) == AND
4471 && GET_CODE (XEXP (op, 0)) == MULT
4472 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4473 && CONST_INT_P (XEXP (op, 1))
4474 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4475 INTVAL (XEXP (op, 1))) != 0)
4476 return XEXP (XEXP (op, 0), 0);
4477
4478 /* Now handle extended register, as this may also have an optional
4479 left shift by 1..4. */
4480 if (GET_CODE (op) == ASHIFT
4481 && CONST_INT_P (XEXP (op, 1))
4482 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4483 op = XEXP (op, 0);
4484
4485 if (GET_CODE (op) == ZERO_EXTEND
4486 || GET_CODE (op) == SIGN_EXTEND)
4487 op = XEXP (op, 0);
4488
4489 if (op != x)
4490 return op;
4491
4492 return aarch64_strip_shift (x);
4493}
4494
4495/* Calculate the cost of calculating X, storing it in *COST. Result
4496 is true if the total cost of the operation has now been calculated. */
4497static bool
4498aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4499 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4500{
4501 rtx op0, op1;
73250c4c 4502 const struct cpu_cost_table *extra_cost
43e9d192
IB
4503 = aarch64_tune_params->insn_extra_cost;
4504
4505 switch (code)
4506 {
4507 case SET:
4508 op0 = SET_DEST (x);
4509 op1 = SET_SRC (x);
4510
4511 switch (GET_CODE (op0))
4512 {
4513 case MEM:
4514 if (speed)
73250c4c 4515 *cost += extra_cost->ldst.store;
43e9d192
IB
4516
4517 if (op1 != const0_rtx)
4518 *cost += rtx_cost (op1, SET, 1, speed);
4519 return true;
4520
4521 case SUBREG:
4522 if (! REG_P (SUBREG_REG (op0)))
4523 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4524 /* Fall through. */
4525 case REG:
4526 /* Cost is just the cost of the RHS of the set. */
4527 *cost += rtx_cost (op1, SET, 1, true);
4528 return true;
4529
4530 case ZERO_EXTRACT: /* Bit-field insertion. */
4531 case SIGN_EXTRACT:
4532 /* Strip any redundant widening of the RHS to meet the width of
4533 the target. */
4534 if (GET_CODE (op1) == SUBREG)
4535 op1 = SUBREG_REG (op1);
4536 if ((GET_CODE (op1) == ZERO_EXTEND
4537 || GET_CODE (op1) == SIGN_EXTEND)
4538 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4539 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4540 >= INTVAL (XEXP (op0, 1))))
4541 op1 = XEXP (op1, 0);
4542 *cost += rtx_cost (op1, SET, 1, speed);
4543 return true;
4544
4545 default:
4546 break;
4547 }
4548 return false;
4549
4550 case MEM:
4551 if (speed)
73250c4c 4552 *cost += extra_cost->ldst.load;
43e9d192
IB
4553
4554 return true;
4555
4556 case NEG:
4557 op0 = CONST0_RTX (GET_MODE (x));
4558 op1 = XEXP (x, 0);
4559 goto cost_minus;
4560
4561 case COMPARE:
4562 op0 = XEXP (x, 0);
4563 op1 = XEXP (x, 1);
4564
4565 if (op1 == const0_rtx
4566 && GET_CODE (op0) == AND)
4567 {
4568 x = op0;
4569 goto cost_logic;
4570 }
4571
4572 /* Comparisons can work if the order is swapped.
4573 Canonicalization puts the more complex operation first, but
4574 we want it in op1. */
4575 if (! (REG_P (op0)
4576 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4577 {
4578 op0 = XEXP (x, 1);
4579 op1 = XEXP (x, 0);
4580 }
4581 goto cost_minus;
4582
4583 case MINUS:
4584 op0 = XEXP (x, 0);
4585 op1 = XEXP (x, 1);
4586
4587 cost_minus:
4588 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4589 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4590 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4591 {
4592 if (op0 != const0_rtx)
4593 *cost += rtx_cost (op0, MINUS, 0, speed);
4594
4595 if (CONST_INT_P (op1))
4596 {
4597 if (!aarch64_uimm12_shift (INTVAL (op1)))
4598 *cost += rtx_cost (op1, MINUS, 1, speed);
4599 }
4600 else
4601 {
4602 op1 = aarch64_strip_shift_or_extend (op1);
4603 *cost += rtx_cost (op1, MINUS, 1, speed);
4604 }
4605 return true;
4606 }
4607
4608 return false;
4609
4610 case PLUS:
4611 op0 = XEXP (x, 0);
4612 op1 = XEXP (x, 1);
4613
4614 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4615 {
4616 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4617 {
4618 *cost += rtx_cost (op0, PLUS, 0, speed);
4619 }
4620 else
4621 {
4622 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4623
4624 if (new_op0 == op0
4625 && GET_CODE (op0) == MULT)
4626 {
4627 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4628 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4629 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4630 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4631 {
4632 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4633 speed)
4634 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4635 speed)
4636 + rtx_cost (op1, PLUS, 1, speed));
4637 if (speed)
73250c4c
KT
4638 *cost +=
4639 extra_cost->mult[GET_MODE (x) == DImode].extend_add;
43e9d192
IB
4640 return true;
4641 }
328402a9 4642
43e9d192
IB
4643 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4644 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4645 + rtx_cost (op1, PLUS, 1, speed));
4646
4647 if (speed)
73250c4c 4648 *cost += extra_cost->mult[GET_MODE (x) == DImode].add;
328402a9
RE
4649
4650 return true;
43e9d192
IB
4651 }
4652
4653 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4654 + rtx_cost (op1, PLUS, 1, speed));
4655 }
4656 return true;
4657 }
4658
4659 return false;
4660
4661 case IOR:
4662 case XOR:
4663 case AND:
4664 cost_logic:
4665 op0 = XEXP (x, 0);
4666 op1 = XEXP (x, 1);
4667
4668 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4669 {
4670 if (CONST_INT_P (op1)
4671 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4672 {
4673 *cost += rtx_cost (op0, AND, 0, speed);
4674 }
4675 else
4676 {
4677 if (GET_CODE (op0) == NOT)
4678 op0 = XEXP (op0, 0);
4679 op0 = aarch64_strip_shift (op0);
4680 *cost += (rtx_cost (op0, AND, 0, speed)
4681 + rtx_cost (op1, AND, 1, speed));
4682 }
4683 return true;
4684 }
4685 return false;
4686
4687 case ZERO_EXTEND:
4688 if ((GET_MODE (x) == DImode
4689 && GET_MODE (XEXP (x, 0)) == SImode)
4690 || GET_CODE (XEXP (x, 0)) == MEM)
4691 {
4692 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4693 return true;
4694 }
4695 return false;
4696
4697 case SIGN_EXTEND:
4698 if (GET_CODE (XEXP (x, 0)) == MEM)
4699 {
4700 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4701 return true;
4702 }
4703 return false;
4704
4705 case ROTATE:
4706 if (!CONST_INT_P (XEXP (x, 1)))
4707 *cost += COSTS_N_INSNS (2);
4708 /* Fall through. */
4709 case ROTATERT:
4710 case LSHIFTRT:
4711 case ASHIFT:
4712 case ASHIFTRT:
4713
4714 /* Shifting by a register often takes an extra cycle. */
4715 if (speed && !CONST_INT_P (XEXP (x, 1)))
73250c4c 4716 *cost += extra_cost->alu.arith_shift_reg;
43e9d192
IB
4717
4718 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4719 return true;
4720
4721 case HIGH:
4722 if (!CONSTANT_P (XEXP (x, 0)))
4723 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4724 return true;
4725
4726 case LO_SUM:
4727 if (!CONSTANT_P (XEXP (x, 1)))
4728 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4729 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4730 return true;
4731
4732 case ZERO_EXTRACT:
4733 case SIGN_EXTRACT:
4734 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4735 return true;
4736
4737 case MULT:
4738 op0 = XEXP (x, 0);
4739 op1 = XEXP (x, 1);
4740
4741 *cost = COSTS_N_INSNS (1);
4742 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4743 {
4744 if (CONST_INT_P (op1)
4745 && exact_log2 (INTVAL (op1)) > 0)
4746 {
4747 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4748 return true;
4749 }
4750
4751 if ((GET_CODE (op0) == ZERO_EXTEND
4752 && GET_CODE (op1) == ZERO_EXTEND)
4753 || (GET_CODE (op0) == SIGN_EXTEND
4754 && GET_CODE (op1) == SIGN_EXTEND))
4755 {
4756 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4757 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4758 if (speed)
73250c4c 4759 *cost += extra_cost->mult[GET_MODE (x) == DImode].extend;
43e9d192
IB
4760 return true;
4761 }
4762
4763 if (speed)
73250c4c 4764 *cost += extra_cost->mult[GET_MODE (x) == DImode].simple;
43e9d192
IB
4765 }
4766 else if (speed)
4767 {
4768 if (GET_MODE (x) == DFmode)
73250c4c 4769 *cost += extra_cost->fp[1].mult;
43e9d192 4770 else if (GET_MODE (x) == SFmode)
73250c4c 4771 *cost += extra_cost->fp[0].mult;
43e9d192
IB
4772 }
4773
4774 return false; /* All arguments need to be in registers. */
4775
4776 case MOD:
4777 case UMOD:
4778 *cost = COSTS_N_INSNS (2);
4779 if (speed)
4780 {
4781 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
73250c4c
KT
4782 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
4783 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
43e9d192 4784 else if (GET_MODE (x) == DFmode)
73250c4c
KT
4785 *cost += (extra_cost->fp[1].mult
4786 + extra_cost->fp[1].div);
43e9d192 4787 else if (GET_MODE (x) == SFmode)
73250c4c
KT
4788 *cost += (extra_cost->fp[0].mult
4789 + extra_cost->fp[0].div);
43e9d192
IB
4790 }
4791 return false; /* All arguments need to be in registers. */
4792
4793 case DIV:
4794 case UDIV:
4795 *cost = COSTS_N_INSNS (1);
4796 if (speed)
4797 {
4798 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
73250c4c 4799 *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
43e9d192 4800 else if (GET_MODE (x) == DFmode)
73250c4c 4801 *cost += extra_cost->fp[1].div;
43e9d192 4802 else if (GET_MODE (x) == SFmode)
73250c4c 4803 *cost += extra_cost->fp[0].div;
43e9d192
IB
4804 }
4805 return false; /* All arguments need to be in registers. */
4806
4807 default:
4808 break;
4809 }
4810 return false;
4811}
4812
4813static int
4814aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4815 enum machine_mode mode ATTRIBUTE_UNUSED,
4816 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4817{
4818 enum rtx_code c = GET_CODE (x);
4819 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4820
4821 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4822 return addr_cost->pre_modify;
4823
4824 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4825 return addr_cost->post_modify;
4826
4827 if (c == PLUS)
4828 {
4829 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4830 return addr_cost->imm_offset;
4831 else if (GET_CODE (XEXP (x, 0)) == MULT
4832 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4833 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4834 return addr_cost->register_extend;
4835
4836 return addr_cost->register_offset;
4837 }
4838 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4839 return addr_cost->imm_offset;
4840
4841 return 0;
4842}
4843
4844static int
8a3a7e67
RH
4845aarch64_register_move_cost (enum machine_mode mode,
4846 reg_class_t from_i, reg_class_t to_i)
43e9d192 4847{
8a3a7e67
RH
4848 enum reg_class from = (enum reg_class) from_i;
4849 enum reg_class to = (enum reg_class) to_i;
43e9d192
IB
4850 const struct cpu_regmove_cost *regmove_cost
4851 = aarch64_tune_params->regmove_cost;
4852
6ee70f81
AP
4853 /* Moving between GPR and stack cost is the same as GP2GP. */
4854 if ((from == GENERAL_REGS && to == STACK_REG)
4855 || (to == GENERAL_REGS && from == STACK_REG))
4856 return regmove_cost->GP2GP;
4857
4858 /* To/From the stack register, we move via the gprs. */
4859 if (to == STACK_REG || from == STACK_REG)
4860 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
4861 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
4862
43e9d192
IB
4863 if (from == GENERAL_REGS && to == GENERAL_REGS)
4864 return regmove_cost->GP2GP;
4865 else if (from == GENERAL_REGS)
4866 return regmove_cost->GP2FP;
4867 else if (to == GENERAL_REGS)
4868 return regmove_cost->FP2GP;
4869
4870 /* When AdvSIMD instructions are disabled it is not possible to move
4871 a 128-bit value directly between Q registers. This is handled in
4872 secondary reload. A general register is used as a scratch to move
4873 the upper DI value and the lower DI value is moved directly,
4874 hence the cost is the sum of three moves. */
8a3a7e67 4875 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 128)
43e9d192
IB
4876 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4877
4878 return regmove_cost->FP2FP;
4879}
4880
4881static int
4882aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4883 reg_class_t rclass ATTRIBUTE_UNUSED,
4884 bool in ATTRIBUTE_UNUSED)
4885{
4886 return aarch64_tune_params->memmov_cost;
4887}
4888
d126a4ae
AP
4889/* Return the number of instructions that can be issued per cycle. */
4890static int
4891aarch64_sched_issue_rate (void)
4892{
4893 return aarch64_tune_params->issue_rate;
4894}
4895
8990e73a
TB
4896/* Vectorizer cost model target hooks. */
4897
4898/* Implement targetm.vectorize.builtin_vectorization_cost. */
4899static int
4900aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4901 tree vectype,
4902 int misalign ATTRIBUTE_UNUSED)
4903{
4904 unsigned elements;
4905
4906 switch (type_of_cost)
4907 {
4908 case scalar_stmt:
4909 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
4910
4911 case scalar_load:
4912 return aarch64_tune_params->vec_costs->scalar_load_cost;
4913
4914 case scalar_store:
4915 return aarch64_tune_params->vec_costs->scalar_store_cost;
4916
4917 case vector_stmt:
4918 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4919
4920 case vector_load:
4921 return aarch64_tune_params->vec_costs->vec_align_load_cost;
4922
4923 case vector_store:
4924 return aarch64_tune_params->vec_costs->vec_store_cost;
4925
4926 case vec_to_scalar:
4927 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
4928
4929 case scalar_to_vec:
4930 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
4931
4932 case unaligned_load:
4933 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
4934
4935 case unaligned_store:
4936 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
4937
4938 case cond_branch_taken:
4939 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
4940
4941 case cond_branch_not_taken:
4942 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
4943
4944 case vec_perm:
4945 case vec_promote_demote:
4946 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4947
4948 case vec_construct:
4949 elements = TYPE_VECTOR_SUBPARTS (vectype);
4950 return elements / 2 + 1;
4951
4952 default:
4953 gcc_unreachable ();
4954 }
4955}
4956
4957/* Implement targetm.vectorize.add_stmt_cost. */
4958static unsigned
4959aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4960 struct _stmt_vec_info *stmt_info, int misalign,
4961 enum vect_cost_model_location where)
4962{
4963 unsigned *cost = (unsigned *) data;
4964 unsigned retval = 0;
4965
4966 if (flag_vect_cost_model)
4967 {
4968 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4969 int stmt_cost =
4970 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
4971
4972 /* Statements in an inner loop relative to the loop being
4973 vectorized are weighted more heavily. The value here is
4974 a function (linear for now) of the loop nest level. */
4975 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4976 {
4977 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4978 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
4979 unsigned nest_level = loop_depth (loop);
4980
4981 count *= nest_level;
4982 }
4983
4984 retval = (unsigned) (count * stmt_cost);
4985 cost[where] += retval;
4986 }
4987
4988 return retval;
4989}
4990
43e9d192
IB
4991static void initialize_aarch64_code_model (void);
4992
4993/* Parse the architecture extension string. */
4994
4995static void
4996aarch64_parse_extension (char *str)
4997{
4998 /* The extension string is parsed left to right. */
4999 const struct aarch64_option_extension *opt = NULL;
5000
5001 /* Flag to say whether we are adding or removing an extension. */
5002 int adding_ext = -1;
5003
5004 while (str != NULL && *str != 0)
5005 {
5006 char *ext;
5007 size_t len;
5008
5009 str++;
5010 ext = strchr (str, '+');
5011
5012 if (ext != NULL)
5013 len = ext - str;
5014 else
5015 len = strlen (str);
5016
5017 if (len >= 2 && strncmp (str, "no", 2) == 0)
5018 {
5019 adding_ext = 0;
5020 len -= 2;
5021 str += 2;
5022 }
5023 else if (len > 0)
5024 adding_ext = 1;
5025
5026 if (len == 0)
5027 {
5028 error ("missing feature modifier after %qs", "+no");
5029 return;
5030 }
5031
5032 /* Scan over the extensions table trying to find an exact match. */
5033 for (opt = all_extensions; opt->name != NULL; opt++)
5034 {
5035 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
5036 {
5037 /* Add or remove the extension. */
5038 if (adding_ext)
5039 aarch64_isa_flags |= opt->flags_on;
5040 else
5041 aarch64_isa_flags &= ~(opt->flags_off);
5042 break;
5043 }
5044 }
5045
5046 if (opt->name == NULL)
5047 {
5048 /* Extension not found in list. */
5049 error ("unknown feature modifier %qs", str);
5050 return;
5051 }
5052
5053 str = ext;
5054 };
5055
5056 return;
5057}
5058
5059/* Parse the ARCH string. */
5060
5061static void
5062aarch64_parse_arch (void)
5063{
5064 char *ext;
5065 const struct processor *arch;
5066 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
5067 size_t len;
5068
5069 strcpy (str, aarch64_arch_string);
5070
5071 ext = strchr (str, '+');
5072
5073 if (ext != NULL)
5074 len = ext - str;
5075 else
5076 len = strlen (str);
5077
5078 if (len == 0)
5079 {
5080 error ("missing arch name in -march=%qs", str);
5081 return;
5082 }
5083
5084 /* Loop through the list of supported ARCHs to find a match. */
5085 for (arch = all_architectures; arch->name != NULL; arch++)
5086 {
5087 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5088 {
5089 selected_arch = arch;
5090 aarch64_isa_flags = selected_arch->flags;
ffee7aa9
JG
5091
5092 if (!selected_cpu)
5093 selected_cpu = &all_cores[selected_arch->core];
43e9d192
IB
5094
5095 if (ext != NULL)
5096 {
5097 /* ARCH string contains at least one extension. */
5098 aarch64_parse_extension (ext);
5099 }
5100
ffee7aa9
JG
5101 if (strcmp (selected_arch->arch, selected_cpu->arch))
5102 {
5103 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
5104 selected_cpu->name, selected_arch->name);
5105 }
5106
43e9d192
IB
5107 return;
5108 }
5109 }
5110
5111 /* ARCH name not found in list. */
5112 error ("unknown value %qs for -march", str);
5113 return;
5114}
5115
5116/* Parse the CPU string. */
5117
5118static void
5119aarch64_parse_cpu (void)
5120{
5121 char *ext;
5122 const struct processor *cpu;
5123 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5124 size_t len;
5125
5126 strcpy (str, aarch64_cpu_string);
5127
5128 ext = strchr (str, '+');
5129
5130 if (ext != NULL)
5131 len = ext - str;
5132 else
5133 len = strlen (str);
5134
5135 if (len == 0)
5136 {
5137 error ("missing cpu name in -mcpu=%qs", str);
5138 return;
5139 }
5140
5141 /* Loop through the list of supported CPUs to find a match. */
5142 for (cpu = all_cores; cpu->name != NULL; cpu++)
5143 {
5144 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5145 {
5146 selected_cpu = cpu;
192ed1dd 5147 selected_tune = cpu;
43e9d192
IB
5148 aarch64_isa_flags = selected_cpu->flags;
5149
5150 if (ext != NULL)
5151 {
5152 /* CPU string contains at least one extension. */
5153 aarch64_parse_extension (ext);
5154 }
5155
5156 return;
5157 }
5158 }
5159
5160 /* CPU name not found in list. */
5161 error ("unknown value %qs for -mcpu", str);
5162 return;
5163}
5164
5165/* Parse the TUNE string. */
5166
5167static void
5168aarch64_parse_tune (void)
5169{
5170 const struct processor *cpu;
5171 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5172 strcpy (str, aarch64_tune_string);
5173
5174 /* Loop through the list of supported CPUs to find a match. */
5175 for (cpu = all_cores; cpu->name != NULL; cpu++)
5176 {
5177 if (strcmp (cpu->name, str) == 0)
5178 {
5179 selected_tune = cpu;
5180 return;
5181 }
5182 }
5183
5184 /* CPU name not found in list. */
5185 error ("unknown value %qs for -mtune", str);
5186 return;
5187}
5188
5189
5190/* Implement TARGET_OPTION_OVERRIDE. */
5191
5192static void
5193aarch64_override_options (void)
5194{
ffee7aa9
JG
5195 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
5196 If either of -march or -mtune is given, they override their
5197 respective component of -mcpu.
43e9d192 5198
ffee7aa9
JG
5199 So, first parse AARCH64_CPU_STRING, then the others, be careful
5200 with -march as, if -mcpu is not present on the command line, march
5201 must set a sensible default CPU. */
5202 if (aarch64_cpu_string)
43e9d192 5203 {
ffee7aa9 5204 aarch64_parse_cpu ();
43e9d192
IB
5205 }
5206
ffee7aa9 5207 if (aarch64_arch_string)
43e9d192 5208 {
ffee7aa9 5209 aarch64_parse_arch ();
43e9d192
IB
5210 }
5211
5212 if (aarch64_tune_string)
5213 {
5214 aarch64_parse_tune ();
5215 }
5216
63892fa2
KV
5217#ifndef HAVE_AS_MABI_OPTION
5218 /* The compiler may have been configured with 2.23.* binutils, which does
5219 not have support for ILP32. */
5220 if (TARGET_ILP32)
5221 error ("Assembler does not support -mabi=ilp32");
5222#endif
5223
43e9d192
IB
5224 initialize_aarch64_code_model ();
5225
5226 aarch64_build_bitmask_table ();
5227
5228 /* This target defaults to strict volatile bitfields. */
5229 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5230 flag_strict_volatile_bitfields = 1;
5231
5232 /* If the user did not specify a processor, choose the default
5233 one for them. This will be the CPU set during configuration using
a3cd0246 5234 --with-cpu, otherwise it is "generic". */
43e9d192
IB
5235 if (!selected_cpu)
5236 {
5237 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5238 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5239 }
5240
5241 gcc_assert (selected_cpu);
5242
5243 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
5244 if (!selected_tune)
5245 selected_tune = &all_cores[selected_cpu->core];
5246
5247 aarch64_tune_flags = selected_tune->flags;
5248 aarch64_tune = selected_tune->core;
5249 aarch64_tune_params = selected_tune->tune;
5250
5251 aarch64_override_options_after_change ();
5252}
5253
5254/* Implement targetm.override_options_after_change. */
5255
5256static void
5257aarch64_override_options_after_change (void)
5258{
0b7f8166
MS
5259 if (flag_omit_frame_pointer)
5260 flag_omit_leaf_frame_pointer = false;
5261 else if (flag_omit_leaf_frame_pointer)
5262 flag_omit_frame_pointer = true;
43e9d192
IB
5263}
5264
5265static struct machine_function *
5266aarch64_init_machine_status (void)
5267{
5268 struct machine_function *machine;
5269 machine = ggc_alloc_cleared_machine_function ();
5270 return machine;
5271}
5272
5273void
5274aarch64_init_expanders (void)
5275{
5276 init_machine_status = aarch64_init_machine_status;
5277}
5278
5279/* A checking mechanism for the implementation of the various code models. */
5280static void
5281initialize_aarch64_code_model (void)
5282{
5283 if (flag_pic)
5284 {
5285 switch (aarch64_cmodel_var)
5286 {
5287 case AARCH64_CMODEL_TINY:
5288 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5289 break;
5290 case AARCH64_CMODEL_SMALL:
5291 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5292 break;
5293 case AARCH64_CMODEL_LARGE:
5294 sorry ("code model %qs with -f%s", "large",
5295 flag_pic > 1 ? "PIC" : "pic");
5296 default:
5297 gcc_unreachable ();
5298 }
5299 }
5300 else
5301 aarch64_cmodel = aarch64_cmodel_var;
5302}
5303
5304/* Return true if SYMBOL_REF X binds locally. */
5305
5306static bool
5307aarch64_symbol_binds_local_p (const_rtx x)
5308{
5309 return (SYMBOL_REF_DECL (x)
5310 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5311 : SYMBOL_REF_LOCAL_P (x));
5312}
5313
5314/* Return true if SYMBOL_REF X is thread local */
5315static bool
5316aarch64_tls_symbol_p (rtx x)
5317{
5318 if (! TARGET_HAVE_TLS)
5319 return false;
5320
5321 if (GET_CODE (x) != SYMBOL_REF)
5322 return false;
5323
5324 return SYMBOL_REF_TLS_MODEL (x) != 0;
5325}
5326
5327/* Classify a TLS symbol into one of the TLS kinds. */
5328enum aarch64_symbol_type
5329aarch64_classify_tls_symbol (rtx x)
5330{
5331 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5332
5333 switch (tls_kind)
5334 {
5335 case TLS_MODEL_GLOBAL_DYNAMIC:
5336 case TLS_MODEL_LOCAL_DYNAMIC:
5337 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5338
5339 case TLS_MODEL_INITIAL_EXEC:
5340 return SYMBOL_SMALL_GOTTPREL;
5341
5342 case TLS_MODEL_LOCAL_EXEC:
5343 return SYMBOL_SMALL_TPREL;
5344
5345 case TLS_MODEL_EMULATED:
5346 case TLS_MODEL_NONE:
5347 return SYMBOL_FORCE_TO_MEM;
5348
5349 default:
5350 gcc_unreachable ();
5351 }
5352}
5353
5354/* Return the method that should be used to access SYMBOL_REF or
5355 LABEL_REF X in context CONTEXT. */
17f4d4bf 5356
43e9d192
IB
5357enum aarch64_symbol_type
5358aarch64_classify_symbol (rtx x,
5359 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5360{
5361 if (GET_CODE (x) == LABEL_REF)
5362 {
5363 switch (aarch64_cmodel)
5364 {
5365 case AARCH64_CMODEL_LARGE:
5366 return SYMBOL_FORCE_TO_MEM;
5367
5368 case AARCH64_CMODEL_TINY_PIC:
5369 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
5370 return SYMBOL_TINY_ABSOLUTE;
5371
43e9d192
IB
5372 case AARCH64_CMODEL_SMALL_PIC:
5373 case AARCH64_CMODEL_SMALL:
5374 return SYMBOL_SMALL_ABSOLUTE;
5375
5376 default:
5377 gcc_unreachable ();
5378 }
5379 }
5380
17f4d4bf 5381 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 5382 {
4a985a37
MS
5383 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5384 return SYMBOL_FORCE_TO_MEM;
43e9d192
IB
5385
5386 if (aarch64_tls_symbol_p (x))
5387 return aarch64_classify_tls_symbol (x);
5388
17f4d4bf
CSS
5389 switch (aarch64_cmodel)
5390 {
5391 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
5392 if (SYMBOL_REF_WEAK (x))
5393 return SYMBOL_FORCE_TO_MEM;
5394 return SYMBOL_TINY_ABSOLUTE;
5395
17f4d4bf
CSS
5396 case AARCH64_CMODEL_SMALL:
5397 if (SYMBOL_REF_WEAK (x))
5398 return SYMBOL_FORCE_TO_MEM;
5399 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 5400
17f4d4bf 5401 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 5402 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 5403 return SYMBOL_TINY_GOT;
38e6c9a6
MS
5404 return SYMBOL_TINY_ABSOLUTE;
5405
17f4d4bf
CSS
5406 case AARCH64_CMODEL_SMALL_PIC:
5407 if (!aarch64_symbol_binds_local_p (x))
5408 return SYMBOL_SMALL_GOT;
5409 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 5410
17f4d4bf
CSS
5411 default:
5412 gcc_unreachable ();
5413 }
43e9d192 5414 }
17f4d4bf 5415
43e9d192
IB
5416 /* By default push everything into the constant pool. */
5417 return SYMBOL_FORCE_TO_MEM;
5418}
5419
43e9d192
IB
5420bool
5421aarch64_constant_address_p (rtx x)
5422{
5423 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5424}
5425
5426bool
5427aarch64_legitimate_pic_operand_p (rtx x)
5428{
5429 if (GET_CODE (x) == SYMBOL_REF
5430 || (GET_CODE (x) == CONST
5431 && GET_CODE (XEXP (x, 0)) == PLUS
5432 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5433 return false;
5434
5435 return true;
5436}
5437
3520f7cc
JG
5438/* Return true if X holds either a quarter-precision or
5439 floating-point +0.0 constant. */
5440static bool
5441aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5442{
5443 if (!CONST_DOUBLE_P (x))
5444 return false;
5445
5446 /* TODO: We could handle moving 0.0 to a TFmode register,
5447 but first we would like to refactor the movtf_aarch64
5448 to be more amicable to split moves properly and
5449 correctly gate on TARGET_SIMD. For now - reject all
5450 constants which are not to SFmode or DFmode registers. */
5451 if (!(mode == SFmode || mode == DFmode))
5452 return false;
5453
5454 if (aarch64_float_const_zero_rtx_p (x))
5455 return true;
5456 return aarch64_float_const_representable_p (x);
5457}
5458
43e9d192
IB
5459static bool
5460aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5461{
5462 /* Do not allow vector struct mode constants. We could support
5463 0 and -1 easily, but they need support in aarch64-simd.md. */
5464 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5465 return false;
5466
5467 /* This could probably go away because
5468 we now decompose CONST_INTs according to expand_mov_immediate. */
5469 if ((GET_CODE (x) == CONST_VECTOR
48063b9d 5470 && aarch64_simd_valid_immediate (x, mode, false, NULL))
3520f7cc
JG
5471 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5472 return !targetm.cannot_force_const_mem (mode, x);
43e9d192
IB
5473
5474 if (GET_CODE (x) == HIGH
5475 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5476 return true;
5477
5478 return aarch64_constant_address_p (x);
5479}
5480
a5bc806c 5481rtx
43e9d192
IB
5482aarch64_load_tp (rtx target)
5483{
5484 if (!target
5485 || GET_MODE (target) != Pmode
5486 || !register_operand (target, Pmode))
5487 target = gen_reg_rtx (Pmode);
5488
5489 /* Can return in any reg. */
5490 emit_insn (gen_aarch64_load_tp_hard (target));
5491 return target;
5492}
5493
43e9d192
IB
5494/* On AAPCS systems, this is the "struct __va_list". */
5495static GTY(()) tree va_list_type;
5496
5497/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5498 Return the type to use as __builtin_va_list.
5499
5500 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5501
5502 struct __va_list
5503 {
5504 void *__stack;
5505 void *__gr_top;
5506 void *__vr_top;
5507 int __gr_offs;
5508 int __vr_offs;
5509 }; */
5510
5511static tree
5512aarch64_build_builtin_va_list (void)
5513{
5514 tree va_list_name;
5515 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5516
5517 /* Create the type. */
5518 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5519 /* Give it the required name. */
5520 va_list_name = build_decl (BUILTINS_LOCATION,
5521 TYPE_DECL,
5522 get_identifier ("__va_list"),
5523 va_list_type);
5524 DECL_ARTIFICIAL (va_list_name) = 1;
5525 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 5526 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
5527
5528 /* Create the fields. */
5529 f_stack = build_decl (BUILTINS_LOCATION,
5530 FIELD_DECL, get_identifier ("__stack"),
5531 ptr_type_node);
5532 f_grtop = build_decl (BUILTINS_LOCATION,
5533 FIELD_DECL, get_identifier ("__gr_top"),
5534 ptr_type_node);
5535 f_vrtop = build_decl (BUILTINS_LOCATION,
5536 FIELD_DECL, get_identifier ("__vr_top"),
5537 ptr_type_node);
5538 f_groff = build_decl (BUILTINS_LOCATION,
5539 FIELD_DECL, get_identifier ("__gr_offs"),
5540 integer_type_node);
5541 f_vroff = build_decl (BUILTINS_LOCATION,
5542 FIELD_DECL, get_identifier ("__vr_offs"),
5543 integer_type_node);
5544
5545 DECL_ARTIFICIAL (f_stack) = 1;
5546 DECL_ARTIFICIAL (f_grtop) = 1;
5547 DECL_ARTIFICIAL (f_vrtop) = 1;
5548 DECL_ARTIFICIAL (f_groff) = 1;
5549 DECL_ARTIFICIAL (f_vroff) = 1;
5550
5551 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5552 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5553 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5554 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5555 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5556
5557 TYPE_FIELDS (va_list_type) = f_stack;
5558 DECL_CHAIN (f_stack) = f_grtop;
5559 DECL_CHAIN (f_grtop) = f_vrtop;
5560 DECL_CHAIN (f_vrtop) = f_groff;
5561 DECL_CHAIN (f_groff) = f_vroff;
5562
5563 /* Compute its layout. */
5564 layout_type (va_list_type);
5565
5566 return va_list_type;
5567}
5568
5569/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5570static void
5571aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5572{
5573 const CUMULATIVE_ARGS *cum;
5574 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5575 tree stack, grtop, vrtop, groff, vroff;
5576 tree t;
5577 int gr_save_area_size;
5578 int vr_save_area_size;
5579 int vr_offset;
5580
5581 cum = &crtl->args.info;
5582 gr_save_area_size
5583 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5584 vr_save_area_size
5585 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5586
5587 if (TARGET_GENERAL_REGS_ONLY)
5588 {
5589 if (cum->aapcs_nvrn > 0)
5590 sorry ("%qs and floating point or vector arguments",
5591 "-mgeneral-regs-only");
5592 vr_save_area_size = 0;
5593 }
5594
5595 f_stack = TYPE_FIELDS (va_list_type_node);
5596 f_grtop = DECL_CHAIN (f_stack);
5597 f_vrtop = DECL_CHAIN (f_grtop);
5598 f_groff = DECL_CHAIN (f_vrtop);
5599 f_vroff = DECL_CHAIN (f_groff);
5600
5601 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5602 NULL_TREE);
5603 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5604 NULL_TREE);
5605 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5606 NULL_TREE);
5607 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5608 NULL_TREE);
5609 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5610 NULL_TREE);
5611
5612 /* Emit code to initialize STACK, which points to the next varargs stack
5613 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5614 by named arguments. STACK is 8-byte aligned. */
5615 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5616 if (cum->aapcs_stack_size > 0)
5617 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5618 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5619 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5620
5621 /* Emit code to initialize GRTOP, the top of the GR save area.
5622 virtual_incoming_args_rtx should have been 16 byte aligned. */
5623 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5624 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5625 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5626
5627 /* Emit code to initialize VRTOP, the top of the VR save area.
5628 This address is gr_save_area_bytes below GRTOP, rounded
5629 down to the next 16-byte boundary. */
5630 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5631 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5632 STACK_BOUNDARY / BITS_PER_UNIT);
5633
5634 if (vr_offset)
5635 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5636 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5637 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5638
5639 /* Emit code to initialize GROFF, the offset from GRTOP of the
5640 next GPR argument. */
5641 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5642 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5643 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5644
5645 /* Likewise emit code to initialize VROFF, the offset from FTOP
5646 of the next VR argument. */
5647 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5648 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5649 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5650}
5651
5652/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5653
5654static tree
5655aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5656 gimple_seq *post_p ATTRIBUTE_UNUSED)
5657{
5658 tree addr;
5659 bool indirect_p;
5660 bool is_ha; /* is HFA or HVA. */
5661 bool dw_align; /* double-word align. */
5662 enum machine_mode ag_mode = VOIDmode;
5663 int nregs;
5664 enum machine_mode mode;
5665
5666 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5667 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5668 HOST_WIDE_INT size, rsize, adjust, align;
5669 tree t, u, cond1, cond2;
5670
5671 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5672 if (indirect_p)
5673 type = build_pointer_type (type);
5674
5675 mode = TYPE_MODE (type);
5676
5677 f_stack = TYPE_FIELDS (va_list_type_node);
5678 f_grtop = DECL_CHAIN (f_stack);
5679 f_vrtop = DECL_CHAIN (f_grtop);
5680 f_groff = DECL_CHAIN (f_vrtop);
5681 f_vroff = DECL_CHAIN (f_groff);
5682
5683 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5684 f_stack, NULL_TREE);
5685 size = int_size_in_bytes (type);
5686 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5687
5688 dw_align = false;
5689 adjust = 0;
5690 if (aarch64_vfp_is_call_or_return_candidate (mode,
5691 type,
5692 &ag_mode,
5693 &nregs,
5694 &is_ha))
5695 {
5696 /* TYPE passed in fp/simd registers. */
5697 if (TARGET_GENERAL_REGS_ONLY)
5698 sorry ("%qs and floating point or vector arguments",
5699 "-mgeneral-regs-only");
5700
5701 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5702 unshare_expr (valist), f_vrtop, NULL_TREE);
5703 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5704 unshare_expr (valist), f_vroff, NULL_TREE);
5705
5706 rsize = nregs * UNITS_PER_VREG;
5707
5708 if (is_ha)
5709 {
5710 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5711 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5712 }
5713 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5714 && size < UNITS_PER_VREG)
5715 {
5716 adjust = UNITS_PER_VREG - size;
5717 }
5718 }
5719 else
5720 {
5721 /* TYPE passed in general registers. */
5722 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5723 unshare_expr (valist), f_grtop, NULL_TREE);
5724 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5725 unshare_expr (valist), f_groff, NULL_TREE);
5726 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5727 nregs = rsize / UNITS_PER_WORD;
5728
5729 if (align > 8)
5730 dw_align = true;
5731
5732 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5733 && size < UNITS_PER_WORD)
5734 {
5735 adjust = UNITS_PER_WORD - size;
5736 }
5737 }
5738
5739 /* Get a local temporary for the field value. */
5740 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5741
5742 /* Emit code to branch if off >= 0. */
5743 t = build2 (GE_EXPR, boolean_type_node, off,
5744 build_int_cst (TREE_TYPE (off), 0));
5745 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5746
5747 if (dw_align)
5748 {
5749 /* Emit: offs = (offs + 15) & -16. */
5750 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5751 build_int_cst (TREE_TYPE (off), 15));
5752 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5753 build_int_cst (TREE_TYPE (off), -16));
5754 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5755 }
5756 else
5757 roundup = NULL;
5758
5759 /* Update ap.__[g|v]r_offs */
5760 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5761 build_int_cst (TREE_TYPE (off), rsize));
5762 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5763
5764 /* String up. */
5765 if (roundup)
5766 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5767
5768 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5769 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5770 build_int_cst (TREE_TYPE (f_off), 0));
5771 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5772
5773 /* String up: make sure the assignment happens before the use. */
5774 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5775 COND_EXPR_ELSE (cond1) = t;
5776
5777 /* Prepare the trees handling the argument that is passed on the stack;
5778 the top level node will store in ON_STACK. */
5779 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5780 if (align > 8)
5781 {
5782 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5783 t = fold_convert (intDI_type_node, arg);
5784 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5785 build_int_cst (TREE_TYPE (t), 15));
5786 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5787 build_int_cst (TREE_TYPE (t), -16));
5788 t = fold_convert (TREE_TYPE (arg), t);
5789 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5790 }
5791 else
5792 roundup = NULL;
5793 /* Advance ap.__stack */
5794 t = fold_convert (intDI_type_node, arg);
5795 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5796 build_int_cst (TREE_TYPE (t), size + 7));
5797 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5798 build_int_cst (TREE_TYPE (t), -8));
5799 t = fold_convert (TREE_TYPE (arg), t);
5800 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5801 /* String up roundup and advance. */
5802 if (roundup)
5803 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5804 /* String up with arg */
5805 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5806 /* Big-endianness related address adjustment. */
5807 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5808 && size < UNITS_PER_WORD)
5809 {
5810 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5811 size_int (UNITS_PER_WORD - size));
5812 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5813 }
5814
5815 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5816 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5817
5818 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5819 t = off;
5820 if (adjust)
5821 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5822 build_int_cst (TREE_TYPE (off), adjust));
5823
5824 t = fold_convert (sizetype, t);
5825 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5826
5827 if (is_ha)
5828 {
5829 /* type ha; // treat as "struct {ftype field[n];}"
5830 ... [computing offs]
5831 for (i = 0; i <nregs; ++i, offs += 16)
5832 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5833 return ha; */
5834 int i;
5835 tree tmp_ha, field_t, field_ptr_t;
5836
5837 /* Declare a local variable. */
5838 tmp_ha = create_tmp_var_raw (type, "ha");
5839 gimple_add_tmp_var (tmp_ha);
5840
5841 /* Establish the base type. */
5842 switch (ag_mode)
5843 {
5844 case SFmode:
5845 field_t = float_type_node;
5846 field_ptr_t = float_ptr_type_node;
5847 break;
5848 case DFmode:
5849 field_t = double_type_node;
5850 field_ptr_t = double_ptr_type_node;
5851 break;
5852 case TFmode:
5853 field_t = long_double_type_node;
5854 field_ptr_t = long_double_ptr_type_node;
5855 break;
5856/* The half precision and quad precision are not fully supported yet. Enable
5857 the following code after the support is complete. Need to find the correct
5858 type node for __fp16 *. */
5859#if 0
5860 case HFmode:
5861 field_t = float_type_node;
5862 field_ptr_t = float_ptr_type_node;
5863 break;
5864#endif
5865 case V2SImode:
5866 case V4SImode:
5867 {
5868 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5869 field_t = build_vector_type_for_mode (innertype, ag_mode);
5870 field_ptr_t = build_pointer_type (field_t);
5871 }
5872 break;
5873 default:
5874 gcc_assert (0);
5875 }
5876
5877 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
5878 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5879 addr = t;
5880 t = fold_convert (field_ptr_t, addr);
5881 t = build2 (MODIFY_EXPR, field_t,
5882 build1 (INDIRECT_REF, field_t, tmp_ha),
5883 build1 (INDIRECT_REF, field_t, t));
5884
5885 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5886 for (i = 1; i < nregs; ++i)
5887 {
5888 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5889 u = fold_convert (field_ptr_t, addr);
5890 u = build2 (MODIFY_EXPR, field_t,
5891 build2 (MEM_REF, field_t, tmp_ha,
5892 build_int_cst (field_ptr_t,
5893 (i *
5894 int_size_in_bytes (field_t)))),
5895 build1 (INDIRECT_REF, field_t, u));
5896 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5897 }
5898
5899 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5900 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5901 }
5902
5903 COND_EXPR_ELSE (cond2) = t;
5904 addr = fold_convert (build_pointer_type (type), cond1);
5905 addr = build_va_arg_indirect_ref (addr);
5906
5907 if (indirect_p)
5908 addr = build_va_arg_indirect_ref (addr);
5909
5910 return addr;
5911}
5912
5913/* Implement TARGET_SETUP_INCOMING_VARARGS. */
5914
5915static void
5916aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5917 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5918 int no_rtl)
5919{
5920 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5921 CUMULATIVE_ARGS local_cum;
5922 int gr_saved, vr_saved;
5923
5924 /* The caller has advanced CUM up to, but not beyond, the last named
5925 argument. Advance a local copy of CUM past the last "real" named
5926 argument, to find out how many registers are left over. */
5927 local_cum = *cum;
5928 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
5929
5930 /* Found out how many registers we need to save. */
5931 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5932 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5933
5934 if (TARGET_GENERAL_REGS_ONLY)
5935 {
5936 if (local_cum.aapcs_nvrn > 0)
5937 sorry ("%qs and floating point or vector arguments",
5938 "-mgeneral-regs-only");
5939 vr_saved = 0;
5940 }
5941
5942 if (!no_rtl)
5943 {
5944 if (gr_saved > 0)
5945 {
5946 rtx ptr, mem;
5947
5948 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5949 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5950 - gr_saved * UNITS_PER_WORD);
5951 mem = gen_frame_mem (BLKmode, ptr);
5952 set_mem_alias_set (mem, get_varargs_alias_set ());
5953
5954 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5955 mem, gr_saved);
5956 }
5957 if (vr_saved > 0)
5958 {
5959 /* We can't use move_block_from_reg, because it will use
5960 the wrong mode, storing D regs only. */
5961 enum machine_mode mode = TImode;
5962 int off, i;
5963
5964 /* Set OFF to the offset from virtual_incoming_args_rtx of
5965 the first vector register. The VR save area lies below
5966 the GR one, and is aligned to 16 bytes. */
5967 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5968 STACK_BOUNDARY / BITS_PER_UNIT);
5969 off -= vr_saved * UNITS_PER_VREG;
5970
5971 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5972 {
5973 rtx ptr, mem;
5974
5975 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5976 mem = gen_frame_mem (mode, ptr);
5977 set_mem_alias_set (mem, get_varargs_alias_set ());
5978 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5979 off += UNITS_PER_VREG;
5980 }
5981 }
5982 }
5983
5984 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5985 any complication of having crtl->args.pretend_args_size changed. */
5986 cfun->machine->saved_varargs_size
5987 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5988 STACK_BOUNDARY / BITS_PER_UNIT)
5989 + vr_saved * UNITS_PER_VREG);
5990}
5991
5992static void
5993aarch64_conditional_register_usage (void)
5994{
5995 int i;
5996 if (!TARGET_FLOAT)
5997 {
5998 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5999 {
6000 fixed_regs[i] = 1;
6001 call_used_regs[i] = 1;
6002 }
6003 }
6004}
6005
6006/* Walk down the type tree of TYPE counting consecutive base elements.
6007 If *MODEP is VOIDmode, then set it to the first valid floating point
6008 type. If a non-floating point type is found, or if a floating point
6009 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6010 otherwise return the count in the sub-tree. */
6011static int
6012aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
6013{
6014 enum machine_mode mode;
6015 HOST_WIDE_INT size;
6016
6017 switch (TREE_CODE (type))
6018 {
6019 case REAL_TYPE:
6020 mode = TYPE_MODE (type);
6021 if (mode != DFmode && mode != SFmode && mode != TFmode)
6022 return -1;
6023
6024 if (*modep == VOIDmode)
6025 *modep = mode;
6026
6027 if (*modep == mode)
6028 return 1;
6029
6030 break;
6031
6032 case COMPLEX_TYPE:
6033 mode = TYPE_MODE (TREE_TYPE (type));
6034 if (mode != DFmode && mode != SFmode && mode != TFmode)
6035 return -1;
6036
6037 if (*modep == VOIDmode)
6038 *modep = mode;
6039
6040 if (*modep == mode)
6041 return 2;
6042
6043 break;
6044
6045 case VECTOR_TYPE:
6046 /* Use V2SImode and V4SImode as representatives of all 64-bit
6047 and 128-bit vector types. */
6048 size = int_size_in_bytes (type);
6049 switch (size)
6050 {
6051 case 8:
6052 mode = V2SImode;
6053 break;
6054 case 16:
6055 mode = V4SImode;
6056 break;
6057 default:
6058 return -1;
6059 }
6060
6061 if (*modep == VOIDmode)
6062 *modep = mode;
6063
6064 /* Vector modes are considered to be opaque: two vectors are
6065 equivalent for the purposes of being homogeneous aggregates
6066 if they are the same size. */
6067 if (*modep == mode)
6068 return 1;
6069
6070 break;
6071
6072 case ARRAY_TYPE:
6073 {
6074 int count;
6075 tree index = TYPE_DOMAIN (type);
6076
6077 /* Can't handle incomplete types. */
6078 if (!COMPLETE_TYPE_P (type))
6079 return -1;
6080
6081 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6082 if (count == -1
6083 || !index
6084 || !TYPE_MAX_VALUE (index)
cc269bb6 6085 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 6086 || !TYPE_MIN_VALUE (index)
cc269bb6 6087 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
6088 || count < 0)
6089 return -1;
6090
ae7e9ddd
RS
6091 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6092 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
6093
6094 /* There must be no padding. */
cc269bb6 6095 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
7d362f6c 6096 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
43e9d192
IB
6097 != count * GET_MODE_BITSIZE (*modep)))
6098 return -1;
6099
6100 return count;
6101 }
6102
6103 case RECORD_TYPE:
6104 {
6105 int count = 0;
6106 int sub_count;
6107 tree field;
6108
6109 /* Can't handle incomplete types. */
6110 if (!COMPLETE_TYPE_P (type))
6111 return -1;
6112
6113 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6114 {
6115 if (TREE_CODE (field) != FIELD_DECL)
6116 continue;
6117
6118 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6119 if (sub_count < 0)
6120 return -1;
6121 count += sub_count;
6122 }
6123
6124 /* There must be no padding. */
cc269bb6 6125 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
7d362f6c 6126 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
43e9d192
IB
6127 != count * GET_MODE_BITSIZE (*modep)))
6128 return -1;
6129
6130 return count;
6131 }
6132
6133 case UNION_TYPE:
6134 case QUAL_UNION_TYPE:
6135 {
6136 /* These aren't very interesting except in a degenerate case. */
6137 int count = 0;
6138 int sub_count;
6139 tree field;
6140
6141 /* Can't handle incomplete types. */
6142 if (!COMPLETE_TYPE_P (type))
6143 return -1;
6144
6145 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6146 {
6147 if (TREE_CODE (field) != FIELD_DECL)
6148 continue;
6149
6150 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6151 if (sub_count < 0)
6152 return -1;
6153 count = count > sub_count ? count : sub_count;
6154 }
6155
6156 /* There must be no padding. */
cc269bb6 6157 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
7d362f6c 6158 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
43e9d192
IB
6159 != count * GET_MODE_BITSIZE (*modep)))
6160 return -1;
6161
6162 return count;
6163 }
6164
6165 default:
6166 break;
6167 }
6168
6169 return -1;
6170}
6171
38e8f663
YR
6172/* Return true if we use LRA instead of reload pass. */
6173static bool
6174aarch64_lra_p (void)
6175{
6176 return aarch64_lra_flag;
6177}
6178
43e9d192
IB
6179/* Return TRUE if the type, as described by TYPE and MODE, is a composite
6180 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6181 array types. The C99 floating-point complex types are also considered
6182 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6183 types, which are GCC extensions and out of the scope of AAPCS64, are
6184 treated as composite types here as well.
6185
6186 Note that MODE itself is not sufficient in determining whether a type
6187 is such a composite type or not. This is because
6188 stor-layout.c:compute_record_mode may have already changed the MODE
6189 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6190 structure with only one field may have its MODE set to the mode of the
6191 field. Also an integer mode whose size matches the size of the
6192 RECORD_TYPE type may be used to substitute the original mode
6193 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6194 solely relied on. */
6195
6196static bool
6197aarch64_composite_type_p (const_tree type,
6198 enum machine_mode mode)
6199{
6200 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6201 return true;
6202
6203 if (mode == BLKmode
6204 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6205 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6206 return true;
6207
6208 return false;
6209}
6210
6211/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6212 type as described in AAPCS64 \S 4.1.2.
6213
6214 See the comment above aarch64_composite_type_p for the notes on MODE. */
6215
6216static bool
6217aarch64_short_vector_p (const_tree type,
6218 enum machine_mode mode)
6219{
6220 HOST_WIDE_INT size = -1;
6221
6222 if (type && TREE_CODE (type) == VECTOR_TYPE)
6223 size = int_size_in_bytes (type);
6224 else if (!aarch64_composite_type_p (type, mode)
6225 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6226 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6227 size = GET_MODE_SIZE (mode);
6228
6229 return (size == 8 || size == 16) ? true : false;
6230}
6231
6232/* Return TRUE if an argument, whose type is described by TYPE and MODE,
6233 shall be passed or returned in simd/fp register(s) (providing these
6234 parameter passing registers are available).
6235
6236 Upon successful return, *COUNT returns the number of needed registers,
6237 *BASE_MODE returns the mode of the individual register and when IS_HAF
6238 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6239 floating-point aggregate or a homogeneous short-vector aggregate. */
6240
6241static bool
6242aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6243 const_tree type,
6244 enum machine_mode *base_mode,
6245 int *count,
6246 bool *is_ha)
6247{
6248 enum machine_mode new_mode = VOIDmode;
6249 bool composite_p = aarch64_composite_type_p (type, mode);
6250
6251 if (is_ha != NULL) *is_ha = false;
6252
6253 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6254 || aarch64_short_vector_p (type, mode))
6255 {
6256 *count = 1;
6257 new_mode = mode;
6258 }
6259 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6260 {
6261 if (is_ha != NULL) *is_ha = true;
6262 *count = 2;
6263 new_mode = GET_MODE_INNER (mode);
6264 }
6265 else if (type && composite_p)
6266 {
6267 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6268
6269 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6270 {
6271 if (is_ha != NULL) *is_ha = true;
6272 *count = ag_count;
6273 }
6274 else
6275 return false;
6276 }
6277 else
6278 return false;
6279
6280 *base_mode = new_mode;
6281 return true;
6282}
6283
6284/* Implement TARGET_STRUCT_VALUE_RTX. */
6285
6286static rtx
6287aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6288 int incoming ATTRIBUTE_UNUSED)
6289{
6290 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6291}
6292
6293/* Implements target hook vector_mode_supported_p. */
6294static bool
6295aarch64_vector_mode_supported_p (enum machine_mode mode)
6296{
6297 if (TARGET_SIMD
6298 && (mode == V4SImode || mode == V8HImode
6299 || mode == V16QImode || mode == V2DImode
6300 || mode == V2SImode || mode == V4HImode
6301 || mode == V8QImode || mode == V2SFmode
6302 || mode == V4SFmode || mode == V2DFmode))
6303 return true;
6304
6305 return false;
6306}
6307
b7342d25
IB
6308/* Return appropriate SIMD container
6309 for MODE within a vector of WIDTH bits. */
43e9d192 6310static enum machine_mode
b7342d25 6311aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
43e9d192 6312{
b7342d25 6313 gcc_assert (width == 64 || width == 128);
43e9d192 6314 if (TARGET_SIMD)
b7342d25
IB
6315 {
6316 if (width == 128)
6317 switch (mode)
6318 {
6319 case DFmode:
6320 return V2DFmode;
6321 case SFmode:
6322 return V4SFmode;
6323 case SImode:
6324 return V4SImode;
6325 case HImode:
6326 return V8HImode;
6327 case QImode:
6328 return V16QImode;
6329 case DImode:
6330 return V2DImode;
6331 default:
6332 break;
6333 }
6334 else
6335 switch (mode)
6336 {
6337 case SFmode:
6338 return V2SFmode;
6339 case SImode:
6340 return V2SImode;
6341 case HImode:
6342 return V4HImode;
6343 case QImode:
6344 return V8QImode;
6345 default:
6346 break;
6347 }
6348 }
43e9d192
IB
6349 return word_mode;
6350}
6351
b7342d25
IB
6352/* Return 128-bit container as the preferred SIMD mode for MODE. */
6353static enum machine_mode
6354aarch64_preferred_simd_mode (enum machine_mode mode)
6355{
6356 return aarch64_simd_container_mode (mode, 128);
6357}
6358
3b357264
JG
6359/* Return the bitmask of possible vector sizes for the vectorizer
6360 to iterate over. */
6361static unsigned int
6362aarch64_autovectorize_vector_sizes (void)
6363{
6364 return (16 | 8);
6365}
6366
c6fc9e43
YZ
6367/* A table to help perform AArch64-specific name mangling for AdvSIMD
6368 vector types in order to conform to the AAPCS64 (see "Procedure
6369 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6370 qualify for emission with the mangled names defined in that document,
6371 a vector type must not only be of the correct mode but also be
6372 composed of AdvSIMD vector element types (e.g.
6373 _builtin_aarch64_simd_qi); these types are registered by
6374 aarch64_init_simd_builtins (). In other words, vector types defined
6375 in other ways e.g. via vector_size attribute will get default
6376 mangled names. */
6377typedef struct
6378{
6379 enum machine_mode mode;
6380 const char *element_type_name;
6381 const char *mangled_name;
6382} aarch64_simd_mangle_map_entry;
6383
6384static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6385 /* 64-bit containerized types. */
6386 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6387 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6388 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6389 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6390 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6391 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6392 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6393 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6394 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6395 /* 128-bit containerized types. */
6396 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6397 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6398 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6399 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6400 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6401 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6402 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6403 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6404 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6405 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6406 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6407 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
7baa225d 6408 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
c6fc9e43
YZ
6409 { VOIDmode, NULL, NULL }
6410};
6411
ac2b960f
YZ
6412/* Implement TARGET_MANGLE_TYPE. */
6413
6f549691 6414static const char *
ac2b960f
YZ
6415aarch64_mangle_type (const_tree type)
6416{
6417 /* The AArch64 ABI documents say that "__va_list" has to be
6418 managled as if it is in the "std" namespace. */
6419 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6420 return "St9__va_list";
6421
c6fc9e43
YZ
6422 /* Check the mode of the vector type, and the name of the vector
6423 element type, against the table. */
6424 if (TREE_CODE (type) == VECTOR_TYPE)
6425 {
6426 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6427
6428 while (pos->mode != VOIDmode)
6429 {
6430 tree elt_type = TREE_TYPE (type);
6431
6432 if (pos->mode == TYPE_MODE (type)
6433 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6434 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6435 pos->element_type_name))
6436 return pos->mangled_name;
6437
6438 pos++;
6439 }
6440 }
6441
ac2b960f
YZ
6442 /* Use the default mangling. */
6443 return NULL;
6444}
6445
43e9d192 6446/* Return the equivalent letter for size. */
81c2dfb9 6447static char
43e9d192
IB
6448sizetochar (int size)
6449{
6450 switch (size)
6451 {
6452 case 64: return 'd';
6453 case 32: return 's';
6454 case 16: return 'h';
6455 case 8 : return 'b';
6456 default: gcc_unreachable ();
6457 }
6458}
6459
3520f7cc
JG
6460/* Return true iff x is a uniform vector of floating-point
6461 constants, and the constant can be represented in
6462 quarter-precision form. Note, as aarch64_float_const_representable
6463 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
6464static bool
6465aarch64_vect_float_const_representable_p (rtx x)
6466{
6467 int i = 0;
6468 REAL_VALUE_TYPE r0, ri;
6469 rtx x0, xi;
6470
6471 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6472 return false;
6473
6474 x0 = CONST_VECTOR_ELT (x, 0);
6475 if (!CONST_DOUBLE_P (x0))
6476 return false;
6477
6478 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6479
6480 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6481 {
6482 xi = CONST_VECTOR_ELT (x, i);
6483 if (!CONST_DOUBLE_P (xi))
6484 return false;
6485
6486 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6487 if (!REAL_VALUES_EQUAL (r0, ri))
6488 return false;
6489 }
6490
6491 return aarch64_float_const_representable_p (x0);
6492}
6493
d8edd899 6494/* Return true for valid and false for invalid. */
3ea63f60 6495bool
48063b9d
IB
6496aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
6497 struct simd_immediate_info *info)
43e9d192
IB
6498{
6499#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6500 matches = 1; \
6501 for (i = 0; i < idx; i += (STRIDE)) \
6502 if (!(TEST)) \
6503 matches = 0; \
6504 if (matches) \
6505 { \
6506 immtype = (CLASS); \
6507 elsize = (ELSIZE); \
43e9d192
IB
6508 eshift = (SHIFT); \
6509 emvn = (NEG); \
6510 break; \
6511 }
6512
6513 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6514 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6515 unsigned char bytes[16];
43e9d192
IB
6516 int immtype = -1, matches;
6517 unsigned int invmask = inverse ? 0xff : 0;
6518 int eshift, emvn;
6519
43e9d192 6520 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3520f7cc 6521 {
81c2dfb9
IB
6522 if (! (aarch64_simd_imm_zero_p (op, mode)
6523 || aarch64_vect_float_const_representable_p (op)))
d8edd899 6524 return false;
3520f7cc 6525
48063b9d
IB
6526 if (info)
6527 {
6528 info->value = CONST_VECTOR_ELT (op, 0);
81c2dfb9 6529 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
48063b9d
IB
6530 info->mvn = false;
6531 info->shift = 0;
6532 }
3520f7cc 6533
d8edd899 6534 return true;
3520f7cc 6535 }
43e9d192
IB
6536
6537 /* Splat vector constant out into a byte vector. */
6538 for (i = 0; i < n_elts; i++)
6539 {
4b1e108c
AL
6540 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
6541 it must be laid out in the vector register in reverse order. */
6542 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
43e9d192
IB
6543 unsigned HOST_WIDE_INT elpart;
6544 unsigned int part, parts;
6545
6546 if (GET_CODE (el) == CONST_INT)
6547 {
6548 elpart = INTVAL (el);
6549 parts = 1;
6550 }
6551 else if (GET_CODE (el) == CONST_DOUBLE)
6552 {
6553 elpart = CONST_DOUBLE_LOW (el);
6554 parts = 2;
6555 }
6556 else
6557 gcc_unreachable ();
6558
6559 for (part = 0; part < parts; part++)
6560 {
6561 unsigned int byte;
6562 for (byte = 0; byte < innersize; byte++)
6563 {
6564 bytes[idx++] = (elpart & 0xff) ^ invmask;
6565 elpart >>= BITS_PER_UNIT;
6566 }
6567 if (GET_CODE (el) == CONST_DOUBLE)
6568 elpart = CONST_DOUBLE_HIGH (el);
6569 }
6570 }
6571
6572 /* Sanity check. */
6573 gcc_assert (idx == GET_MODE_SIZE (mode));
6574
6575 do
6576 {
6577 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6578 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6579
6580 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6581 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6582
6583 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6584 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6585
6586 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6587 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6588
6589 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6590
6591 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6592
6593 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6594 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6595
6596 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6597 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6598
6599 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6600 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6601
6602 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6603 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6604
6605 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6606
6607 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6608
6609 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
e4f0f84d 6610 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
43e9d192
IB
6611
6612 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
e4f0f84d 6613 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
43e9d192
IB
6614
6615 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
e4f0f84d 6616 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
43e9d192
IB
6617
6618 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
e4f0f84d 6619 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
43e9d192
IB
6620
6621 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6622
6623 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6624 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6625 }
6626 while (0);
6627
e4f0f84d 6628 if (immtype == -1)
d8edd899 6629 return false;
43e9d192 6630
48063b9d 6631 if (info)
43e9d192 6632 {
48063b9d 6633 info->element_width = elsize;
48063b9d
IB
6634 info->mvn = emvn != 0;
6635 info->shift = eshift;
6636
43e9d192
IB
6637 unsigned HOST_WIDE_INT imm = 0;
6638
e4f0f84d
TB
6639 if (immtype >= 12 && immtype <= 15)
6640 info->msl = true;
6641
43e9d192
IB
6642 /* Un-invert bytes of recognized vector, if necessary. */
6643 if (invmask != 0)
6644 for (i = 0; i < idx; i++)
6645 bytes[i] ^= invmask;
6646
6647 if (immtype == 17)
6648 {
6649 /* FIXME: Broken on 32-bit H_W_I hosts. */
6650 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6651
6652 for (i = 0; i < 8; i++)
6653 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6654 << (i * BITS_PER_UNIT);
6655
43e9d192 6656
48063b9d
IB
6657 info->value = GEN_INT (imm);
6658 }
6659 else
6660 {
6661 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6662 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
43e9d192
IB
6663
6664 /* Construct 'abcdefgh' because the assembler cannot handle
48063b9d
IB
6665 generic constants. */
6666 if (info->mvn)
43e9d192 6667 imm = ~imm;
48063b9d
IB
6668 imm = (imm >> info->shift) & 0xff;
6669 info->value = GEN_INT (imm);
6670 }
43e9d192
IB
6671 }
6672
48063b9d 6673 return true;
43e9d192
IB
6674#undef CHECK
6675}
6676
43e9d192
IB
6677static bool
6678aarch64_const_vec_all_same_int_p (rtx x,
6679 HOST_WIDE_INT minval,
6680 HOST_WIDE_INT maxval)
6681{
6682 HOST_WIDE_INT firstval;
6683 int count, i;
6684
6685 if (GET_CODE (x) != CONST_VECTOR
6686 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6687 return false;
6688
6689 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6690 if (firstval < minval || firstval > maxval)
6691 return false;
6692
6693 count = CONST_VECTOR_NUNITS (x);
6694 for (i = 1; i < count; i++)
6695 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6696 return false;
6697
6698 return true;
6699}
6700
6701/* Check of immediate shift constants are within range. */
6702bool
6703aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6704{
6705 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6706 if (left)
6707 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6708 else
6709 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6710}
6711
3520f7cc
JG
6712/* Return true if X is a uniform vector where all elements
6713 are either the floating-point constant 0.0 or the
6714 integer constant 0. */
43e9d192
IB
6715bool
6716aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6717{
3520f7cc 6718 return x == CONST0_RTX (mode);
43e9d192
IB
6719}
6720
6721bool
6722aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6723{
6724 HOST_WIDE_INT imm = INTVAL (x);
6725 int i;
6726
6727 for (i = 0; i < 8; i++)
6728 {
6729 unsigned int byte = imm & 0xff;
6730 if (byte != 0xff && byte != 0)
6731 return false;
6732 imm >>= 8;
6733 }
6734
6735 return true;
6736}
6737
83f8c414
CSS
6738bool
6739aarch64_mov_operand_p (rtx x,
a5350ddc 6740 enum aarch64_symbol_context context,
83f8c414
CSS
6741 enum machine_mode mode)
6742{
83f8c414
CSS
6743 if (GET_CODE (x) == HIGH
6744 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6745 return true;
6746
6747 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
6748 return true;
6749
6750 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
6751 return true;
6752
a5350ddc
CSS
6753 return aarch64_classify_symbolic_expression (x, context)
6754 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
6755}
6756
43e9d192
IB
6757/* Return a const_int vector of VAL. */
6758rtx
6759aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6760{
6761 int nunits = GET_MODE_NUNITS (mode);
6762 rtvec v = rtvec_alloc (nunits);
6763 int i;
6764
6765 for (i=0; i < nunits; i++)
6766 RTVEC_ELT (v, i) = GEN_INT (val);
6767
6768 return gen_rtx_CONST_VECTOR (mode, v);
6769}
6770
051d0e2f
SN
6771/* Check OP is a legal scalar immediate for the MOVI instruction. */
6772
6773bool
6774aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
6775{
6776 enum machine_mode vmode;
6777
6778 gcc_assert (!VECTOR_MODE_P (mode));
6779 vmode = aarch64_preferred_simd_mode (mode);
6780 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
48063b9d 6781 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
051d0e2f
SN
6782}
6783
43e9d192
IB
6784/* Construct and return a PARALLEL RTX vector. */
6785rtx
6786aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6787{
6788 int nunits = GET_MODE_NUNITS (mode);
6789 rtvec v = rtvec_alloc (nunits / 2);
6790 int base = high ? nunits / 2 : 0;
6791 rtx t1;
6792 int i;
6793
6794 for (i=0; i < nunits / 2; i++)
6795 RTVEC_ELT (v, i) = GEN_INT (base + i);
6796
6797 t1 = gen_rtx_PARALLEL (mode, v);
6798 return t1;
6799}
6800
6801/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6802 HIGH (exclusive). */
6803void
6804aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6805{
6806 HOST_WIDE_INT lane;
6807 gcc_assert (GET_CODE (operand) == CONST_INT);
6808 lane = INTVAL (operand);
6809
6810 if (lane < low || lane >= high)
6811 error ("lane out of range");
6812}
6813
6814void
6815aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6816{
6817 gcc_assert (GET_CODE (operand) == CONST_INT);
6818 HOST_WIDE_INT lane = INTVAL (operand);
6819
6820 if (lane < low || lane >= high)
6821 error ("constant out of range");
6822}
6823
6824/* Emit code to reinterpret one AdvSIMD type as another,
6825 without altering bits. */
6826void
6827aarch64_simd_reinterpret (rtx dest, rtx src)
6828{
6829 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6830}
6831
6832/* Emit code to place a AdvSIMD pair result in memory locations (with equal
6833 registers). */
6834void
6835aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6836 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6837 rtx op1)
6838{
6839 rtx mem = gen_rtx_MEM (mode, destaddr);
6840 rtx tmp1 = gen_reg_rtx (mode);
6841 rtx tmp2 = gen_reg_rtx (mode);
6842
6843 emit_insn (intfn (tmp1, op1, tmp2));
6844
6845 emit_move_insn (mem, tmp1);
6846 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6847 emit_move_insn (mem, tmp2);
6848}
6849
6850/* Return TRUE if OP is a valid vector addressing mode. */
6851bool
6852aarch64_simd_mem_operand_p (rtx op)
6853{
6854 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6855 || GET_CODE (XEXP (op, 0)) == REG);
6856}
6857
6858/* Set up OPERANDS for a register copy from SRC to DEST, taking care
6859 not to early-clobber SRC registers in the process.
6860
6861 We assume that the operands described by SRC and DEST represent a
6862 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6863 number of components into which the copy has been decomposed. */
6864void
6865aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6866 rtx *src, unsigned int count)
6867{
6868 unsigned int i;
6869
6870 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6871 || REGNO (operands[0]) < REGNO (operands[1]))
6872 {
6873 for (i = 0; i < count; i++)
6874 {
6875 operands[2 * i] = dest[i];
6876 operands[2 * i + 1] = src[i];
6877 }
6878 }
6879 else
6880 {
6881 for (i = 0; i < count; i++)
6882 {
6883 operands[2 * i] = dest[count - i - 1];
6884 operands[2 * i + 1] = src[count - i - 1];
6885 }
6886 }
6887}
6888
6889/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6890 one of VSTRUCT modes: OI, CI or XI. */
6891int
6892aarch64_simd_attr_length_move (rtx insn)
6893{
43e9d192
IB
6894 enum machine_mode mode;
6895
6896 extract_insn_cached (insn);
6897
6898 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6899 {
6900 mode = GET_MODE (recog_data.operand[0]);
6901 switch (mode)
6902 {
6903 case OImode:
6904 return 8;
6905 case CImode:
6906 return 12;
6907 case XImode:
6908 return 16;
6909 default:
6910 gcc_unreachable ();
6911 }
6912 }
6913 return 4;
6914}
6915
db0253a4
TB
6916/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6917 alignment of a vector to 128 bits. */
6918static HOST_WIDE_INT
6919aarch64_simd_vector_alignment (const_tree type)
6920{
9439e9a1 6921 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
db0253a4
TB
6922 return MIN (align, 128);
6923}
6924
6925/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6926static bool
6927aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6928{
6929 if (is_packed)
6930 return false;
6931
6932 /* We guarantee alignment for vectors up to 128-bits. */
6933 if (tree_int_cst_compare (TYPE_SIZE (type),
6934 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6935 return false;
6936
6937 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6938 return true;
6939}
6940
4369c11e
TB
6941/* If VALS is a vector constant that can be loaded into a register
6942 using DUP, generate instructions to do so and return an RTX to
6943 assign to the register. Otherwise return NULL_RTX. */
6944static rtx
6945aarch64_simd_dup_constant (rtx vals)
6946{
6947 enum machine_mode mode = GET_MODE (vals);
6948 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6949 int n_elts = GET_MODE_NUNITS (mode);
6950 bool all_same = true;
6951 rtx x;
6952 int i;
6953
6954 if (GET_CODE (vals) != CONST_VECTOR)
6955 return NULL_RTX;
6956
6957 for (i = 1; i < n_elts; ++i)
6958 {
6959 x = CONST_VECTOR_ELT (vals, i);
6960 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6961 all_same = false;
6962 }
6963
6964 if (!all_same)
6965 return NULL_RTX;
6966
6967 /* We can load this constant by using DUP and a constant in a
6968 single ARM register. This will be cheaper than a vector
6969 load. */
6970 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6971 return gen_rtx_VEC_DUPLICATE (mode, x);
6972}
6973
6974
6975/* Generate code to load VALS, which is a PARALLEL containing only
6976 constants (for vec_init) or CONST_VECTOR, efficiently into a
6977 register. Returns an RTX to copy into the register, or NULL_RTX
6978 for a PARALLEL that can not be converted into a CONST_VECTOR. */
1df3f464 6979static rtx
4369c11e
TB
6980aarch64_simd_make_constant (rtx vals)
6981{
6982 enum machine_mode mode = GET_MODE (vals);
6983 rtx const_dup;
6984 rtx const_vec = NULL_RTX;
6985 int n_elts = GET_MODE_NUNITS (mode);
6986 int n_const = 0;
6987 int i;
6988
6989 if (GET_CODE (vals) == CONST_VECTOR)
6990 const_vec = vals;
6991 else if (GET_CODE (vals) == PARALLEL)
6992 {
6993 /* A CONST_VECTOR must contain only CONST_INTs and
6994 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6995 Only store valid constants in a CONST_VECTOR. */
6996 for (i = 0; i < n_elts; ++i)
6997 {
6998 rtx x = XVECEXP (vals, 0, i);
6999 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
7000 n_const++;
7001 }
7002 if (n_const == n_elts)
7003 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7004 }
7005 else
7006 gcc_unreachable ();
7007
7008 if (const_vec != NULL_RTX
48063b9d 7009 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
4369c11e
TB
7010 /* Load using MOVI/MVNI. */
7011 return const_vec;
7012 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
7013 /* Loaded using DUP. */
7014 return const_dup;
7015 else if (const_vec != NULL_RTX)
7016 /* Load from constant pool. We can not take advantage of single-cycle
7017 LD1 because we need a PC-relative addressing mode. */
7018 return const_vec;
7019 else
7020 /* A PARALLEL containing something not valid inside CONST_VECTOR.
7021 We can not construct an initializer. */
7022 return NULL_RTX;
7023}
7024
7025void
7026aarch64_expand_vector_init (rtx target, rtx vals)
7027{
7028 enum machine_mode mode = GET_MODE (target);
7029 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7030 int n_elts = GET_MODE_NUNITS (mode);
7031 int n_var = 0, one_var = -1;
7032 bool all_same = true;
7033 rtx x, mem;
7034 int i;
7035
7036 x = XVECEXP (vals, 0, 0);
7037 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7038 n_var = 1, one_var = 0;
7039
7040 for (i = 1; i < n_elts; ++i)
7041 {
7042 x = XVECEXP (vals, 0, i);
7043 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7044 ++n_var, one_var = i;
7045
7046 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7047 all_same = false;
7048 }
7049
7050 if (n_var == 0)
7051 {
7052 rtx constant = aarch64_simd_make_constant (vals);
7053 if (constant != NULL_RTX)
7054 {
7055 emit_move_insn (target, constant);
7056 return;
7057 }
7058 }
7059
7060 /* Splat a single non-constant element if we can. */
7061 if (all_same)
7062 {
7063 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
7064 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
7065 return;
7066 }
7067
7068 /* One field is non-constant. Load constant then overwrite varying
7069 field. This is more efficient than using the stack. */
7070 if (n_var == 1)
7071 {
7072 rtx copy = copy_rtx (vals);
7073 rtx index = GEN_INT (one_var);
7074 enum insn_code icode;
7075
7076 /* Load constant part of vector, substitute neighboring value for
7077 varying element. */
7078 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
7079 aarch64_expand_vector_init (target, copy);
7080
7081 /* Insert variable. */
7082 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
7083 icode = optab_handler (vec_set_optab, mode);
7084 gcc_assert (icode != CODE_FOR_nothing);
7085 emit_insn (GEN_FCN (icode) (target, x, index));
7086 return;
7087 }
7088
7089 /* Construct the vector in memory one field at a time
7090 and load the whole vector. */
7091 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7092 for (i = 0; i < n_elts; i++)
7093 emit_move_insn (adjust_address_nv (mem, inner_mode,
7094 i * GET_MODE_SIZE (inner_mode)),
7095 XVECEXP (vals, 0, i));
7096 emit_move_insn (target, mem);
7097
7098}
7099
43e9d192
IB
7100static unsigned HOST_WIDE_INT
7101aarch64_shift_truncation_mask (enum machine_mode mode)
7102{
7103 return
7104 (aarch64_vector_mode_supported_p (mode)
7105 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
7106}
7107
7108#ifndef TLS_SECTION_ASM_FLAG
7109#define TLS_SECTION_ASM_FLAG 'T'
7110#endif
7111
7112void
7113aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7114 tree decl ATTRIBUTE_UNUSED)
7115{
7116 char flagchars[10], *f = flagchars;
7117
7118 /* If we have already declared this section, we can use an
7119 abbreviated form to switch back to it -- unless this section is
7120 part of a COMDAT groups, in which case GAS requires the full
7121 declaration every time. */
7122 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7123 && (flags & SECTION_DECLARED))
7124 {
7125 fprintf (asm_out_file, "\t.section\t%s\n", name);
7126 return;
7127 }
7128
7129 if (!(flags & SECTION_DEBUG))
7130 *f++ = 'a';
7131 if (flags & SECTION_WRITE)
7132 *f++ = 'w';
7133 if (flags & SECTION_CODE)
7134 *f++ = 'x';
7135 if (flags & SECTION_SMALL)
7136 *f++ = 's';
7137 if (flags & SECTION_MERGE)
7138 *f++ = 'M';
7139 if (flags & SECTION_STRINGS)
7140 *f++ = 'S';
7141 if (flags & SECTION_TLS)
7142 *f++ = TLS_SECTION_ASM_FLAG;
7143 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7144 *f++ = 'G';
7145 *f = '\0';
7146
7147 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7148
7149 if (!(flags & SECTION_NOTYPE))
7150 {
7151 const char *type;
7152 const char *format;
7153
7154 if (flags & SECTION_BSS)
7155 type = "nobits";
7156 else
7157 type = "progbits";
7158
7159#ifdef TYPE_OPERAND_FMT
7160 format = "," TYPE_OPERAND_FMT;
7161#else
7162 format = ",@%s";
7163#endif
7164
7165 fprintf (asm_out_file, format, type);
7166
7167 if (flags & SECTION_ENTSIZE)
7168 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7169 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7170 {
7171 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7172 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7173 else
7174 fprintf (asm_out_file, ",%s,comdat",
7175 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7176 }
7177 }
7178
7179 putc ('\n', asm_out_file);
7180}
7181
7182/* Select a format to encode pointers in exception handling data. */
7183int
7184aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7185{
7186 int type;
7187 switch (aarch64_cmodel)
7188 {
7189 case AARCH64_CMODEL_TINY:
7190 case AARCH64_CMODEL_TINY_PIC:
7191 case AARCH64_CMODEL_SMALL:
7192 case AARCH64_CMODEL_SMALL_PIC:
7193 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
7194 for everything. */
7195 type = DW_EH_PE_sdata4;
7196 break;
7197 default:
7198 /* No assumptions here. 8-byte relocs required. */
7199 type = DW_EH_PE_sdata8;
7200 break;
7201 }
7202 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7203}
7204
0462169c
SN
7205/* Emit load exclusive. */
7206
7207static void
7208aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7209 rtx mem, rtx model_rtx)
7210{
7211 rtx (*gen) (rtx, rtx, rtx);
7212
7213 switch (mode)
7214 {
7215 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7216 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7217 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7218 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7219 default:
7220 gcc_unreachable ();
7221 }
7222
7223 emit_insn (gen (rval, mem, model_rtx));
7224}
7225
7226/* Emit store exclusive. */
7227
7228static void
7229aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7230 rtx rval, rtx mem, rtx model_rtx)
7231{
7232 rtx (*gen) (rtx, rtx, rtx, rtx);
7233
7234 switch (mode)
7235 {
7236 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7237 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7238 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7239 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7240 default:
7241 gcc_unreachable ();
7242 }
7243
7244 emit_insn (gen (bval, rval, mem, model_rtx));
7245}
7246
7247/* Mark the previous jump instruction as unlikely. */
7248
7249static void
7250aarch64_emit_unlikely_jump (rtx insn)
7251{
e5af9ddd 7252 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
0462169c
SN
7253
7254 insn = emit_jump_insn (insn);
e5af9ddd 7255 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
0462169c
SN
7256}
7257
7258/* Expand a compare and swap pattern. */
7259
7260void
7261aarch64_expand_compare_and_swap (rtx operands[])
7262{
7263 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7264 enum machine_mode mode, cmp_mode;
7265 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7266
7267 bval = operands[0];
7268 rval = operands[1];
7269 mem = operands[2];
7270 oldval = operands[3];
7271 newval = operands[4];
7272 is_weak = operands[5];
7273 mod_s = operands[6];
7274 mod_f = operands[7];
7275 mode = GET_MODE (mem);
7276 cmp_mode = mode;
7277
7278 /* Normally the succ memory model must be stronger than fail, but in the
7279 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7280 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7281
7282 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7283 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7284 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7285
7286 switch (mode)
7287 {
7288 case QImode:
7289 case HImode:
7290 /* For short modes, we're going to perform the comparison in SImode,
7291 so do the zero-extension now. */
7292 cmp_mode = SImode;
7293 rval = gen_reg_rtx (SImode);
7294 oldval = convert_modes (SImode, mode, oldval, true);
7295 /* Fall through. */
7296
7297 case SImode:
7298 case DImode:
7299 /* Force the value into a register if needed. */
7300 if (!aarch64_plus_operand (oldval, mode))
7301 oldval = force_reg (cmp_mode, oldval);
7302 break;
7303
7304 default:
7305 gcc_unreachable ();
7306 }
7307
7308 switch (mode)
7309 {
7310 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7311 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7312 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7313 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7314 default:
7315 gcc_unreachable ();
7316 }
7317
7318 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7319
7320 if (mode == QImode || mode == HImode)
7321 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7322
7323 x = gen_rtx_REG (CCmode, CC_REGNUM);
7324 x = gen_rtx_EQ (SImode, x, const0_rtx);
7325 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7326}
7327
7328/* Split a compare and swap pattern. */
7329
7330void
7331aarch64_split_compare_and_swap (rtx operands[])
7332{
7333 rtx rval, mem, oldval, newval, scratch;
7334 enum machine_mode mode;
0462169c
SN
7335 bool is_weak;
7336 rtx label1, label2, x, cond;
7337
7338 rval = operands[0];
7339 mem = operands[1];
7340 oldval = operands[2];
7341 newval = operands[3];
7342 is_weak = (operands[4] != const0_rtx);
0462169c
SN
7343 scratch = operands[7];
7344 mode = GET_MODE (mem);
7345
7346 label1 = NULL_RTX;
7347 if (!is_weak)
7348 {
7349 label1 = gen_label_rtx ();
7350 emit_label (label1);
7351 }
7352 label2 = gen_label_rtx ();
7353
7354 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7355
7356 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7357 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7358 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7359 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7360 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7361
7362 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7363
7364 if (!is_weak)
7365 {
7366 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7367 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7368 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7369 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7370 }
7371 else
7372 {
7373 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7374 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7375 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7376 }
7377
7378 emit_label (label2);
7379}
7380
7381/* Split an atomic operation. */
7382
7383void
7384aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7385 rtx value, rtx model_rtx, rtx cond)
7386{
7387 enum machine_mode mode = GET_MODE (mem);
7388 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7389 rtx label, x;
7390
7391 label = gen_label_rtx ();
7392 emit_label (label);
7393
7394 if (new_out)
7395 new_out = gen_lowpart (wmode, new_out);
7396 if (old_out)
7397 old_out = gen_lowpart (wmode, old_out);
7398 else
7399 old_out = new_out;
7400 value = simplify_gen_subreg (wmode, value, mode, 0);
7401
7402 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7403
7404 switch (code)
7405 {
7406 case SET:
7407 new_out = value;
7408 break;
7409
7410 case NOT:
7411 x = gen_rtx_AND (wmode, old_out, value);
7412 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7413 x = gen_rtx_NOT (wmode, new_out);
7414 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7415 break;
7416
7417 case MINUS:
7418 if (CONST_INT_P (value))
7419 {
7420 value = GEN_INT (-INTVAL (value));
7421 code = PLUS;
7422 }
7423 /* Fall through. */
7424
7425 default:
7426 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7427 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7428 break;
7429 }
7430
7431 aarch64_emit_store_exclusive (mode, cond, mem,
7432 gen_lowpart (mode, new_out), model_rtx);
7433
7434 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7435 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7436 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7437 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7438}
7439
95ca411e
YZ
7440static void
7441aarch64_print_extension (void)
7442{
7443 const struct aarch64_option_extension *opt = NULL;
7444
7445 for (opt = all_extensions; opt->name != NULL; opt++)
7446 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7447 asm_fprintf (asm_out_file, "+%s", opt->name);
7448
7449 asm_fprintf (asm_out_file, "\n");
7450}
7451
43e9d192
IB
7452static void
7453aarch64_start_file (void)
7454{
7455 if (selected_arch)
95ca411e
YZ
7456 {
7457 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7458 aarch64_print_extension ();
7459 }
43e9d192 7460 else if (selected_cpu)
95ca411e 7461 {
682287fb
JG
7462 const char *truncated_name
7463 = aarch64_rewrite_selected_cpu (selected_cpu->name);
7464 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
95ca411e
YZ
7465 aarch64_print_extension ();
7466 }
43e9d192
IB
7467 default_file_start();
7468}
7469
7470/* Target hook for c_mode_for_suffix. */
7471static enum machine_mode
7472aarch64_c_mode_for_suffix (char suffix)
7473{
7474 if (suffix == 'q')
7475 return TFmode;
7476
7477 return VOIDmode;
7478}
7479
3520f7cc
JG
7480/* We can only represent floating point constants which will fit in
7481 "quarter-precision" values. These values are characterised by
7482 a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
7483 by:
7484
7485 (-1)^s * (n/16) * 2^r
7486
7487 Where:
7488 's' is the sign bit.
7489 'n' is an integer in the range 16 <= n <= 31.
7490 'r' is an integer in the range -3 <= r <= 4. */
7491
7492/* Return true iff X can be represented by a quarter-precision
7493 floating point immediate operand X. Note, we cannot represent 0.0. */
7494bool
7495aarch64_float_const_representable_p (rtx x)
7496{
7497 /* This represents our current view of how many bits
7498 make up the mantissa. */
7499 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 7500 int exponent;
3520f7cc
JG
7501 unsigned HOST_WIDE_INT mantissa, mask;
7502 HOST_WIDE_INT m1, m2;
7503 REAL_VALUE_TYPE r, m;
7504
7505 if (!CONST_DOUBLE_P (x))
7506 return false;
7507
7508 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7509
7510 /* We cannot represent infinities, NaNs or +/-zero. We won't
7511 know if we have +zero until we analyse the mantissa, but we
7512 can reject the other invalid values. */
7513 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7514 || REAL_VALUE_MINUS_ZERO (r))
7515 return false;
7516
ba96cdfb 7517 /* Extract exponent. */
3520f7cc
JG
7518 r = real_value_abs (&r);
7519 exponent = REAL_EXP (&r);
7520
7521 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7522 highest (sign) bit, with a fixed binary point at bit point_pos.
7523 m1 holds the low part of the mantissa, m2 the high part.
7524 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7525 bits for the mantissa, this can fail (low bits will be lost). */
7526 real_ldexp (&m, &r, point_pos - exponent);
7527 REAL_VALUE_TO_INT (&m1, &m2, m);
7528
7529 /* If the low part of the mantissa has bits set we cannot represent
7530 the value. */
7531 if (m1 != 0)
7532 return false;
7533 /* We have rejected the lower HOST_WIDE_INT, so update our
7534 understanding of how many bits lie in the mantissa and
7535 look only at the high HOST_WIDE_INT. */
7536 mantissa = m2;
7537 point_pos -= HOST_BITS_PER_WIDE_INT;
7538
7539 /* We can only represent values with a mantissa of the form 1.xxxx. */
7540 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7541 if ((mantissa & mask) != 0)
7542 return false;
7543
7544 /* Having filtered unrepresentable values, we may now remove all
7545 but the highest 5 bits. */
7546 mantissa >>= point_pos - 5;
7547
7548 /* We cannot represent the value 0.0, so reject it. This is handled
7549 elsewhere. */
7550 if (mantissa == 0)
7551 return false;
7552
7553 /* Then, as bit 4 is always set, we can mask it off, leaving
7554 the mantissa in the range [0, 15]. */
7555 mantissa &= ~(1 << 4);
7556 gcc_assert (mantissa <= 15);
7557
7558 /* GCC internally does not use IEEE754-like encoding (where normalized
7559 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
7560 Our mantissa values are shifted 4 places to the left relative to
7561 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7562 by 5 places to correct for GCC's representation. */
7563 exponent = 5 - exponent;
7564
7565 return (exponent >= 0 && exponent <= 7);
7566}
7567
7568char*
81c2dfb9 7569aarch64_output_simd_mov_immediate (rtx const_vector,
3520f7cc
JG
7570 enum machine_mode mode,
7571 unsigned width)
7572{
3ea63f60 7573 bool is_valid;
3520f7cc 7574 static char templ[40];
3520f7cc 7575 const char *mnemonic;
e4f0f84d 7576 const char *shift_op;
3520f7cc 7577 unsigned int lane_count = 0;
81c2dfb9 7578 char element_char;
3520f7cc 7579
e4f0f84d 7580 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
48063b9d
IB
7581
7582 /* This will return true to show const_vector is legal for use as either
7583 a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
7584 also update INFO to show how the immediate should be generated. */
81c2dfb9 7585 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
3520f7cc
JG
7586 gcc_assert (is_valid);
7587
81c2dfb9 7588 element_char = sizetochar (info.element_width);
48063b9d
IB
7589 lane_count = width / info.element_width;
7590
3520f7cc
JG
7591 mode = GET_MODE_INNER (mode);
7592 if (mode == SFmode || mode == DFmode)
7593 {
48063b9d
IB
7594 gcc_assert (info.shift == 0 && ! info.mvn);
7595 if (aarch64_float_const_zero_rtx_p (info.value))
7596 info.value = GEN_INT (0);
7597 else
7598 {
7599#define buf_size 20
7600 REAL_VALUE_TYPE r;
7601 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
7602 char float_buf[buf_size] = {'\0'};
7603 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
7604#undef buf_size
7605
7606 if (lane_count == 1)
7607 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
7608 else
7609 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 7610 lane_count, element_char, float_buf);
48063b9d
IB
7611 return templ;
7612 }
3520f7cc 7613 }
3520f7cc 7614
48063b9d 7615 mnemonic = info.mvn ? "mvni" : "movi";
e4f0f84d 7616 shift_op = info.msl ? "msl" : "lsl";
3520f7cc
JG
7617
7618 if (lane_count == 1)
48063b9d
IB
7619 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
7620 mnemonic, UINTVAL (info.value));
7621 else if (info.shift)
7622 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
e4f0f84d
TB
7623 ", %s %d", mnemonic, lane_count, element_char,
7624 UINTVAL (info.value), shift_op, info.shift);
3520f7cc 7625 else
48063b9d 7626 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
81c2dfb9 7627 mnemonic, lane_count, element_char, UINTVAL (info.value));
3520f7cc
JG
7628 return templ;
7629}
7630
b7342d25
IB
7631char*
7632aarch64_output_scalar_simd_mov_immediate (rtx immediate,
7633 enum machine_mode mode)
7634{
7635 enum machine_mode vmode;
7636
7637 gcc_assert (!VECTOR_MODE_P (mode));
7638 vmode = aarch64_simd_container_mode (mode, 64);
7639 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
7640 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
7641}
7642
88b08073
JG
7643/* Split operands into moves from op[1] + op[2] into op[0]. */
7644
7645void
7646aarch64_split_combinev16qi (rtx operands[3])
7647{
7648 unsigned int dest = REGNO (operands[0]);
7649 unsigned int src1 = REGNO (operands[1]);
7650 unsigned int src2 = REGNO (operands[2]);
7651 enum machine_mode halfmode = GET_MODE (operands[1]);
7652 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7653 rtx destlo, desthi;
7654
7655 gcc_assert (halfmode == V16QImode);
7656
7657 if (src1 == dest && src2 == dest + halfregs)
7658 {
7659 /* No-op move. Can't split to nothing; emit something. */
7660 emit_note (NOTE_INSN_DELETED);
7661 return;
7662 }
7663
7664 /* Preserve register attributes for variable tracking. */
7665 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7666 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7667 GET_MODE_SIZE (halfmode));
7668
7669 /* Special case of reversed high/low parts. */
7670 if (reg_overlap_mentioned_p (operands[2], destlo)
7671 && reg_overlap_mentioned_p (operands[1], desthi))
7672 {
7673 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7674 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7675 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7676 }
7677 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7678 {
7679 /* Try to avoid unnecessary moves if part of the result
7680 is in the right place already. */
7681 if (src1 != dest)
7682 emit_move_insn (destlo, operands[1]);
7683 if (src2 != dest + halfregs)
7684 emit_move_insn (desthi, operands[2]);
7685 }
7686 else
7687 {
7688 if (src2 != dest + halfregs)
7689 emit_move_insn (desthi, operands[2]);
7690 if (src1 != dest)
7691 emit_move_insn (destlo, operands[1]);
7692 }
7693}
7694
7695/* vec_perm support. */
7696
7697#define MAX_VECT_LEN 16
7698
7699struct expand_vec_perm_d
7700{
7701 rtx target, op0, op1;
7702 unsigned char perm[MAX_VECT_LEN];
7703 enum machine_mode vmode;
7704 unsigned char nelt;
7705 bool one_vector_p;
7706 bool testing_p;
7707};
7708
7709/* Generate a variable permutation. */
7710
7711static void
7712aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7713{
7714 enum machine_mode vmode = GET_MODE (target);
7715 bool one_vector_p = rtx_equal_p (op0, op1);
7716
7717 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7718 gcc_checking_assert (GET_MODE (op0) == vmode);
7719 gcc_checking_assert (GET_MODE (op1) == vmode);
7720 gcc_checking_assert (GET_MODE (sel) == vmode);
7721 gcc_checking_assert (TARGET_SIMD);
7722
7723 if (one_vector_p)
7724 {
7725 if (vmode == V8QImode)
7726 {
7727 /* Expand the argument to a V16QI mode by duplicating it. */
7728 rtx pair = gen_reg_rtx (V16QImode);
7729 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7730 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7731 }
7732 else
7733 {
7734 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7735 }
7736 }
7737 else
7738 {
7739 rtx pair;
7740
7741 if (vmode == V8QImode)
7742 {
7743 pair = gen_reg_rtx (V16QImode);
7744 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7745 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7746 }
7747 else
7748 {
7749 pair = gen_reg_rtx (OImode);
7750 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7751 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7752 }
7753 }
7754}
7755
7756void
7757aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7758{
7759 enum machine_mode vmode = GET_MODE (target);
7760 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7761 bool one_vector_p = rtx_equal_p (op0, op1);
7762 rtx rmask[MAX_VECT_LEN], mask;
7763
7764 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7765
7766 /* The TBL instruction does not use a modulo index, so we must take care
7767 of that ourselves. */
7768 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7769 for (i = 0; i < nelt; ++i)
7770 rmask[i] = mask;
7771 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7772 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7773
7774 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7775}
7776
cc4d934f
JG
7777/* Recognize patterns suitable for the TRN instructions. */
7778static bool
7779aarch64_evpc_trn (struct expand_vec_perm_d *d)
7780{
7781 unsigned int i, odd, mask, nelt = d->nelt;
7782 rtx out, in0, in1, x;
7783 rtx (*gen) (rtx, rtx, rtx);
7784 enum machine_mode vmode = d->vmode;
7785
7786 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7787 return false;
7788
7789 /* Note that these are little-endian tests.
7790 We correct for big-endian later. */
7791 if (d->perm[0] == 0)
7792 odd = 0;
7793 else if (d->perm[0] == 1)
7794 odd = 1;
7795 else
7796 return false;
7797 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7798
7799 for (i = 0; i < nelt; i += 2)
7800 {
7801 if (d->perm[i] != i + odd)
7802 return false;
7803 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7804 return false;
7805 }
7806
7807 /* Success! */
7808 if (d->testing_p)
7809 return true;
7810
7811 in0 = d->op0;
7812 in1 = d->op1;
7813 if (BYTES_BIG_ENDIAN)
7814 {
7815 x = in0, in0 = in1, in1 = x;
7816 odd = !odd;
7817 }
7818 out = d->target;
7819
7820 if (odd)
7821 {
7822 switch (vmode)
7823 {
7824 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7825 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7826 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7827 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7828 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7829 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7830 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7831 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7832 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7833 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7834 default:
7835 return false;
7836 }
7837 }
7838 else
7839 {
7840 switch (vmode)
7841 {
7842 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7843 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7844 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7845 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7846 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7847 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7848 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7849 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7850 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7851 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7852 default:
7853 return false;
7854 }
7855 }
7856
7857 emit_insn (gen (out, in0, in1));
7858 return true;
7859}
7860
7861/* Recognize patterns suitable for the UZP instructions. */
7862static bool
7863aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7864{
7865 unsigned int i, odd, mask, nelt = d->nelt;
7866 rtx out, in0, in1, x;
7867 rtx (*gen) (rtx, rtx, rtx);
7868 enum machine_mode vmode = d->vmode;
7869
7870 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7871 return false;
7872
7873 /* Note that these are little-endian tests.
7874 We correct for big-endian later. */
7875 if (d->perm[0] == 0)
7876 odd = 0;
7877 else if (d->perm[0] == 1)
7878 odd = 1;
7879 else
7880 return false;
7881 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7882
7883 for (i = 0; i < nelt; i++)
7884 {
7885 unsigned elt = (i * 2 + odd) & mask;
7886 if (d->perm[i] != elt)
7887 return false;
7888 }
7889
7890 /* Success! */
7891 if (d->testing_p)
7892 return true;
7893
7894 in0 = d->op0;
7895 in1 = d->op1;
7896 if (BYTES_BIG_ENDIAN)
7897 {
7898 x = in0, in0 = in1, in1 = x;
7899 odd = !odd;
7900 }
7901 out = d->target;
7902
7903 if (odd)
7904 {
7905 switch (vmode)
7906 {
7907 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7908 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7909 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7910 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7911 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7912 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7913 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7914 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7915 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7916 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7917 default:
7918 return false;
7919 }
7920 }
7921 else
7922 {
7923 switch (vmode)
7924 {
7925 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7926 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7927 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7928 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7929 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7930 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7931 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7932 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7933 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7934 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7935 default:
7936 return false;
7937 }
7938 }
7939
7940 emit_insn (gen (out, in0, in1));
7941 return true;
7942}
7943
7944/* Recognize patterns suitable for the ZIP instructions. */
7945static bool
7946aarch64_evpc_zip (struct expand_vec_perm_d *d)
7947{
7948 unsigned int i, high, mask, nelt = d->nelt;
7949 rtx out, in0, in1, x;
7950 rtx (*gen) (rtx, rtx, rtx);
7951 enum machine_mode vmode = d->vmode;
7952
7953 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7954 return false;
7955
7956 /* Note that these are little-endian tests.
7957 We correct for big-endian later. */
7958 high = nelt / 2;
7959 if (d->perm[0] == high)
7960 /* Do Nothing. */
7961 ;
7962 else if (d->perm[0] == 0)
7963 high = 0;
7964 else
7965 return false;
7966 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7967
7968 for (i = 0; i < nelt / 2; i++)
7969 {
7970 unsigned elt = (i + high) & mask;
7971 if (d->perm[i * 2] != elt)
7972 return false;
7973 elt = (elt + nelt) & mask;
7974 if (d->perm[i * 2 + 1] != elt)
7975 return false;
7976 }
7977
7978 /* Success! */
7979 if (d->testing_p)
7980 return true;
7981
7982 in0 = d->op0;
7983 in1 = d->op1;
7984 if (BYTES_BIG_ENDIAN)
7985 {
7986 x = in0, in0 = in1, in1 = x;
7987 high = !high;
7988 }
7989 out = d->target;
7990
7991 if (high)
7992 {
7993 switch (vmode)
7994 {
7995 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7996 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7997 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7998 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
7999 case V4SImode: gen = gen_aarch64_zip2v4si; break;
8000 case V2SImode: gen = gen_aarch64_zip2v2si; break;
8001 case V2DImode: gen = gen_aarch64_zip2v2di; break;
8002 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
8003 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
8004 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
8005 default:
8006 return false;
8007 }
8008 }
8009 else
8010 {
8011 switch (vmode)
8012 {
8013 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
8014 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
8015 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
8016 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
8017 case V4SImode: gen = gen_aarch64_zip1v4si; break;
8018 case V2SImode: gen = gen_aarch64_zip1v2si; break;
8019 case V2DImode: gen = gen_aarch64_zip1v2di; break;
8020 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
8021 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
8022 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
8023 default:
8024 return false;
8025 }
8026 }
8027
8028 emit_insn (gen (out, in0, in1));
8029 return true;
8030}
8031
91bd4114
JG
8032static bool
8033aarch64_evpc_dup (struct expand_vec_perm_d *d)
8034{
8035 rtx (*gen) (rtx, rtx, rtx);
8036 rtx out = d->target;
8037 rtx in0;
8038 enum machine_mode vmode = d->vmode;
8039 unsigned int i, elt, nelt = d->nelt;
8040 rtx lane;
8041
8042 /* TODO: This may not be big-endian safe. */
8043 if (BYTES_BIG_ENDIAN)
8044 return false;
8045
8046 elt = d->perm[0];
8047 for (i = 1; i < nelt; i++)
8048 {
8049 if (elt != d->perm[i])
8050 return false;
8051 }
8052
8053 /* The generic preparation in aarch64_expand_vec_perm_const_1
8054 swaps the operand order and the permute indices if it finds
8055 d->perm[0] to be in the second operand. Thus, we can always
8056 use d->op0 and need not do any extra arithmetic to get the
8057 correct lane number. */
8058 in0 = d->op0;
8059 lane = GEN_INT (elt);
8060
8061 switch (vmode)
8062 {
8063 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
8064 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
8065 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
8066 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
8067 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
8068 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
8069 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
8070 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
8071 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
8072 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
8073 default:
8074 return false;
8075 }
8076
8077 emit_insn (gen (out, in0, lane));
8078 return true;
8079}
8080
88b08073
JG
8081static bool
8082aarch64_evpc_tbl (struct expand_vec_perm_d *d)
8083{
8084 rtx rperm[MAX_VECT_LEN], sel;
8085 enum machine_mode vmode = d->vmode;
8086 unsigned int i, nelt = d->nelt;
8087
8088 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
8089 numbering of elements for big-endian, we must reverse the order. */
8090 if (BYTES_BIG_ENDIAN)
8091 return false;
8092
8093 if (d->testing_p)
8094 return true;
8095
8096 /* Generic code will try constant permutation twice. Once with the
8097 original mode and again with the elements lowered to QImode.
8098 So wait and don't do the selector expansion ourselves. */
8099 if (vmode != V8QImode && vmode != V16QImode)
8100 return false;
8101
8102 for (i = 0; i < nelt; ++i)
8103 rperm[i] = GEN_INT (d->perm[i]);
8104 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
8105 sel = force_reg (vmode, sel);
8106
8107 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
8108 return true;
8109}
8110
8111static bool
8112aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
8113{
8114 /* The pattern matching functions above are written to look for a small
8115 number to begin the sequence (0, 1, N/2). If we begin with an index
8116 from the second operand, we can swap the operands. */
8117 if (d->perm[0] >= d->nelt)
8118 {
8119 unsigned i, nelt = d->nelt;
8120 rtx x;
8121
8122 for (i = 0; i < nelt; ++i)
8123 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
8124
8125 x = d->op0;
8126 d->op0 = d->op1;
8127 d->op1 = x;
8128 }
8129
8130 if (TARGET_SIMD)
cc4d934f
JG
8131 {
8132 if (aarch64_evpc_zip (d))
8133 return true;
8134 else if (aarch64_evpc_uzp (d))
8135 return true;
8136 else if (aarch64_evpc_trn (d))
8137 return true;
91bd4114
JG
8138 else if (aarch64_evpc_dup (d))
8139 return true;
cc4d934f
JG
8140 return aarch64_evpc_tbl (d);
8141 }
88b08073
JG
8142 return false;
8143}
8144
8145/* Expand a vec_perm_const pattern. */
8146
8147bool
8148aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
8149{
8150 struct expand_vec_perm_d d;
8151 int i, nelt, which;
8152
8153 d.target = target;
8154 d.op0 = op0;
8155 d.op1 = op1;
8156
8157 d.vmode = GET_MODE (target);
8158 gcc_assert (VECTOR_MODE_P (d.vmode));
8159 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8160 d.testing_p = false;
8161
8162 for (i = which = 0; i < nelt; ++i)
8163 {
8164 rtx e = XVECEXP (sel, 0, i);
8165 int ei = INTVAL (e) & (2 * nelt - 1);
8166 which |= (ei < nelt ? 1 : 2);
8167 d.perm[i] = ei;
8168 }
8169
8170 switch (which)
8171 {
8172 default:
8173 gcc_unreachable ();
8174
8175 case 3:
8176 d.one_vector_p = false;
8177 if (!rtx_equal_p (op0, op1))
8178 break;
8179
8180 /* The elements of PERM do not suggest that only the first operand
8181 is used, but both operands are identical. Allow easier matching
8182 of the permutation by folding the permutation into the single
8183 input vector. */
8184 /* Fall Through. */
8185 case 2:
8186 for (i = 0; i < nelt; ++i)
8187 d.perm[i] &= nelt - 1;
8188 d.op0 = op1;
8189 d.one_vector_p = true;
8190 break;
8191
8192 case 1:
8193 d.op1 = op0;
8194 d.one_vector_p = true;
8195 break;
8196 }
8197
8198 return aarch64_expand_vec_perm_const_1 (&d);
8199}
8200
8201static bool
8202aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8203 const unsigned char *sel)
8204{
8205 struct expand_vec_perm_d d;
8206 unsigned int i, nelt, which;
8207 bool ret;
8208
8209 d.vmode = vmode;
8210 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8211 d.testing_p = true;
8212 memcpy (d.perm, sel, nelt);
8213
8214 /* Calculate whether all elements are in one vector. */
8215 for (i = which = 0; i < nelt; ++i)
8216 {
8217 unsigned char e = d.perm[i];
8218 gcc_assert (e < 2 * nelt);
8219 which |= (e < nelt ? 1 : 2);
8220 }
8221
8222 /* If all elements are from the second vector, reindex as if from the
8223 first vector. */
8224 if (which == 2)
8225 for (i = 0; i < nelt; ++i)
8226 d.perm[i] -= nelt;
8227
8228 /* Check whether the mask can be applied to a single vector. */
8229 d.one_vector_p = (which != 3);
8230
8231 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8232 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8233 if (!d.one_vector_p)
8234 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8235
8236 start_sequence ();
8237 ret = aarch64_expand_vec_perm_const_1 (&d);
8238 end_sequence ();
8239
8240 return ret;
8241}
8242
69675d50
TB
8243/* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
8244bool
8245aarch64_cannot_change_mode_class (enum machine_mode from,
8246 enum machine_mode to,
8247 enum reg_class rclass)
8248{
8249 /* Full-reg subregs are allowed on general regs or any class if they are
8250 the same size. */
8251 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
8252 || !reg_classes_intersect_p (FP_REGS, rclass))
8253 return false;
8254
8255 /* Limited combinations of subregs are safe on FPREGs. Particularly,
8256 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
8257 2. Scalar to Scalar for integer modes or same size float modes.
8258 3. Vector to Vector modes. */
8259 if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
8260 {
8261 if (aarch64_vector_mode_supported_p (from)
8262 && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
8263 return false;
8264
8265 if (GET_MODE_NUNITS (from) == 1
8266 && GET_MODE_NUNITS (to) == 1
8267 && (GET_MODE_CLASS (from) == MODE_INT
8268 || from == to))
8269 return false;
8270
8271 if (aarch64_vector_mode_supported_p (from)
8272 && aarch64_vector_mode_supported_p (to))
8273 return false;
8274 }
8275
8276 return true;
8277}
8278
43e9d192
IB
8279#undef TARGET_ADDRESS_COST
8280#define TARGET_ADDRESS_COST aarch64_address_cost
8281
8282/* This hook will determines whether unnamed bitfields affect the alignment
8283 of the containing structure. The hook returns true if the structure
8284 should inherit the alignment requirements of an unnamed bitfield's
8285 type. */
8286#undef TARGET_ALIGN_ANON_BITFIELD
8287#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
8288
8289#undef TARGET_ASM_ALIGNED_DI_OP
8290#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8291
8292#undef TARGET_ASM_ALIGNED_HI_OP
8293#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8294
8295#undef TARGET_ASM_ALIGNED_SI_OP
8296#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8297
8298#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8299#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8300 hook_bool_const_tree_hwi_hwi_const_tree_true
8301
8302#undef TARGET_ASM_FILE_START
8303#define TARGET_ASM_FILE_START aarch64_start_file
8304
8305#undef TARGET_ASM_OUTPUT_MI_THUNK
8306#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8307
8308#undef TARGET_ASM_SELECT_RTX_SECTION
8309#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8310
8311#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8312#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8313
8314#undef TARGET_BUILD_BUILTIN_VA_LIST
8315#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8316
8317#undef TARGET_CALLEE_COPIES
8318#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8319
8320#undef TARGET_CAN_ELIMINATE
8321#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8322
8323#undef TARGET_CANNOT_FORCE_CONST_MEM
8324#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
8325
8326#undef TARGET_CONDITIONAL_REGISTER_USAGE
8327#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
8328
8329/* Only the least significant bit is used for initialization guard
8330 variables. */
8331#undef TARGET_CXX_GUARD_MASK_BIT
8332#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
8333
8334#undef TARGET_C_MODE_FOR_SUFFIX
8335#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
8336
8337#ifdef TARGET_BIG_ENDIAN_DEFAULT
8338#undef TARGET_DEFAULT_TARGET_FLAGS
8339#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
8340#endif
8341
8342#undef TARGET_CLASS_MAX_NREGS
8343#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
8344
119103ca
JG
8345#undef TARGET_BUILTIN_DECL
8346#define TARGET_BUILTIN_DECL aarch64_builtin_decl
8347
43e9d192
IB
8348#undef TARGET_EXPAND_BUILTIN
8349#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
8350
8351#undef TARGET_EXPAND_BUILTIN_VA_START
8352#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
8353
9697e620
JG
8354#undef TARGET_FOLD_BUILTIN
8355#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
8356
43e9d192
IB
8357#undef TARGET_FUNCTION_ARG
8358#define TARGET_FUNCTION_ARG aarch64_function_arg
8359
8360#undef TARGET_FUNCTION_ARG_ADVANCE
8361#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
8362
8363#undef TARGET_FUNCTION_ARG_BOUNDARY
8364#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
8365
8366#undef TARGET_FUNCTION_OK_FOR_SIBCALL
8367#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
8368
8369#undef TARGET_FUNCTION_VALUE
8370#define TARGET_FUNCTION_VALUE aarch64_function_value
8371
8372#undef TARGET_FUNCTION_VALUE_REGNO_P
8373#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
8374
8375#undef TARGET_FRAME_POINTER_REQUIRED
8376#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
8377
0ac198d3
JG
8378#undef TARGET_GIMPLE_FOLD_BUILTIN
8379#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
8380
43e9d192
IB
8381#undef TARGET_GIMPLIFY_VA_ARG_EXPR
8382#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
8383
8384#undef TARGET_INIT_BUILTINS
8385#define TARGET_INIT_BUILTINS aarch64_init_builtins
8386
8387#undef TARGET_LEGITIMATE_ADDRESS_P
8388#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
8389
8390#undef TARGET_LEGITIMATE_CONSTANT_P
8391#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
8392
8393#undef TARGET_LIBGCC_CMP_RETURN_MODE
8394#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
8395
38e8f663
YR
8396#undef TARGET_LRA_P
8397#define TARGET_LRA_P aarch64_lra_p
8398
ac2b960f
YZ
8399#undef TARGET_MANGLE_TYPE
8400#define TARGET_MANGLE_TYPE aarch64_mangle_type
8401
43e9d192
IB
8402#undef TARGET_MEMORY_MOVE_COST
8403#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
8404
8405#undef TARGET_MUST_PASS_IN_STACK
8406#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8407
8408/* This target hook should return true if accesses to volatile bitfields
8409 should use the narrowest mode possible. It should return false if these
8410 accesses should use the bitfield container type. */
8411#undef TARGET_NARROW_VOLATILE_BITFIELD
8412#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
8413
8414#undef TARGET_OPTION_OVERRIDE
8415#define TARGET_OPTION_OVERRIDE aarch64_override_options
8416
8417#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
8418#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
8419 aarch64_override_options_after_change
8420
8421#undef TARGET_PASS_BY_REFERENCE
8422#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
8423
8424#undef TARGET_PREFERRED_RELOAD_CLASS
8425#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
8426
8427#undef TARGET_SECONDARY_RELOAD
8428#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
8429
8430#undef TARGET_SHIFT_TRUNCATION_MASK
8431#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
8432
8433#undef TARGET_SETUP_INCOMING_VARARGS
8434#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
8435
8436#undef TARGET_STRUCT_VALUE_RTX
8437#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
8438
8439#undef TARGET_REGISTER_MOVE_COST
8440#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
8441
8442#undef TARGET_RETURN_IN_MEMORY
8443#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
8444
8445#undef TARGET_RETURN_IN_MSB
8446#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
8447
8448#undef TARGET_RTX_COSTS
8449#define TARGET_RTX_COSTS aarch64_rtx_costs
8450
d126a4ae
AP
8451#undef TARGET_SCHED_ISSUE_RATE
8452#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
8453
43e9d192
IB
8454#undef TARGET_TRAMPOLINE_INIT
8455#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
8456
8457#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
8458#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
8459
8460#undef TARGET_VECTOR_MODE_SUPPORTED_P
8461#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
8462
8463#undef TARGET_ARRAY_MODE_SUPPORTED_P
8464#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
8465
8990e73a
TB
8466#undef TARGET_VECTORIZE_ADD_STMT_COST
8467#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
8468
8469#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
8470#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
8471 aarch64_builtin_vectorization_cost
8472
43e9d192
IB
8473#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
8474#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
8475
42fc9a7f
JG
8476#undef TARGET_VECTORIZE_BUILTINS
8477#define TARGET_VECTORIZE_BUILTINS
8478
8479#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
8480#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
8481 aarch64_builtin_vectorized_function
8482
3b357264
JG
8483#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
8484#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
8485 aarch64_autovectorize_vector_sizes
8486
43e9d192
IB
8487/* Section anchor support. */
8488
8489#undef TARGET_MIN_ANCHOR_OFFSET
8490#define TARGET_MIN_ANCHOR_OFFSET -256
8491
8492/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
8493 byte offset; we can do much more for larger data types, but have no way
8494 to determine the size of the access. We assume accesses are aligned. */
8495#undef TARGET_MAX_ANCHOR_OFFSET
8496#define TARGET_MAX_ANCHOR_OFFSET 4095
8497
db0253a4
TB
8498#undef TARGET_VECTOR_ALIGNMENT
8499#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
8500
8501#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
8502#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
8503 aarch64_simd_vector_alignment_reachable
8504
88b08073
JG
8505/* vec_perm support. */
8506
8507#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
8508#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
8509 aarch64_vectorize_vec_perm_const_ok
8510
70f09188 8511
706b2314 8512#undef TARGET_FIXED_CONDITION_CODE_REGS
70f09188
AP
8513#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
8514
43e9d192
IB
8515struct gcc_target targetm = TARGET_INITIALIZER;
8516
8517#include "gt-aarch64.h"