/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2013 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "insn-codes.h"
#include "rtl.h"
#include "insn-attr.h"
#include "tree.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "regs.h"
#include "df.h"
#include "hard-reg-set.h"
#include "output.h"
#include "expr.h"
#include "reload.h"
#include "toplev.h"
#include "target.h"
#include "target-def.h"
#include "targhooks.h"
#include "ggc.h"
#include "function.h"
#include "tm_p.h"
#include "recog.h"
#include "langhooks.h"
#include "diagnostic-core.h"
#include "gimple.h"
#include "gimplify.h"
#include "optabs.h"
#include "dwarf2.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "config/arm/aarch-cost-tables.h"

/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)

/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC:
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};
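
/* For quick reference, these classifications roughly correspond to
   assembler operands of the following forms (the scaling and writeback
   variants actually accepted depend on the access mode):

     ADDRESS_REG_IMM     [x0, #16]
     ADDRESS_REG_WB      [x0, #16]!  or  [x0], #16
     ADDRESS_REG_REG     [x0, x1, lsl #3]
     ADDRESS_REG_UXTW    [x0, w1, uxtw #2]
     ADDRESS_REG_SXTW    [x0, w1, sxtw #2]
     ADDRESS_LO_SUM      [x0, #:lo12:symbol]
     ADDRESS_SYMBOLIC    ldr x0, .LC0  (pc-relative literal load)  */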

struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};

struct simd_immediate_info
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
  bool msl;
};

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif

static bool aarch64_lra_p (void);
static bool aarch64_composite_type_p (const_tree, enum machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
						     const_tree,
						     enum machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (enum machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_const_vec_all_same_int_p (rtx,
					      HOST_WIDE_INT, HOST_WIDE_INT);

static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
						 const unsigned char *sel);

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* The current tuning set.  */
const struct tune_params *aarch64_tune_params;

/* Mask to specify which instructions we are allowed to generate.  */
unsigned long aarch64_isa_flags = 0;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;

/* Tuning parameters.  */

#if HAVE_DESIGNATED_INITIALIZERS
#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
#else
#define NAMED_PARAM(NAME, VAL) (VAL)
#endif

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_addrcost_table generic_addrcost_table =
{
  NAMED_PARAM (pre_modify, 0),
  NAMED_PARAM (post_modify, 0),
  NAMED_PARAM (register_offset, 0),
  NAMED_PARAM (register_extend, 0),
  NAMED_PARAM (imm_offset, 0)
};

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_regmove_cost generic_regmove_cost =
{
  NAMED_PARAM (GP2GP, 1),
  NAMED_PARAM (GP2FP, 2),
  NAMED_PARAM (FP2GP, 2),
  /* We currently do not provide direct support for TFmode Q->Q move.
     Therefore we need to raise the cost above 2 in order to have
     reload handle the situation.  */
  NAMED_PARAM (FP2FP, 4)
};

/* Generic costs for vector insn classes.  */
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_vector_cost generic_vector_cost =
{
  NAMED_PARAM (scalar_stmt_cost, 1),
  NAMED_PARAM (scalar_load_cost, 1),
  NAMED_PARAM (scalar_store_cost, 1),
  NAMED_PARAM (vec_stmt_cost, 1),
  NAMED_PARAM (vec_to_scalar_cost, 1),
  NAMED_PARAM (scalar_to_vec_cost, 1),
  NAMED_PARAM (vec_align_load_cost, 1),
  NAMED_PARAM (vec_unalign_load_cost, 1),
  NAMED_PARAM (vec_unalign_store_cost, 1),
  NAMED_PARAM (vec_store_cost, 1),
  NAMED_PARAM (cond_taken_branch_cost, 3),
  NAMED_PARAM (cond_not_taken_branch_cost, 1)
};

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct tune_params generic_tunings =
{
  &generic_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  NAMED_PARAM (memmov_cost, 4)
};

/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
  {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, NULL, 0, NULL}
};

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
  {NULL, aarch64_none, NULL, 0, NULL}
};

/* Target specification.  These are populated as commandline arguments
   are processed, or NULL if not specified.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)

/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

/* ISA extensions in AArch64.  */
static const struct aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
  {NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION
  {NULL, 0, 0}
};

/* Used to track the size of an address when generating a pre/post
   increment address.  */
static enum machine_mode aarch64_memory_reference_mode;

/* Used to force GTY into this file.  */
static GTY(()) int gty_dummy;

/* A table of valid AArch64 "bitmask immediate" values for
   logical instructions.  */

#define AARCH64_NUM_BITMASKS  5334
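
/* 5334 is the number of distinct values representable as a bitmask
   immediate: for each element size e in {2, 4, 8, 16, 32, 64} the
   encoding allows (e - 1) run lengths times e rotations, and
   1*2 + 3*4 + 7*8 + 15*16 + 31*32 + 63*64 == 5334.  */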
static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];

/* Did we set flag_omit_frame_pointer just so
   aarch64_frame_pointer_required would be called?  */
static bool faked_omit_frame_pointer;

typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
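
/* Flipping the low bit inverts a condition because the codes above are
   laid out in complementary pairs (EQ/NE, CS/CC, MI/PL, VS/VC, HI/LS,
   GE/LT, GT/LE, AL/NV); e.g. the inverse of AARCH64_GE is AARCH64_LT.  */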

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}

/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (enum machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}

/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (enum machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
	 || aarch64_vect_struct_mode_p (mode);
}

/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (enum machine_mode mode,
				unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && AARCH64_VALID_SIMD_QREG_MODE (mode)
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}

/* Implement HARD_REGNO_NREGS.  */

int
aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}

/* Implement HARD_REGNO_MODE_OK.  */

int
aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
	return
	  (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
	return 1;
    }

  return 0;
}

/* Return true if calls to DECL should be treated as
   long-calls (i.e. called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (i.e. called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}

/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

     (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
				rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}

/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  enum machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}

/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as :
   RTL                               Absolute
   tmp = hi (symbol_ref);            adrp  x1, foo
   dest = lo_sum (tmp, symbol_ref);  add  dest, x1, :lo12:foo
                                     nop

   PIC                               TLS
   adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
   ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
                                     bl   __tls_get_addr
                                     nop

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
   adrp tmp, :tlsgd:imm
   add  dest, tmp, #:tlsgd_lo12:imm
   bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
   adrp dest, :tlsdesc:imm
   ldr  tmp, [dest, #:tlsdesc_lo12:imm]
   add  dest, dest, #:tlsdesc_lo12:imm
   blr  tmp
   mrs  tp, tpidr_el0
   add  dest, dest, tp

   Initial Exec:
   mrs  tp, tpidr_el0
   adrp tmp, :gottprel:imm
   ldr  dest, [tmp, #:gottprel_lo12:imm]
   add  dest, dest, tp

   Local Exec:
   mrs  tp, tpidr_el0
   add  t0, tp, #:tprel_hi12:imm
   add  t0, #:tprel_lo12_nc:imm
*/

static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
				   enum aarch64_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_SMALL_ABSOLUTE:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode.  */
	rtx tmp_reg = dest;
	enum machine_mode mode = GET_MODE (dest);

	gcc_assert (mode == Pmode || mode == ptr_mode);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	emit_insn (gen_add_losym (dest, tmp_reg, imm));
	return;
      }

    case SYMBOL_TINY_ABSOLUTE:
      emit_insn (gen_rtx_SET (Pmode, dest, imm));
      return;

    case SYMBOL_SMALL_GOT:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different ldr_got_small
	   patterns here (two patterns for ILP32).  */
	rtx tmp_reg = dest;
	enum machine_mode mode = GET_MODE (dest);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
	    else
	      emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
	  }
	else
	  {
	    gcc_assert (mode == Pmode);
	    emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
	  }

	return;
      }

    case SYMBOL_SMALL_TLSGD:
      {
	rtx insns;
	rtx result = gen_rtx_REG (Pmode, R0_REGNUM);

	start_sequence ();
	emit_call_insn (gen_tlsgd_small (result, imm));
	insns = get_insns ();
	end_sequence ();

	RTL_CONST_CALL_P (insns) = 1;
	emit_libcall_block (insns, dest, result, imm);
	return;
      }

    case SYMBOL_SMALL_TLSDESC:
      {
	rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
	rtx tp;

	emit_insn (gen_tlsdesc_small (imm));
	tp = aarch64_load_tp (NULL);
	emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_GOTTPREL:
      {
	rtx tmp_reg = gen_reg_rtx (Pmode);
	rtx tp = aarch64_load_tp (NULL);
	emit_insn (gen_tlsie_small (tmp_reg, imm));
	emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_TPREL:
      {
	rtx tp = aarch64_load_tp (NULL);
	emit_insn (gen_tlsle_small (dest, tp, imm));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_TINY_GOT:
      emit_insn (gen_ldr_got_tiny (dest, imm));
      return;

    default:
      gcc_unreachable ();
    }
}

/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
	  ? emit_move_insn (dest, src)
	  : emit_move_insn_1 (dest, src));
}

void
aarch64_split_128bit_move (rtx dst, rtx src)
{
  rtx low_dst;

  enum machine_mode src_mode = GET_MODE (src);
  enum machine_mode dst_mode = GET_MODE (dst);
  int src_regno = REGNO (src);
  int dst_regno = REGNO (dst);

  gcc_assert (dst_mode == TImode || dst_mode == TFmode);

  if (REG_P (dst) && REG_P (src))
    {
      gcc_assert (src_mode == TImode || src_mode == TFmode);

      /* Handle r -> w, w -> r.  */
      if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
	{
	  switch (src_mode) {
	  case TImode:
	    emit_insn
	      (gen_aarch64_movtilow_di (dst, gen_lowpart (word_mode, src)));
	    emit_insn
	      (gen_aarch64_movtihigh_di (dst, gen_highpart (word_mode, src)));
	    return;
	  case TFmode:
	    emit_insn
	      (gen_aarch64_movtflow_di (dst, gen_lowpart (word_mode, src)));
	    emit_insn
	      (gen_aarch64_movtfhigh_di (dst, gen_highpart (word_mode, src)));
	    return;
	  default:
	    gcc_unreachable ();
	  }
	}
      else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
	{
	  switch (src_mode) {
	  case TImode:
	    emit_insn
	      (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst), src));
	    emit_insn
	      (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst), src));
	    return;
	  case TFmode:
	    emit_insn
	      (gen_aarch64_movdi_tflow (gen_lowpart (word_mode, dst), src));
	    emit_insn
	      (gen_aarch64_movdi_tfhigh (gen_highpart (word_mode, dst), src));
	    return;
	  default:
	    gcc_unreachable ();
	  }
	}
      /* Fall through to r -> r cases.  */
    }

  switch (dst_mode) {
  case TImode:
    low_dst = gen_lowpart (word_mode, dst);
    if (REG_P (low_dst)
	&& reg_overlap_mentioned_p (low_dst, src))
      {
	aarch64_emit_move (gen_highpart (word_mode, dst),
			   gen_highpart_mode (word_mode, TImode, src));
	aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
      }
    else
      {
	aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
	aarch64_emit_move (gen_highpart (word_mode, dst),
			   gen_highpart_mode (word_mode, TImode, src));
      }
    return;
  case TFmode:
    emit_move_insn (gen_rtx_REG (DFmode, dst_regno),
		    gen_rtx_REG (DFmode, src_regno));
    emit_move_insn (gen_rtx_REG (DFmode, dst_regno + 1),
		    gen_rtx_REG (DFmode, src_regno + 1));
    return;
  default:
    gcc_unreachable ();
  }
}

bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
{
  return (! REG_P (src)
	  || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
}

/* Split a complex SIMD combine.  */

void
aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
{
  enum machine_mode src_mode = GET_MODE (src1);
  enum machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src1) && REG_P (src2))
    {
      rtx (*gen) (rtx, rtx, rtx);

      switch (src_mode)
	{
	case V8QImode:
	  gen = gen_aarch64_simd_combinev8qi;
	  break;
	case V4HImode:
	  gen = gen_aarch64_simd_combinev4hi;
	  break;
	case V2SImode:
	  gen = gen_aarch64_simd_combinev2si;
	  break;
	case V2SFmode:
	  gen = gen_aarch64_simd_combinev2sf;
	  break;
	case DImode:
	  gen = gen_aarch64_simd_combinedi;
	  break;
	case DFmode:
	  gen = gen_aarch64_simd_combinedf;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src1, src2));
      return;
    }
}

/* Split a complex SIMD move.  */

void
aarch64_split_simd_move (rtx dst, rtx src)
{
  enum machine_mode src_mode = GET_MODE (src);
  enum machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src))
    {
      rtx (*gen) (rtx, rtx);

      gcc_assert (VECTOR_MODE_P (src_mode));

      switch (src_mode)
	{
	case V16QImode:
	  gen = gen_aarch64_split_simd_movv16qi;
	  break;
	case V8HImode:
	  gen = gen_aarch64_split_simd_movv8hi;
	  break;
	case V4SImode:
	  gen = gen_aarch64_split_simd_movv4si;
	  break;
	case V2DImode:
	  gen = gen_aarch64_split_simd_movv2di;
	  break;
	case V4SFmode:
	  gen = gen_aarch64_split_simd_movv4sf;
	  break;
	case V2DFmode:
	  gen = gen_aarch64_split_simd_movv2df;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src));
      return;
    }
}

static rtx
aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
{
  if (can_create_pseudo_p ())
    return force_reg (mode, value);
  else
    {
      x = aarch64_emit_move (x, value);
      return x;
    }
}


static rtx
aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
{
  if (!aarch64_plus_immediate (GEN_INT (offset), mode))
    {
      rtx high;
      /* Load the full offset into a register.  This
	 might be improvable in the future.  */
      high = GEN_INT (offset);
      offset = 0;
      high = aarch64_force_temporary (mode, temp, high);
      reg = aarch64_force_temporary (mode, temp,
				     gen_rtx_PLUS (mode, high, reg));
    }
  return plus_constant (mode, reg, offset);
}

void
aarch64_expand_mov_immediate (rtx dest, rtx imm)
{
  enum machine_mode mode = GET_MODE (dest);
  unsigned HOST_WIDE_INT mask;
  int i;
  bool first;
  unsigned HOST_WIDE_INT val;
  bool subtargets;
  rtx subtarget;
  int one_match, zero_match;

  gcc_assert (mode == SImode || mode == DImode);

  /* Check on what type of symbol it is.  */
  if (GET_CODE (imm) == SYMBOL_REF
      || GET_CODE (imm) == LABEL_REF
      || GET_CODE (imm) == CONST)
    {
      rtx mem, base, offset;
      enum aarch64_symbol_type sty;

      /* If we have (const (plus symbol offset)), separate out the offset
	 before we start classifying the symbol.  */
      split_const (imm, &base, &offset);

      sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
      switch (sty)
	{
	case SYMBOL_FORCE_TO_MEM:
	  if (offset != const0_rtx
	      && targetm.cannot_force_const_mem (mode, imm))
	    {
	      gcc_assert (can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }
	  mem = force_const_mem (ptr_mode, imm);
	  gcc_assert (mem);
	  if (mode != ptr_mode)
	    mem = gen_rtx_ZERO_EXTEND (mode, mem);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
	  return;

	case SYMBOL_SMALL_TLSGD:
	case SYMBOL_SMALL_TLSDESC:
	case SYMBOL_SMALL_GOTTPREL:
	case SYMBOL_SMALL_GOT:
	case SYMBOL_TINY_GOT:
	  if (offset != const0_rtx)
	    {
	      gcc_assert (can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }
	  /* FALLTHRU */

	case SYMBOL_SMALL_TPREL:
	case SYMBOL_SMALL_ABSOLUTE:
	case SYMBOL_TINY_ABSOLUTE:
	  aarch64_load_symref_appropriately (dest, imm, sty);
	  return;

	default:
	  gcc_unreachable ();
	}
    }

  if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      return;
    }

  if (!CONST_INT_P (imm))
    {
      if (GET_CODE (imm) == HIGH)
	emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      else
	{
	  rtx mem = force_const_mem (mode, imm);
	  gcc_assert (mem);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
	}

      return;
    }

  if (mode == SImode)
    {
      /* We know we can't do this in 1 insn, and we must be able to do it
	 in two; so don't mess around looking for sequences that don't buy
	 us anything.  */
      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
      emit_insn (gen_insv_immsi (dest, GEN_INT (16),
				 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
      return;
    }

  /* Remaining cases are all for DImode.  */

  val = INTVAL (imm);
  subtargets = optimize && can_create_pseudo_p ();

  one_match = 0;
  zero_match = 0;
  mask = 0xffff;

  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) == 0)
	zero_match++;
      else if ((val & mask) == mask)
	one_match++;
    }

  if (one_match == 2)
    {
      mask = 0xffff;
      for (i = 0; i < 64; i += 16, mask <<= 16)
	{
	  if ((val & mask) != mask)
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
	      emit_insn (gen_insv_immdi (dest, GEN_INT (i),
					 GEN_INT ((val >> i) & 0xffff)));
	      return;
	    }
	}
      gcc_unreachable ();
    }

  if (zero_match == 2)
    goto simple_sequence;

  mask = 0x0ffff0000UL;
  for (i = 16; i < 64; i += 16, mask <<= 16)
    {
      HOST_WIDE_INT comp = mask & ~(mask - 1);

      if (aarch64_uimm12_shift (val - (val & mask)))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - (val & mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT ((val + comp) & mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - ((val + comp) & mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT ((val - comp) | ~mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - ((val - comp) | ~mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT (val | ~mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - (val | ~mask))));
	  return;
	}
    }

  /* See if we can do it by arithmetically combining two
     immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      int j;
      mask = 0xffff;

      if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
	  || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT (aarch64_bitmasks[i])));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - aarch64_bitmasks[i])));
	  return;
	}

      for (j = 0; j < 64; j += 16, mask <<= 16)
	{
	  if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest,
				      GEN_INT (aarch64_bitmasks[i])));
	      emit_insn (gen_insv_immdi (dest, GEN_INT (j),
					 GEN_INT ((val >> j) & 0xffff)));
	      return;
	    }
	}
    }

  /* See if we can do it by logically combining two immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
	{
	  int j;

	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
	      {
		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
					GEN_INT (aarch64_bitmasks[i])));
		emit_insn (gen_iordi3 (dest, subtarget,
				       GEN_INT (aarch64_bitmasks[j])));
		return;
	      }
	}
      else if ((val & aarch64_bitmasks[i]) == val)
	{
	  int j;

	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
	      {
		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
					GEN_INT (aarch64_bitmasks[j])));
		emit_insn (gen_anddi3 (dest, subtarget,
				       GEN_INT (aarch64_bitmasks[i])));
		return;
	      }
	}
    }

 simple_sequence:
  first = true;
  mask = 0xffff;
  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) != 0)
	{
	  if (first)
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest,
				      GEN_INT (val & mask)));
	      first = false;
	    }
	  else
	    emit_insn (gen_insv_immdi (dest, GEN_INT (i),
				       GEN_INT ((val >> i) & 0xffff)));
	}
    }
}

static bool
aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  /* Indirect calls are not currently supported.  */
  if (decl == NULL)
    return false;

  /* Cannot tail-call to long-calls, since these are outside of the
     range of a branch instruction (we could handle this if we added
     support for indirect tail-calls).  */
  if (aarch64_decl_is_long_call_p (decl))
    return false;

  return true;
}

/* Implement TARGET_PASS_BY_REFERENCE.  */

static bool
aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
			   enum machine_mode mode,
			   const_tree type,
			   bool named ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  enum machine_mode dummymode;
  int nregs;

  /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
  size = (mode == BLKmode && type)
    ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);

  if (type)
    {
      /* Arrays are always passed by reference.  */
      if (TREE_CODE (type) == ARRAY_TYPE)
	return true;
      /* Other aggregates based on their size.  */
      if (AGGREGATE_TYPE_P (type))
	size = int_size_in_bytes (type);
    }

  /* Variable sized arguments are always returned by reference.  */
  if (size < 0)
    return true;

  /* Can this be a candidate to be passed in fp/simd register(s)?  */
  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &dummymode, &nregs,
					       NULL))
    return false;

  /* Arguments which are variable sized or larger than 2 registers are
     passed by reference unless they are a homogeneous floating point
     aggregate.  */
  return size > 2 * UNITS_PER_WORD;
}

/* Return TRUE if VALTYPE is padded to its least significant bits.  */
static bool
aarch64_return_in_msb (const_tree valtype)
{
  enum machine_mode dummy_mode;
  int dummy_int;

  /* Never happens in little-endian mode.  */
  if (!BYTES_BIG_ENDIAN)
    return false;

  /* Only composite types smaller than or equal to 16 bytes can
     be potentially returned in registers.  */
  if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
      || int_size_in_bytes (valtype) <= 0
      || int_size_in_bytes (valtype) > 16)
    return false;

  /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
     or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
     is always passed/returned in the least significant bits of fp/simd
     register(s).  */
  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
					       &dummy_mode, &dummy_int, NULL))
    return false;

  return true;
}

/* Implement TARGET_FUNCTION_VALUE.
   Define how to find the value returned by a function.  */

static rtx
aarch64_function_value (const_tree type, const_tree func,
			bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode;
  int unsignedp;
  int count;
  enum machine_mode ag_mode;

  mode = TYPE_MODE (type);
  if (INTEGRAL_TYPE_P (type))
    mode = promote_function_mode (type, mode, &unsignedp, func, 1);

  if (aarch64_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);

      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &ag_mode, &count, NULL))
    {
      if (!aarch64_composite_type_p (type, mode))
	{
	  gcc_assert (count == 1 && mode == ag_mode);
	  return gen_rtx_REG (mode, V0_REGNUM);
	}
      else
	{
	  int i;
	  rtx par;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
	  for (i = 0; i < count; i++)
	    {
	      rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  return par;
	}
    }
  else
    return gen_rtx_REG (mode, R0_REGNUM);
}

/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
   Return true if REGNO is the number of a hard register in which the values
   of called function may come back.  */

static bool
aarch64_function_value_regno_p (const unsigned int regno)
{
  /* Maximum of 16 bytes can be returned in the general registers.  Examples
     of 16-byte return values are: 128-bit integers and 16-byte small
     structures (excluding homogeneous floating-point aggregates).  */
  if (regno == R0_REGNUM || regno == R1_REGNUM)
    return true;

  /* Up to four fp/simd registers can return a function value, e.g. a
     homogeneous floating-point aggregate having four members.  */
  if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
    return !TARGET_GENERAL_REGS_ONLY;

  return false;
}

/* Implement TARGET_RETURN_IN_MEMORY.

   If the type T of the result of a function is such that
     void func (T arg)
   would require that arg be passed as a value in a register (or set of
   registers) according to the parameter passing rules, then the result
   is returned in the same registers as would be used for such an
   argument.  */

static bool
aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  enum machine_mode ag_mode;
  int count;

  if (!AGGREGATE_TYPE_P (type)
      && TREE_CODE (type) != COMPLEX_TYPE
      && TREE_CODE (type) != VECTOR_TYPE)
    /* Simple scalar types are always returned in registers.  */
    return false;

  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
					       type,
					       &ag_mode,
					       &count,
					       NULL))
    return false;

  /* Types larger than 2 registers are returned in memory.  */
  size = int_size_in_bytes (type);
  return (size < 0 || size > 2 * UNITS_PER_WORD);
}

static bool
aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
			       const_tree type, int *nregs)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  return aarch64_vfp_is_call_or_return_candidate (mode,
						  type,
						  &pcum->aapcs_vfp_rmode,
						  nregs,
						  NULL);
}

/* Given MODE and TYPE of a function argument, return the alignment in
   bits.  The idea is to suppress any stronger alignment requested by
   the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
   This is a helper function for local use only.  */

static unsigned int
aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
{
  unsigned int alignment;

  if (type)
    {
      if (!integer_zerop (TYPE_SIZE (type)))
	{
	  if (TYPE_MODE (type) == mode)
	    alignment = TYPE_ALIGN (type);
	  else
	    alignment = GET_MODE_ALIGNMENT (mode);
	}
      else
	alignment = 0;
    }
  else
    alignment = GET_MODE_ALIGNMENT (mode);

  return alignment;
}

/* Layout a function argument according to the AAPCS64 rules.  The rule
   numbers refer to the rule numbers in the AAPCS64.  */

static void
aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
		    const_tree type,
		    bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int ncrn, nvrn, nregs;
  bool allocate_ncrn, allocate_nvrn;

  /* We need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
  allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
						 mode,
						 type,
						 &nregs);

  /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
     The following code thus handles passing by SIMD/FP registers first.  */

  nvrn = pcum->aapcs_nvrn;

  /* C1 - C5 for floating point, homogeneous floating point aggregates (HFA)
     and homogeneous short-vector aggregates (HVA).  */
  if (allocate_nvrn)
    {
      if (nvrn + nregs <= NUM_FP_ARG_REGS)
	{
	  pcum->aapcs_nextnvrn = nvrn + nregs;
	  if (!aarch64_composite_type_p (type, mode))
	    {
	      gcc_assert (nregs == 1);
	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
	    }
	  else
	    {
	      rtx par;
	      int i;
	      par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	      for (i = 0; i < nregs; i++)
		{
		  rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
					 V0_REGNUM + nvrn + i);
		  tmp = gen_rtx_EXPR_LIST
		    (VOIDmode, tmp,
		     GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
		  XVECEXP (par, 0, i) = tmp;
		}
	      pcum->aapcs_reg = par;
	    }
	  return;
	}
      else
	{
	  /* C.3 NSRN is set to 8.  */
	  pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
	  goto on_stack;
	}
    }

  ncrn = pcum->aapcs_ncrn;
  nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
	   + UNITS_PER_WORD - 1) / UNITS_PER_WORD;


  /* C6 - C9, though the sign and zero extension semantics are
     handled elsewhere.  This is the case where the argument fits
     entirely in general registers.  */
  if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
    {
      unsigned int alignment = aarch64_function_arg_alignment (mode, type);

      gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);

      /* C.8 if the argument has an alignment of 16 then the NGRN is
	 rounded up to the next even number.  */
      if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
	{
	  ++ncrn;
	  gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
	}
      /* NREGS can be 0 when e.g. an empty structure is to be passed.
	 A reg is still generated for it, but the caller should be smart
	 enough not to use it.  */
      if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
	{
	  pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
	}
      else
	{
	  rtx par;
	  int i;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	  for (i = 0; i < nregs; i++)
	    {
	      rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * UNITS_PER_WORD));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  pcum->aapcs_reg = par;
	}

      pcum->aapcs_nextncrn = ncrn + nregs;
      return;
    }

  /* C.11 */
  pcum->aapcs_nextncrn = NUM_ARG_REGS;

  /* The argument is passed on stack; record the needed number of words for
     this argument (we can re-use NREGS) and align the total size if
     necessary.  */
on_stack:
  pcum->aapcs_stack_words = nregs;
  if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
    pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
					       16 / UNITS_PER_WORD) + 1;
  return;
}

/* Implement TARGET_FUNCTION_ARG.  */

static rtx
aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
		      const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);

  if (mode == VOIDmode)
    return NULL_RTX;

  aarch64_layout_arg (pcum_v, mode, type, named);
  return pcum->aapcs_reg;
}

void
aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
			      const_tree fntype ATTRIBUTE_UNUSED,
			      rtx libname ATTRIBUTE_UNUSED,
			      const_tree fndecl ATTRIBUTE_UNUSED,
			      unsigned n_named ATTRIBUTE_UNUSED)
{
  pcum->aapcs_ncrn = 0;
  pcum->aapcs_nvrn = 0;
  pcum->aapcs_nextncrn = 0;
  pcum->aapcs_nextnvrn = 0;
  pcum->pcs_variant = ARM_PCS_AAPCS64;
  pcum->aapcs_reg = NULL_RTX;
  pcum->aapcs_arg_processed = false;
  pcum->aapcs_stack_words = 0;
  pcum->aapcs_stack_size = 0;

  return;
}

static void
aarch64_function_arg_advance (cumulative_args_t pcum_v,
			      enum machine_mode mode,
			      const_tree type,
			      bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  if (pcum->pcs_variant == ARM_PCS_AAPCS64)
    {
      aarch64_layout_arg (pcum_v, mode, type, named);
      gcc_assert ((pcum->aapcs_reg != NULL_RTX)
		  != (pcum->aapcs_stack_words != 0));
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
      pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
      pcum->aapcs_stack_size += pcum->aapcs_stack_words;
      pcum->aapcs_stack_words = 0;
      pcum->aapcs_reg = NULL_RTX;
    }
}

bool
aarch64_function_arg_regno_p (unsigned regno)
{
  return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
	  || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
}

/* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
   PARM_BOUNDARY bits of alignment, but will be given anything up
   to STACK_BOUNDARY bits if the type requires it.  This makes sure
   that both before and after the layout of each argument, the Next
   Stacked Argument Address (NSAA) will have a minimum alignment of
   8 bytes.  */

static unsigned int
aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  unsigned int alignment = aarch64_function_arg_alignment (mode, type);

  if (alignment < PARM_BOUNDARY)
    alignment = PARM_BOUNDARY;
  if (alignment > STACK_BOUNDARY)
    alignment = STACK_BOUNDARY;
  return alignment;
}

/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).

   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte of the stack slot has useful data.

   Small aggregate types are placed in the lowest memory address.

   The related parameter passing rules are B.4, C.3, C.5 and C.14.  */

bool
aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
{
  /* On little-endian targets, the least significant byte of every stack
     argument is passed at the lowest byte address of the stack slot.  */
  if (!BYTES_BIG_ENDIAN)
    return true;

  /* Otherwise, integral, floating-point and pointer types are padded downward:
     the least significant byte of a stack argument is passed at the highest
     byte address of the stack slot.  */
  if (type
      ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
	 || POINTER_TYPE_P (type))
      : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
    return false;

  /* Everything else padded upward, i.e. data in first byte of stack slot.  */
  return true;
}

/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).

   It specifies padding for the last (may also be the only)
   element of a block move between registers and memory.  If
   assuming the block is in the memory, padding upward means that
   the last element is padded after its highest significant byte,
   while in downward padding, the last element is padded at its
   least significant byte side.

   Small aggregates and small complex types are always padded
   upwards.

   We don't need to worry about homogeneous floating-point or
   short-vector aggregates; their move is not affected by the
   padding direction determined here.  Regardless of endianness,
   each element of such an aggregate is put in the least
   significant bits of a fp/simd register.

   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
			bool first ATTRIBUTE_UNUSED)
{

  /* Small composite types are always padded upward.  */
  if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
    {
      HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
			    : GET_MODE_SIZE (mode));
      if (size < 2 * UNITS_PER_WORD)
	return true;
    }

  /* Otherwise, use the default padding.  */
  return !BYTES_BIG_ENDIAN;
}

static enum machine_mode
aarch64_libgcc_cmp_return_mode (void)
{
  return SImode;
}

static bool
aarch64_frame_pointer_required (void)
{
  /* If the function contains dynamic stack allocations, we need to
     use the frame pointer to access the static parts of the frame.  */
  if (cfun->calls_alloca)
    return true;

  /* We may have turned flag_omit_frame_pointer on in order to have this
     function called; if we did, we also set the 'faked_omit_frame_pointer' flag
     and we'll check it here.
     If we really did set flag_omit_frame_pointer normally, then we return false
     (no frame pointer required) in all cases.  */

  if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
    return false;
  else if (flag_omit_leaf_frame_pointer)
    return !crtl->is_leaf;
  return true;
}

/* Mark the registers that need to be saved by the callee and calculate
   the size of the callee-saved registers area and frame record (both FP
   and LR may be omitted).  */
static void
aarch64_layout_frame (void)
{
  HOST_WIDE_INT offset = 0;
  int regno;

  if (reload_completed && cfun->machine->frame.laid_out)
    return;

  cfun->machine->frame.fp_lr_offset = 0;

  /* First mark all the registers that really need to be saved...  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = -1;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = -1;

  /* ... that includes the eh data registers (if needed)...  */
  if (crtl->calls_eh_return)
    for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
      cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;

  /* ... and any callee saved register that dataflow says is live.  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
	&& !call_used_regs[regno])
      cfun->machine->frame.reg_offset[regno] = 0;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
	&& !call_used_regs[regno])
      cfun->machine->frame.reg_offset[regno] = 0;

  if (frame_pointer_needed)
    {
      cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
      cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
      cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
    }

  /* Now assign stack slots for them.  */
  for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] != -1)
      {
	cfun->machine->frame.reg_offset[regno] = offset;
	offset += UNITS_PER_WORD;
      }

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] != -1)
      {
	cfun->machine->frame.reg_offset[regno] = offset;
	offset += UNITS_PER_WORD;
      }

  if (frame_pointer_needed)
    {
      cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
      offset += UNITS_PER_WORD;
      cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
    }

  if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
    {
      cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
      offset += UNITS_PER_WORD;
      cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
    }

  cfun->machine->frame.padding0 =
    (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
  offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);

  cfun->machine->frame.saved_regs_size = offset;
  cfun->machine->frame.laid_out = true;
}
1771
1772/* Make the last instruction frame-related and note that it performs
1773 the operation described by FRAME_PATTERN. */
1774
1775static void
1776aarch64_set_frame_expr (rtx frame_pattern)
1777{
1778 rtx insn;
1779
1780 insn = get_last_insn ();
1781 RTX_FRAME_RELATED_P (insn) = 1;
1782 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1783 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1784 frame_pattern,
1785 REG_NOTES (insn));
1786}
1787
1788static bool
1789aarch64_register_saved_on_entry (int regno)
1790{
1791 return cfun->machine->frame.reg_offset[regno] != -1;
1792}
1793
1794
1795static void
1796aarch64_save_or_restore_fprs (int start_offset, int increment,
1797 bool restore, rtx base_rtx)
1798
1799{
1800 unsigned regno;
1801 unsigned regno2;
1802 rtx insn;
e0f396bc
MS
1803 rtx (*gen_mem_ref)(enum machine_mode, rtx)
1804 = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
43e9d192
IB
1805
1806
1807 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1808 {
1809 if (aarch64_register_saved_on_entry (regno))
1810 {
1811 rtx mem;
1812 mem = gen_mem_ref (DFmode,
1813 plus_constant (Pmode,
1814 base_rtx,
1815 start_offset));
1816
1817 for (regno2 = regno + 1;
1818 regno2 <= V31_REGNUM
1819 && !aarch64_register_saved_on_entry (regno2);
1820 regno2++)
1821 {
1822 /* Empty loop. */
1823 }
1824 if (regno2 <= V31_REGNUM &&
1825 aarch64_register_saved_on_entry (regno2))
1826 {
1827 rtx mem2;
1828 /* Next highest register to be saved. */
1829 mem2 = gen_mem_ref (DFmode,
1830 plus_constant
1831 (Pmode,
1832 base_rtx,
1833 start_offset + increment));
1834 if (restore == false)
1835 {
1836 insn = emit_insn
1837 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1838 mem2, gen_rtx_REG (DFmode, regno2)));
1839
1840 }
1841 else
1842 {
1843 insn = emit_insn
1844 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1845 gen_rtx_REG (DFmode, regno2), mem2));
1846
e0f396bc
MS
1847 add_reg_note (insn, REG_CFA_RESTORE,
1848 gen_rtx_REG (DFmode, regno));
1849 add_reg_note (insn, REG_CFA_RESTORE,
1850 gen_rtx_REG (DFmode, regno2));
43e9d192
IB
1851 }
1852
1853 /* The first part of a frame-related parallel insn
1854 is always assumed to be relevant to the frame
1855 calculations; subsequent parts, are only
1856 frame-related if explicitly marked. */
e0f396bc 1857 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
43e9d192
IB
1858 regno = regno2;
1859 start_offset += increment * 2;
1860 }
1861 else
1862 {
1863 if (restore == false)
1864 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1865 else
1866 {
1867 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
e0f396bc
MS
1868 add_reg_note (insn, REG_CFA_RESTORE,
1869 gen_rtx_REG (DImode, regno));
43e9d192
IB
1870 }
1871 start_offset += increment;
1872 }
1873 RTX_FRAME_RELATED_P (insn) = 1;
1874 }
1875 }
1876
1877}
1878
1879
1880/* offset from the stack pointer of where the saves and
1881 restore's have to happen. */
1882static void
1883aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1884 bool restore)
1885{
1886 rtx insn;
1887 rtx base_rtx = stack_pointer_rtx;
1888 HOST_WIDE_INT start_offset = offset;
1889 HOST_WIDE_INT increment = UNITS_PER_WORD;
1890 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1891 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1892 unsigned regno;
1893 unsigned regno2;
1894
1895 for (regno = R0_REGNUM; regno <= limit; regno++)
1896 {
1897 if (aarch64_register_saved_on_entry (regno))
1898 {
1899 rtx mem;
1900 mem = gen_mem_ref (Pmode,
1901 plus_constant (Pmode,
1902 base_rtx,
1903 start_offset));
1904
1905 for (regno2 = regno + 1;
1906 regno2 <= limit
1907 && !aarch64_register_saved_on_entry (regno2);
1908 regno2++)
1909 {
1910 /* Empty loop. */
1911 }
1912 if (regno2 <= limit &&
1913 aarch64_register_saved_on_entry (regno2))
1914 {
1915 rtx mem2;
1916 /* Next highest register to be saved. */
1917 mem2 = gen_mem_ref (Pmode,
1918 plus_constant
1919 (Pmode,
1920 base_rtx,
1921 start_offset + increment));
1922 if (restore == false)
1923 {
1924 insn = emit_insn
1925 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1926 mem2, gen_rtx_REG (DImode, regno2)));
1927
1928 }
1929 else
1930 {
1931 insn = emit_insn
1932 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1933 gen_rtx_REG (DImode, regno2), mem2));
1934
1935 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1936 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1937 }
1938
1939 /* The first part of a frame-related parallel insn
1940 is always assumed to be relevant to the frame
1941 calculations; subsequent parts are only
1942 frame-related if explicitly marked. */
1943 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1944 1)) = 1;
1945 regno = regno2;
1946 start_offset += increment * 2;
1947 }
1948 else
1949 {
1950 if (restore == false)
1951 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1952 else
1953 {
1954 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1955 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1956 }
1957 start_offset += increment;
1958 }
1959 RTX_FRAME_RELATED_P (insn) = 1;
1960 }
1961 }
1962
1963 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1964
1965}
1966
1967/* AArch64 stack frames generated by this compiler look like:
1968
1969 +-------------------------------+
1970 | |
1971 | incoming stack arguments |
1972 | |
1973 +-------------------------------+ <-- arg_pointer_rtx
1974 | |
1975 | callee-allocated save area |
1976 | for register varargs |
1977 | |
1978 +-------------------------------+
1979 | |
1980 | local variables |
1981 | |
1982 +-------------------------------+ <-- frame_pointer_rtx
1983 | |
1984 | callee-saved registers |
1985 | |
1986 +-------------------------------+
1987 | LR' |
1988 +-------------------------------+
1989 | FP' |
1990 P +-------------------------------+ <-- hard_frame_pointer_rtx
1991 | dynamic allocation |
1992 +-------------------------------+
1993 | |
1994 | outgoing stack arguments |
1995 | |
1996 +-------------------------------+ <-- stack_pointer_rtx
1997
1998 Dynamic stack allocations such as alloca insert data at point P.
1999 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2000 hard_frame_pointer_rtx unchanged. */
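
   As a concrete illustration of the regions in the diagram, the
   hypothetical function below (not from GCC, and assuming a glibc-style
   <alloca.h>) keeps its named locals in the "local variables" block,
   carves its alloca area out at point P, and passes that buffer down as an
   outgoing argument; callee() is a placeholder.

#include <alloca.h>
#include <string.h>

extern void callee (char *buf, int len);

void
caller (int n)
{
  char local[64];               /* lands in the "local variables" region */
  char *dyn = alloca (n);       /* dynamic allocation, carved out at P   */

  memset (local, 0, sizeof local);
  memset (dyn, 0, n);
  callee (dyn, n);              /* outgoing argument area, then the call */
}

   Because the alloca only moves stack_pointer_rtx, the named locals remain
   addressable at fixed offsets from the frame pointer.
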
2001
2002/* Generate the prologue instructions for entry into a function.
2003 Establish the stack frame by decreasing the stack pointer with a
2004 properly calculated size and, if necessary, create a frame record
2005 filled with the values of LR and previous frame pointer. The
6991c977 2006 current FP is also set up if it is in use. */
43e9d192
IB
2007
2008void
2009aarch64_expand_prologue (void)
2010{
2011 /* sub sp, sp, #<frame_size>
2012 stp {fp, lr}, [sp, #<frame_size> - 16]
2013 add fp, sp, #<frame_size> - hardfp_offset
2014 stp {cs_reg}, [fp, #-16] etc.
2015
2016 sub sp, sp, <final_adjustment_if_any>
2017 */
2018 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2019 HOST_WIDE_INT frame_size, offset;
2020 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2021 rtx insn;
2022
2023 aarch64_layout_frame ();
2024 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2025 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2026 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2027 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2028 + crtl->outgoing_args_size);
2029 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2030 STACK_BOUNDARY / BITS_PER_UNIT);
2031
2032 if (flag_stack_usage_info)
2033 current_function_static_stack_size = frame_size;
2034
2035 fp_offset = (offset
2036 - original_frame_size
2037 - cfun->machine->frame.saved_regs_size);
2038
44c0e7b9 2039 /* Store pairs and load pairs have a range of only -512 to 504. */
43e9d192
IB
2040 if (offset >= 512)
2041 {
2042 /* When the frame is large, an initial decrease of the stack
2043 pointer skips over the callee-allocated save area for register
2044 varargs, the local variable area and/or the callee-saved
2045 register area. This allows the pre-index write-back store
2046 pair instructions to be used to set up the stack frame
2047 efficiently. */
2048 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2049 if (offset >= 512)
2050 offset = cfun->machine->frame.saved_regs_size;
2051
2052 frame_size -= (offset + crtl->outgoing_args_size);
2053 fp_offset = 0;
2054
2055 if (frame_size >= 0x1000000)
2056 {
2057 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2058 emit_move_insn (op0, GEN_INT (-frame_size));
2059 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2060 aarch64_set_frame_expr (gen_rtx_SET
2061 (Pmode, stack_pointer_rtx,
f6fe771a
RL
2062 plus_constant (Pmode,
2063 stack_pointer_rtx,
2064 -frame_size)));
43e9d192
IB
2065 }
2066 else if (frame_size > 0)
2067 {
2068 if ((frame_size & 0xfff) != frame_size)
2069 {
2070 insn = emit_insn (gen_add2_insn
2071 (stack_pointer_rtx,
2072 GEN_INT (-(frame_size
2073 & ~(HOST_WIDE_INT)0xfff))));
2074 RTX_FRAME_RELATED_P (insn) = 1;
2075 }
2076 if ((frame_size & 0xfff) != 0)
2077 {
2078 insn = emit_insn (gen_add2_insn
2079 (stack_pointer_rtx,
2080 GEN_INT (-(frame_size
2081 & (HOST_WIDE_INT)0xfff))));
2082 RTX_FRAME_RELATED_P (insn) = 1;
2083 }
2084 }
2085 }
2086 else
2087 frame_size = -1;
2088
2089 if (offset > 0)
2090 {
2091 /* Save the frame pointer and lr if the frame pointer is needed
2092 first. Make the frame pointer point to the location of the
2093 old frame pointer on the stack. */
2094 if (frame_pointer_needed)
2095 {
2096 rtx mem_fp, mem_lr;
2097
2098 if (fp_offset)
2099 {
2100 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2101 GEN_INT (-offset)));
2102 RTX_FRAME_RELATED_P (insn) = 1;
2103 aarch64_set_frame_expr (gen_rtx_SET
2104 (Pmode, stack_pointer_rtx,
2105 gen_rtx_MINUS (Pmode,
2106 stack_pointer_rtx,
2107 GEN_INT (offset))));
2108 mem_fp = gen_frame_mem (DImode,
2109 plus_constant (Pmode,
2110 stack_pointer_rtx,
2111 fp_offset));
2112 mem_lr = gen_frame_mem (DImode,
2113 plus_constant (Pmode,
2114 stack_pointer_rtx,
2115 fp_offset
2116 + UNITS_PER_WORD));
2117 insn = emit_insn (gen_store_pairdi (mem_fp,
2118 hard_frame_pointer_rtx,
2119 mem_lr,
2120 gen_rtx_REG (DImode,
2121 LR_REGNUM)));
2122 }
2123 else
2124 {
2125 insn = emit_insn (gen_storewb_pairdi_di
2126 (stack_pointer_rtx, stack_pointer_rtx,
2127 hard_frame_pointer_rtx,
2128 gen_rtx_REG (DImode, LR_REGNUM),
2129 GEN_INT (-offset),
2130 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2131 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2132 }
2133
2134 /* The first part of a frame-related parallel insn is always
2135 assumed to be relevant to the frame calculations;
2136 subsequent parts are only frame-related if explicitly
2137 marked. */
2138 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2139 RTX_FRAME_RELATED_P (insn) = 1;
2140
2141 /* Set up frame pointer to point to the location of the
2142 previous frame pointer on the stack. */
2143 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2144 stack_pointer_rtx,
2145 GEN_INT (fp_offset)));
2146 aarch64_set_frame_expr (gen_rtx_SET
2147 (Pmode, hard_frame_pointer_rtx,
f6fe771a
RL
2148 plus_constant (Pmode,
2149 stack_pointer_rtx,
2150 fp_offset)));
43e9d192
IB
2151 RTX_FRAME_RELATED_P (insn) = 1;
2152 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2153 hard_frame_pointer_rtx));
2154 }
2155 else
2156 {
2157 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2158 GEN_INT (-offset)));
2159 RTX_FRAME_RELATED_P (insn) = 1;
2160 }
2161
2162 aarch64_save_or_restore_callee_save_registers
2163 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2164 }
2165
2166 /* When offset >= 512,
2167 sub sp, sp, #<outgoing_args_size> */
2168 if (frame_size > -1)
2169 {
2170 if (crtl->outgoing_args_size > 0)
2171 {
2172 insn = emit_insn (gen_add2_insn
2173 (stack_pointer_rtx,
2174 GEN_INT (- crtl->outgoing_args_size)));
2175 RTX_FRAME_RELATED_P (insn) = 1;
2176 }
2177 }
2178}
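
   For the common small-frame case (offset < 512, a single SP adjustment),
   the size arithmetic above can be restated as a standalone sketch.  The
   parameter names are assumptions, everything is in bytes, and the result
   is rounded to the 16-byte stack alignment; this is not GCC code.

#include <stdint.h>

#define STACK_ALIGN 16

static int64_t
round_up (int64_t x, int64_t align)
{
  return (x + align - 1) & -align;
}

/* Returns the initial SP decrement; *fp_offset receives the offset of the
   FP/LR save slots from the adjusted SP (outgoing args plus any rounding
   padding), mirroring the fp_offset computation above.  */
static int64_t
small_frame_layout (int64_t locals_and_varargs, int64_t saved_regs,
                    int64_t outgoing_args, int64_t *fp_offset)
{
  int64_t frame_size = round_up (locals_and_varargs + saved_regs
                                 + outgoing_args, STACK_ALIGN);

  *fp_offset = frame_size - locals_and_varargs - saved_regs;
  return frame_size;
}

   With 40 bytes of locals, 16 bytes of saved registers and no outgoing
   arguments, frame_size rounds up to 64 and fp_offset is 8.
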
2179
2180/* Generate the epilogue instructions for returning from a function. */
2181void
2182aarch64_expand_epilogue (bool for_sibcall)
2183{
2184 HOST_WIDE_INT original_frame_size, frame_size, offset;
2185 HOST_WIDE_INT fp_offset;
2186 rtx insn;
44c0e7b9 2187 rtx cfa_reg;
43e9d192
IB
2188
2189 aarch64_layout_frame ();
2190 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2191 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2192 + crtl->outgoing_args_size);
2193 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2194 STACK_BOUNDARY / BITS_PER_UNIT);
2195
2196 fp_offset = (offset
2197 - original_frame_size
2198 - cfun->machine->frame.saved_regs_size);
2199
44c0e7b9
YZ
2200 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2201
2202 /* Store pairs and load pairs have a range of only -512 to 504. */
43e9d192
IB
2203 if (offset >= 512)
2204 {
2205 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2206 if (offset >= 512)
2207 offset = cfun->machine->frame.saved_regs_size;
2208
2209 frame_size -= (offset + crtl->outgoing_args_size);
2210 fp_offset = 0;
2211 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2212 {
2213 insn = emit_insn (gen_add2_insn
2214 (stack_pointer_rtx,
2215 GEN_INT (crtl->outgoing_args_size)));
2216 RTX_FRAME_RELATED_P (insn) = 1;
2217 }
2218 }
2219 else
2220 frame_size = -1;
2221
2222 /* If there were outgoing arguments or we've done dynamic stack
2223 allocation, then restore the stack pointer from the frame
2224 pointer. This is at most one insn and more efficient than using
2225 GCC's internal mechanism. */
2226 if (frame_pointer_needed
2227 && (crtl->outgoing_args_size || cfun->calls_alloca))
2228 {
2229 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2230 hard_frame_pointer_rtx,
2231 GEN_INT (- fp_offset)));
2232 RTX_FRAME_RELATED_P (insn) = 1;
44c0e7b9
YZ
2233 /* As SP is set to (FP - fp_offset), according to the rules in
2234 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2235 from the value of SP from now on. */
2236 cfa_reg = stack_pointer_rtx;
43e9d192
IB
2237 }
2238
2239 aarch64_save_or_restore_callee_save_registers
2240 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2241
2242 /* Restore the frame pointer and lr if the frame pointer is needed. */
2243 if (offset > 0)
2244 {
2245 if (frame_pointer_needed)
2246 {
2247 rtx mem_fp, mem_lr;
2248
2249 if (fp_offset)
2250 {
2251 mem_fp = gen_frame_mem (DImode,
2252 plus_constant (Pmode,
2253 stack_pointer_rtx,
2254 fp_offset));
2255 mem_lr = gen_frame_mem (DImode,
2256 plus_constant (Pmode,
2257 stack_pointer_rtx,
2258 fp_offset
2259 + UNITS_PER_WORD));
2260 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2261 mem_fp,
2262 gen_rtx_REG (DImode,
2263 LR_REGNUM),
2264 mem_lr));
2265 }
2266 else
2267 {
2268 insn = emit_insn (gen_loadwb_pairdi_di
2269 (stack_pointer_rtx,
2270 stack_pointer_rtx,
2271 hard_frame_pointer_rtx,
2272 gen_rtx_REG (DImode, LR_REGNUM),
2273 GEN_INT (offset),
2274 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2275 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
44c0e7b9
YZ
2276 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2277 (gen_rtx_SET (Pmode, stack_pointer_rtx,
dc2d3c67
YZ
2278 plus_constant (Pmode, cfa_reg,
2279 offset))));
43e9d192
IB
2280 }
2281
2282 /* The first part of a frame-related parallel insn
2283 is always assumed to be relevant to the frame
2284 calculations; subsequent parts are only
2285 frame-related if explicitly marked. */
2286 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2287 RTX_FRAME_RELATED_P (insn) = 1;
2288 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2289 add_reg_note (insn, REG_CFA_RESTORE,
2290 gen_rtx_REG (DImode, LR_REGNUM));
2291
2292 if (fp_offset)
2293 {
2294 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2295 GEN_INT (offset)));
2296 RTX_FRAME_RELATED_P (insn) = 1;
2297 }
2298 }
43e9d192
IB
2299 else
2300 {
2301 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2302 GEN_INT (offset)));
2303 RTX_FRAME_RELATED_P (insn) = 1;
2304 }
2305 }
2306
2307 /* Stack adjustment for exception handler. */
2308 if (crtl->calls_eh_return)
2309 {
2310 /* We need to unwind the stack by the offset computed by
2311 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2312 based on SP. Ideally we would update the SP and define the
2313 CFA along the lines of:
2314
2315 SP = SP + EH_RETURN_STACKADJ_RTX
2316 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2317
2318 However the dwarf emitter only understands a constant
2319 register offset.
2320
631b20a7 2321 The solution chosen here is to use the otherwise unused IP0
43e9d192
IB
2322 as a temporary register to hold the current SP value. The
2323 CFA is described using IP0 then SP is modified. */
2324
2325 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2326
2327 insn = emit_move_insn (ip0, stack_pointer_rtx);
2328 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2329 RTX_FRAME_RELATED_P (insn) = 1;
2330
2331 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2332
2333 /* Ensure the assignment to IP0 does not get optimized away. */
2334 emit_use (ip0);
2335 }
2336
2337 if (frame_size > -1)
2338 {
2339 if (frame_size >= 0x1000000)
2340 {
2341 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2342 emit_move_insn (op0, GEN_INT (frame_size));
2343 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2344 aarch64_set_frame_expr (gen_rtx_SET
2345 (Pmode, stack_pointer_rtx,
f6fe771a
RL
2346 plus_constant (Pmode,
2347 stack_pointer_rtx,
2348 frame_size)));
43e9d192
IB
2349 }
2350 else if (frame_size > 0)
2351 {
2352 if ((frame_size & 0xfff) != 0)
2353 {
2354 insn = emit_insn (gen_add2_insn
2355 (stack_pointer_rtx,
2356 GEN_INT ((frame_size
2357 & (HOST_WIDE_INT) 0xfff))));
2358 RTX_FRAME_RELATED_P (insn) = 1;
2359 }
2360 if ((frame_size & 0xfff) != frame_size)
2361 {
2362 insn = emit_insn (gen_add2_insn
2363 (stack_pointer_rtx,
2364 GEN_INT ((frame_size
2365 & ~ (HOST_WIDE_INT) 0xfff))));
2366 RTX_FRAME_RELATED_P (insn) = 1;
2367 }
2368 }
2369
f6fe771a
RL
2370 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2371 plus_constant (Pmode,
2372 stack_pointer_rtx,
2373 offset)));
43e9d192
IB
2374 }
2375
2376 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2377 if (!for_sibcall)
2378 emit_jump_insn (ret_rtx);
2379}
2380
2381/* Return the place to copy the exception unwinding return address to.
2382 This will probably be a stack slot, but could (in theory) be the
2383 return register. */
2384rtx
2385aarch64_final_eh_return_addr (void)
2386{
2387 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2388 aarch64_layout_frame ();
2389 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2390 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2391 + crtl->outgoing_args_size);
2392 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2393 STACK_BOUNDARY / BITS_PER_UNIT);
2394 fp_offset = offset
2395 - original_frame_size
2396 - cfun->machine->frame.saved_regs_size;
2397
2398 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2399 return gen_rtx_REG (DImode, LR_REGNUM);
2400
2401 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2402 result in a store to save LR introduced by builtin_eh_return () being
2403 incorrectly deleted because the alias is not detected.
2404 So in the calculation of the address to copy the exception unwinding
2405 return address to, we distinguish two cases.
2406 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2407 we return a SP-relative location since all the addresses are SP-relative
2408 in this case. This prevents the store from being optimized away.
2409 If the fp_offset is not 0, then the addresses will be FP-relative and
2410 therefore we return a FP-relative location. */
2411
2412 if (frame_pointer_needed)
2413 {
2414 if (fp_offset)
2415 return gen_frame_mem (DImode,
2416 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2417 else
2418 return gen_frame_mem (DImode,
2419 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2420 }
2421
2422 /* If FP is not needed, we calculate the location of LR, which would be
2423 at the top of the saved registers block. */
2424
2425 return gen_frame_mem (DImode,
2426 plus_constant (Pmode,
2427 stack_pointer_rtx,
2428 fp_offset
2429 + cfun->machine->frame.saved_regs_size
2430 - 2 * UNITS_PER_WORD));
2431}
2432
2433/* Output code to build up a constant in a register. */
2434static void
d9600ae5 2435aarch64_build_constant (int regnum, HOST_WIDE_INT val)
43e9d192
IB
2436{
2437 if (aarch64_bitmask_imm (val, DImode))
d9600ae5 2438 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
43e9d192
IB
2439 else
2440 {
2441 int i;
2442 int ncount = 0;
2443 int zcount = 0;
2444 HOST_WIDE_INT valp = val >> 16;
2445 HOST_WIDE_INT valm;
2446 HOST_WIDE_INT tval;
2447
2448 for (i = 16; i < 64; i += 16)
2449 {
2450 valm = (valp & 0xffff);
2451
2452 if (valm != 0)
2453 ++ zcount;
2454
2455 if (valm != 0xffff)
2456 ++ ncount;
2457
2458 valp >>= 16;
2459 }
2460
2461 /* zcount contains the number of additional MOVK instructions
2462 required if the constant is built up with an initial MOVZ instruction,
2463 while ncount is the number of MOVK instructions required if starting
2464 with a MOVN instruction. Choose the sequence that yields the
2465 fewest instructions, preferring MOVZ instructions when the two
2466 counts are equal. */
2467 if (ncount < zcount)
2468 {
d9600ae5
SN
2469 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2470 GEN_INT ((~val) & 0xffff));
43e9d192
IB
2471 tval = 0xffff;
2472 }
2473 else
2474 {
d9600ae5
SN
2475 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2476 GEN_INT (val & 0xffff));
43e9d192
IB
2477 tval = 0;
2478 }
2479
2480 val >>= 16;
2481
2482 for (i = 16; i < 64; i += 16)
2483 {
2484 if ((val & 0xffff) != tval)
d9600ae5
SN
2485 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2486 GEN_INT (i), GEN_INT (val & 0xffff)));
43e9d192
IB
2487 val >>= 16;
2488 }
2489 }
2490}
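
   The MOVZ-versus-MOVN decision above depends only on how many 16-bit
   chunks above the bottom one are non-zero (MOVKs needed after MOVZ) or
   not all-ones (MOVKs needed after MOVN).  A standalone sketch of that
   heuristic, not GCC code:

#include <stdbool.h>
#include <stdint.h>

/* Return true if starting the sequence with MOVN needs strictly fewer
   MOVK instructions than starting it with MOVZ.  */
static bool
prefer_movn (uint64_t val)
{
  int zcount = 0, ncount = 0;

  for (int i = 16; i < 64; i += 16)
    {
      unsigned chunk = (val >> i) & 0xffff;

      if (chunk != 0)
        zcount++;               /* MOVK needed after an initial MOVZ */
      if (chunk != 0xffff)
        ncount++;               /* MOVK needed after an initial MOVN */
    }

  return ncount < zcount;
}

   For 0xffffffff00001234 the upper chunks give zcount = 2 and ncount = 1,
   so MOVN plus one MOVK beats MOVZ plus two MOVKs.
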
2491
2492static void
d9600ae5 2493aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
43e9d192
IB
2494{
2495 HOST_WIDE_INT mdelta = delta;
d9600ae5
SN
2496 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2497 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
43e9d192
IB
2498
2499 if (mdelta < 0)
2500 mdelta = -mdelta;
2501
2502 if (mdelta >= 4096 * 4096)
2503 {
d9600ae5
SN
2504 aarch64_build_constant (scratchreg, delta);
2505 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
43e9d192
IB
2506 }
2507 else if (mdelta > 0)
2508 {
43e9d192 2509 if (mdelta >= 4096)
d9600ae5
SN
2510 {
2511 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2512 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2513 if (delta < 0)
2514 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2515 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2516 else
2517 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2518 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2519 }
43e9d192 2520 if (mdelta % 4096 != 0)
d9600ae5
SN
2521 {
2522 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2523 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2524 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2525 }
43e9d192
IB
2526 }
2527}
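
   The splitting performed above can be summarised as: deltas below 2^24
   become at most two add/sub instructions (an optional 12-bit chunk shifted
   left by 12, then the 12-bit remainder), while larger deltas go through a
   scratch register.  A rough standalone sketch that just prints the
   intended instructions; the scratch register name x16 is a placeholder,
   and the large-constant "mov" stands for a MOVZ/MOVK sequence.

#include <stdint.h>
#include <stdio.h>

static void
sketch_add_constant (const char *reg, int64_t delta)
{
  uint64_t mdelta = delta < 0 ? -(uint64_t) delta : (uint64_t) delta;
  const char *op = delta < 0 ? "sub" : "add";

  if (mdelta >= 4096ull * 4096)
    {
      printf ("mov x16, #%lld\n", (long long) delta);  /* built separately */
      printf ("add %s, %s, x16\n", reg, reg);
    }
  else
    {
      if (mdelta >= 4096)
        printf ("%s %s, %s, #%llu, lsl #12\n", op, reg, reg,
                (unsigned long long) (mdelta / 4096));
      if (mdelta % 4096 != 0)
        printf ("%s %s, %s, #%llu\n", op, reg, reg,
                (unsigned long long) (mdelta % 4096));
    }
}

   For example, sketch_add_constant ("x0", -8200) prints
   "sub x0, x0, #2, lsl #12" followed by "sub x0, x0, #8".
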
2528
2529/* Output code to add DELTA to the first argument, and then jump
2530 to FUNCTION. Used for C++ multiple inheritance. */
2531static void
2532aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2533 HOST_WIDE_INT delta,
2534 HOST_WIDE_INT vcall_offset,
2535 tree function)
2536{
2537 /* The this pointer is always in x0. Note that this differs from
2538 Arm where the this pointer may be bumped to r1 if r0 is required
2539 to return a pointer to an aggregate. On AArch64 a result value
2540 pointer will be in x8. */
2541 int this_regno = R0_REGNUM;
75f1d6fc 2542 rtx this_rtx, temp0, temp1, addr, insn, funexp;
43e9d192 2543
75f1d6fc
SN
2544 reload_completed = 1;
2545 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192
IB
2546
2547 if (vcall_offset == 0)
d9600ae5 2548 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
2549 else
2550 {
28514dda 2551 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 2552
75f1d6fc
SN
2553 this_rtx = gen_rtx_REG (Pmode, this_regno);
2554 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2555 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
43e9d192 2556
75f1d6fc
SN
2557 addr = this_rtx;
2558 if (delta != 0)
2559 {
2560 if (delta >= -256 && delta < 256)
2561 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2562 plus_constant (Pmode, this_rtx, delta));
2563 else
d9600ae5 2564 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
2565 }
2566
28514dda
YZ
2567 if (Pmode == ptr_mode)
2568 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2569 else
2570 aarch64_emit_move (temp0,
2571 gen_rtx_ZERO_EXTEND (Pmode,
2572 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 2573
28514dda 2574 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 2575 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
2576 else
2577 {
d9600ae5 2578 aarch64_build_constant (IP1_REGNUM, vcall_offset);
75f1d6fc 2579 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
2580 }
2581
28514dda
YZ
2582 if (Pmode == ptr_mode)
3583 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
2584 else
2585 aarch64_emit_move (temp1,
2586 gen_rtx_SIGN_EXTEND (Pmode,
2587 gen_rtx_MEM (ptr_mode, addr)));
2588
75f1d6fc 2589 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
2590 }
2591
75f1d6fc
SN
2592 /* Generate a tail call to the target function. */
2593 if (!TREE_USED (function))
2594 {
2595 assemble_external (function);
2596 TREE_USED (function) = 1;
2597 }
2598 funexp = XEXP (DECL_RTL (function), 0);
2599 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2600 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2601 SIBLING_CALL_P (insn) = 1;
2602
2603 insn = get_insns ();
2604 shorten_branches (insn);
2605 final_start_function (insn, file, 1);
2606 final (insn, file, 1);
43e9d192 2607 final_end_function ();
75f1d6fc
SN
2608
2609 /* Stop pretending to be a post-reload pass. */
2610 reload_completed = 0;
43e9d192
IB
2611}
2612
43e9d192
IB
2613static int
2614aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2615{
2616 if (GET_CODE (*x) == SYMBOL_REF)
2617 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2618
2619 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2620 TLS offsets, not real symbol references. */
2621 if (GET_CODE (*x) == UNSPEC
2622 && XINT (*x, 1) == UNSPEC_TLS)
2623 return -1;
2624
2625 return 0;
2626}
2627
2628static bool
2629aarch64_tls_referenced_p (rtx x)
2630{
2631 if (!TARGET_HAVE_TLS)
2632 return false;
2633
2634 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2635}
2636
2637
2638static int
2639aarch64_bitmasks_cmp (const void *i1, const void *i2)
2640{
2641 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2642 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2643
2644 if (*imm1 < *imm2)
2645 return -1;
2646 if (*imm1 > *imm2)
2647 return +1;
2648 return 0;
2649}
2650
2651
2652static void
2653aarch64_build_bitmask_table (void)
2654{
2655 unsigned HOST_WIDE_INT mask, imm;
2656 unsigned int log_e, e, s, r;
2657 unsigned int nimms = 0;
2658
2659 for (log_e = 1; log_e <= 6; log_e++)
2660 {
2661 e = 1 << log_e;
2662 if (e == 64)
2663 mask = ~(HOST_WIDE_INT) 0;
2664 else
2665 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2666 for (s = 1; s < e; s++)
2667 {
2668 for (r = 0; r < e; r++)
2669 {
2670 /* Set s consecutive bits to 1 (s < 64). */
2671 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2672 /* Rotate right by r. */
2673 if (r != 0)
2674 imm = ((imm >> r) | (imm << (e - r))) & mask;
2675 /* Replicate the constant depending on SIMD size; each case falls through. */
2676 switch (log_e) {
2677 case 1: imm |= (imm << 2);
2678 case 2: imm |= (imm << 4);
2679 case 3: imm |= (imm << 8);
2680 case 4: imm |= (imm << 16);
2681 case 5: imm |= (imm << 32);
2682 case 6:
2683 break;
2684 default:
2685 gcc_unreachable ();
2686 }
2687 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2688 aarch64_bitmasks[nimms++] = imm;
2689 }
2690 }
2691 }
2692
2693 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2694 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2695 aarch64_bitmasks_cmp);
2696}
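
   The same (element size, run length, rotation) enumeration can be used
   directly as a membership test instead of building and sorting a table.
   A standalone sketch, slower than the table lookup but making the
   encoding explicit; this is an illustration, not GCC code.

#include <stdbool.h>
#include <stdint.h>

static bool
is_logical_immediate (uint64_t val)
{
  for (int log_e = 1; log_e <= 6; log_e++)
    {
      unsigned e = 1u << log_e;
      uint64_t mask = (e == 64) ? ~0ull : (1ull << e) - 1;

      for (unsigned s = 1; s < e; s++)
        for (unsigned r = 0; r < e; r++)
          {
            uint64_t imm = (1ull << s) - 1;           /* s consecutive ones */
            if (r != 0)
              imm = ((imm >> r) | (imm << (e - r))) & mask;
            for (unsigned width = e; width < 64; width *= 2)
              imm |= imm << width;                    /* replicate element  */
            if (imm == val)
              return true;
          }
    }
  return false;
}

   For example, 0x00ff00ff00ff00ff is accepted (element size 16, eight
   consecutive ones, rotation 0), while 0x1234 is not.
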
2697
2698
2699/* Return true if val can be encoded as a 12-bit unsigned immediate with
2700 a left shift of 0 or 12 bits. */
2701bool
2702aarch64_uimm12_shift (HOST_WIDE_INT val)
2703{
2704 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2705 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2706 );
2707}
2708
2709
2710/* Return true if val is an immediate that can be loaded into a
2711 register by a MOVZ instruction. */
2712static bool
2713aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2714{
2715 if (GET_MODE_SIZE (mode) > 4)
2716 {
2717 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2718 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2719 return 1;
2720 }
2721 else
2722 {
2723 /* Ignore sign extension. */
2724 val &= (HOST_WIDE_INT) 0xffffffff;
2725 }
2726 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2727 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2728}
2729
2730
2731/* Return true if val is a valid bitmask immediate. */
2732bool
2733aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2734{
2735 if (GET_MODE_SIZE (mode) < 8)
2736 {
2737 /* Replicate bit pattern. */
2738 val &= (HOST_WIDE_INT) 0xffffffff;
2739 val |= val << 32;
2740 }
2741 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2742 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2743}
2744
2745
2746/* Return true if val is an immediate that can be loaded into a
2747 register in a single instruction. */
2748bool
2749aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2750{
2751 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2752 return 1;
2753 return aarch64_bitmask_imm (val, mode);
2754}
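
   A hypothetical caller's view of the predicates above, specialised to
   64-bit values: a constant is a single-instruction move if one 16-bit
   chunk covers it (MOVZ), the complement of one does (MOVN), or it is a
   valid logical immediate for ORR with the zero register.  A minimal
   sketch of the MOVZ/MOVN part only (the logical-immediate part is the
   enumeration sketched earlier); not GCC code.

#include <stdbool.h>
#include <stdint.h>

static bool
movw_imm_p (uint64_t val)
{
  for (int shift = 0; shift < 64; shift += 16)
    if ((val & (0xffffull << shift)) == val)
      return true;
  return false;
}

/* Single-instruction 64-bit move: MOVZ, MOVN, or (not checked here) a
   logical immediate usable as "orr xd, xzr, #imm".  */
static bool
one_insn_move_p (uint64_t val)
{
  return movw_imm_p (val) || movw_imm_p (~val);
}
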
2755
2756static bool
2757aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2758{
2759 rtx base, offset;
7eda14e1 2760
43e9d192
IB
2761 if (GET_CODE (x) == HIGH)
2762 return true;
2763
2764 split_const (x, &base, &offset);
2765 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda
YZ
2766 {
2767 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2768 != SYMBOL_FORCE_TO_MEM)
2769 return true;
2770 else
2771 /* Avoid generating a 64-bit relocation in ILP32; leave
2772 to aarch64_expand_mov_immediate to handle it properly. */
2773 return mode != ptr_mode;
2774 }
43e9d192
IB
2775
2776 return aarch64_tls_referenced_p (x);
2777}
2778
2779/* Return true if register REGNO is a valid index register.
2780 STRICT_P is true if REG_OK_STRICT is in effect. */
2781
2782bool
2783aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2784{
2785 if (!HARD_REGISTER_NUM_P (regno))
2786 {
2787 if (!strict_p)
2788 return true;
2789
2790 if (!reg_renumber)
2791 return false;
2792
2793 regno = reg_renumber[regno];
2794 }
2795 return GP_REGNUM_P (regno);
2796}
2797
2798/* Return true if register REGNO is a valid base register.
2799 STRICT_P is true if REG_OK_STRICT is in effect. */
2800
2801bool
2802aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2803{
2804 if (!HARD_REGISTER_NUM_P (regno))
2805 {
2806 if (!strict_p)
2807 return true;
2808
2809 if (!reg_renumber)
2810 return false;
2811
2812 regno = reg_renumber[regno];
2813 }
2814
2815 /* The fake registers will be eliminated to either the stack or
2816 hard frame pointer, both of which are usually valid base registers.
2817 Reload deals with the cases where the eliminated form isn't valid. */
2818 return (GP_REGNUM_P (regno)
2819 || regno == SP_REGNUM
2820 || regno == FRAME_POINTER_REGNUM
2821 || regno == ARG_POINTER_REGNUM);
2822}
2823
2824/* Return true if X is a valid base register.
2825 STRICT_P is true if REG_OK_STRICT is in effect. */
2826
2827static bool
2828aarch64_base_register_rtx_p (rtx x, bool strict_p)
2829{
2830 if (!strict_p && GET_CODE (x) == SUBREG)
2831 x = SUBREG_REG (x);
2832
2833 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2834}
2835
2836/* Return true if address offset is a valid index. If it is, fill in INFO
2837 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2838
2839static bool
2840aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2841 enum machine_mode mode, bool strict_p)
2842{
2843 enum aarch64_address_type type;
2844 rtx index;
2845 int shift;
2846
2847 /* (reg:P) */
2848 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2849 && GET_MODE (x) == Pmode)
2850 {
2851 type = ADDRESS_REG_REG;
2852 index = x;
2853 shift = 0;
2854 }
2855 /* (sign_extend:DI (reg:SI)) */
2856 else if ((GET_CODE (x) == SIGN_EXTEND
2857 || GET_CODE (x) == ZERO_EXTEND)
2858 && GET_MODE (x) == DImode
2859 && GET_MODE (XEXP (x, 0)) == SImode)
2860 {
2861 type = (GET_CODE (x) == SIGN_EXTEND)
2862 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2863 index = XEXP (x, 0);
2864 shift = 0;
2865 }
2866 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2867 else if (GET_CODE (x) == MULT
2868 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2869 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2870 && GET_MODE (XEXP (x, 0)) == DImode
2871 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2872 && CONST_INT_P (XEXP (x, 1)))
2873 {
2874 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2875 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2876 index = XEXP (XEXP (x, 0), 0);
2877 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2878 }
2879 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2880 else if (GET_CODE (x) == ASHIFT
2881 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2882 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2883 && GET_MODE (XEXP (x, 0)) == DImode
2884 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2885 && CONST_INT_P (XEXP (x, 1)))
2886 {
2887 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2888 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2889 index = XEXP (XEXP (x, 0), 0);
2890 shift = INTVAL (XEXP (x, 1));
2891 }
2892 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2893 else if ((GET_CODE (x) == SIGN_EXTRACT
2894 || GET_CODE (x) == ZERO_EXTRACT)
2895 && GET_MODE (x) == DImode
2896 && GET_CODE (XEXP (x, 0)) == MULT
2897 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2898 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2899 {
2900 type = (GET_CODE (x) == SIGN_EXTRACT)
2901 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2902 index = XEXP (XEXP (x, 0), 0);
2903 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2904 if (INTVAL (XEXP (x, 1)) != 32 + shift
2905 || INTVAL (XEXP (x, 2)) != 0)
2906 shift = -1;
2907 }
2908 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2909 (const_int 0xffffffff<<shift)) */
2910 else if (GET_CODE (x) == AND
2911 && GET_MODE (x) == DImode
2912 && GET_CODE (XEXP (x, 0)) == MULT
2913 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2914 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2915 && CONST_INT_P (XEXP (x, 1)))
2916 {
2917 type = ADDRESS_REG_UXTW;
2918 index = XEXP (XEXP (x, 0), 0);
2919 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2920 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2921 shift = -1;
2922 }
2923 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2924 else if ((GET_CODE (x) == SIGN_EXTRACT
2925 || GET_CODE (x) == ZERO_EXTRACT)
2926 && GET_MODE (x) == DImode
2927 && GET_CODE (XEXP (x, 0)) == ASHIFT
2928 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2929 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2930 {
2931 type = (GET_CODE (x) == SIGN_EXTRACT)
2932 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2933 index = XEXP (XEXP (x, 0), 0);
2934 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2935 if (INTVAL (XEXP (x, 1)) != 32 + shift
2936 || INTVAL (XEXP (x, 2)) != 0)
2937 shift = -1;
2938 }
2939 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2940 (const_int 0xffffffff<<shift)) */
2941 else if (GET_CODE (x) == AND
2942 && GET_MODE (x) == DImode
2943 && GET_CODE (XEXP (x, 0)) == ASHIFT
2944 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2945 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2946 && CONST_INT_P (XEXP (x, 1)))
2947 {
2948 type = ADDRESS_REG_UXTW;
2949 index = XEXP (XEXP (x, 0), 0);
2950 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2951 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2952 shift = -1;
2953 }
2954 /* (mult:P (reg:P) (const_int scale)) */
2955 else if (GET_CODE (x) == MULT
2956 && GET_MODE (x) == Pmode
2957 && GET_MODE (XEXP (x, 0)) == Pmode
2958 && CONST_INT_P (XEXP (x, 1)))
2959 {
2960 type = ADDRESS_REG_REG;
2961 index = XEXP (x, 0);
2962 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2963 }
2964 /* (ashift:P (reg:P) (const_int shift)) */
2965 else if (GET_CODE (x) == ASHIFT
2966 && GET_MODE (x) == Pmode
2967 && GET_MODE (XEXP (x, 0)) == Pmode
2968 && CONST_INT_P (XEXP (x, 1)))
2969 {
2970 type = ADDRESS_REG_REG;
2971 index = XEXP (x, 0);
2972 shift = INTVAL (XEXP (x, 1));
2973 }
2974 else
2975 return false;
2976
2977 if (GET_CODE (index) == SUBREG)
2978 index = SUBREG_REG (index);
2979
2980 if ((shift == 0 ||
2981 (shift > 0 && shift <= 3
2982 && (1 << shift) == GET_MODE_SIZE (mode)))
2983 && REG_P (index)
2984 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2985 {
2986 info->type = type;
2987 info->offset = index;
2988 info->shift = shift;
2989 return true;
2990 }
2991
2992 return false;
2993}
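
   For orientation, these index forms are what ordinary array accesses
   produce.  The hypothetical functions below (not from GCC) would
   typically compile to a register index scaled by lsl #3, a sign-extended
   (sxtw) index and a zero-extended (uxtw) index respectively, i.e. the
   shapes matched above; the exact output depends on optimization level.

long
f_reg (long *p, long i)
{
  return p[i];          /* [x0, x1, lsl #3]   */
}

long
f_sxtw (long *p, int i)
{
  return p[i];          /* [x0, w1, sxtw #3]  */
}

long
f_uxtw (long *p, unsigned i)
{
  return p[i];          /* [x0, w1, uxtw #3]  */
}
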
2994
2995static inline bool
2996offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2997{
2998 return (offset >= -64 * GET_MODE_SIZE (mode)
2999 && offset < 64 * GET_MODE_SIZE (mode)
3000 && offset % GET_MODE_SIZE (mode) == 0);
3001}
3002
3003static inline bool
3004offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3005 HOST_WIDE_INT offset)
3006{
3007 return offset >= -256 && offset < 256;
3008}
3009
3010static inline bool
3011offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3012{
3013 return (offset >= 0
3014 && offset < 4096 * GET_MODE_SIZE (mode)
3015 && offset % GET_MODE_SIZE (mode) == 0);
3016}
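
   These three predicates correspond to the offset ranges of load/store
   pair (7-bit signed, scaled), unscaled single accesses (ldur/stur,
   9-bit signed) and scaled single accesses (12-bit unsigned).  A worked
   instance for 8-byte (DImode) accesses, as a standalone sketch rather
   than GCC code:

#include <stdbool.h>
#include <stdint.h>

#define MODE_SIZE 8     /* DImode */

static bool
pair_offset_p (int64_t o)       /* -512 .. 504 in steps of 8 */
{
  return o >= -64 * MODE_SIZE && o < 64 * MODE_SIZE && o % MODE_SIZE == 0;
}

static bool
unscaled_offset_p (int64_t o)   /* -256 .. 255 */
{
  return o >= -256 && o < 256;
}

static bool
scaled_offset_p (int64_t o)     /* 0 .. 32760 in steps of 8 */
{
  return o >= 0 && o < 4096 * MODE_SIZE && o % MODE_SIZE == 0;
}
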
3017
3018/* Return true if X is a valid address for machine mode MODE. If it is,
3019 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3020 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3021
3022static bool
3023aarch64_classify_address (struct aarch64_address_info *info,
3024 rtx x, enum machine_mode mode,
3025 RTX_CODE outer_code, bool strict_p)
3026{
3027 enum rtx_code code = GET_CODE (x);
3028 rtx op0, op1;
3029 bool allow_reg_index_p =
3030 outer_code != PARALLEL && GET_MODE_SIZE (mode) != 16;
3031
3032 /* Don't support anything other than POST_INC or REG addressing for
3033 AdvSIMD. */
3034 if (aarch64_vector_mode_p (mode)
3035 && (code != POST_INC && code != REG))
3036 return false;
3037
3038 switch (code)
3039 {
3040 case REG:
3041 case SUBREG:
3042 info->type = ADDRESS_REG_IMM;
3043 info->base = x;
3044 info->offset = const0_rtx;
3045 return aarch64_base_register_rtx_p (x, strict_p);
3046
3047 case PLUS:
3048 op0 = XEXP (x, 0);
3049 op1 = XEXP (x, 1);
3050 if (GET_MODE_SIZE (mode) != 0
3051 && CONST_INT_P (op1)
3052 && aarch64_base_register_rtx_p (op0, strict_p))
3053 {
3054 HOST_WIDE_INT offset = INTVAL (op1);
3055
3056 info->type = ADDRESS_REG_IMM;
3057 info->base = op0;
3058 info->offset = op1;
3059
3060 /* TImode and TFmode values are allowed in both pairs of X
3061 registers and individual Q registers. The available
3062 address modes are:
3063 X,X: 7-bit signed scaled offset
3064 Q: 9-bit signed offset
3065 We conservatively require an offset representable in either mode.
3066 */
3067 if (mode == TImode || mode == TFmode)
3068 return (offset_7bit_signed_scaled_p (mode, offset)
3069 && offset_9bit_signed_unscaled_p (mode, offset));
3070
3071 if (outer_code == PARALLEL)
3072 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3073 && offset_7bit_signed_scaled_p (mode, offset));
3074 else
3075 return (offset_9bit_signed_unscaled_p (mode, offset)
3076 || offset_12bit_unsigned_scaled_p (mode, offset));
3077 }
3078
3079 if (allow_reg_index_p)
3080 {
3081 /* Look for base + (scaled/extended) index register. */
3082 if (aarch64_base_register_rtx_p (op0, strict_p)
3083 && aarch64_classify_index (info, op1, mode, strict_p))
3084 {
3085 info->base = op0;
3086 return true;
3087 }
3088 if (aarch64_base_register_rtx_p (op1, strict_p)
3089 && aarch64_classify_index (info, op0, mode, strict_p))
3090 {
3091 info->base = op1;
3092 return true;
3093 }
3094 }
3095
3096 return false;
3097
3098 case POST_INC:
3099 case POST_DEC:
3100 case PRE_INC:
3101 case PRE_DEC:
3102 info->type = ADDRESS_REG_WB;
3103 info->base = XEXP (x, 0);
3104 info->offset = NULL_RTX;
3105 return aarch64_base_register_rtx_p (info->base, strict_p);
3106
3107 case POST_MODIFY:
3108 case PRE_MODIFY:
3109 info->type = ADDRESS_REG_WB;
3110 info->base = XEXP (x, 0);
3111 if (GET_CODE (XEXP (x, 1)) == PLUS
3112 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3113 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3114 && aarch64_base_register_rtx_p (info->base, strict_p))
3115 {
3116 HOST_WIDE_INT offset;
3117 info->offset = XEXP (XEXP (x, 1), 1);
3118 offset = INTVAL (info->offset);
3119
3120 /* TImode and TFmode values are allowed in both pairs of X
3121 registers and individual Q registers. The available
3122 address modes are:
3123 X,X: 7-bit signed scaled offset
3124 Q: 9-bit signed offset
3125 We conservatively require an offset representable in either mode.
3126 */
3127 if (mode == TImode || mode == TFmode)
3128 return (offset_7bit_signed_scaled_p (mode, offset)
3129 && offset_9bit_signed_unscaled_p (mode, offset));
3130
3131 if (outer_code == PARALLEL)
3132 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3133 && offset_7bit_signed_scaled_p (mode, offset));
3134 else
3135 return offset_9bit_signed_unscaled_p (mode, offset);
3136 }
3137 return false;
3138
3139 case CONST:
3140 case SYMBOL_REF:
3141 case LABEL_REF:
79517551
SN
3142 /* Load literal: PC-relative constant pool entry. Only supported
3143 for SI mode or larger. */
43e9d192 3144 info->type = ADDRESS_SYMBOLIC;
79517551 3145 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
43e9d192
IB
3146 {
3147 rtx sym, addend;
3148
3149 split_const (x, &sym, &addend);
3150 return (GET_CODE (sym) == LABEL_REF
3151 || (GET_CODE (sym) == SYMBOL_REF
3152 && CONSTANT_POOL_ADDRESS_P (sym)));
3153 }
3154 return false;
3155
3156 case LO_SUM:
3157 info->type = ADDRESS_LO_SUM;
3158 info->base = XEXP (x, 0);
3159 info->offset = XEXP (x, 1);
3160 if (allow_reg_index_p
3161 && aarch64_base_register_rtx_p (info->base, strict_p))
3162 {
3163 rtx sym, offs;
3164 split_const (info->offset, &sym, &offs);
3165 if (GET_CODE (sym) == SYMBOL_REF
3166 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3167 == SYMBOL_SMALL_ABSOLUTE))
3168 {
3169 /* The symbol and offset must be aligned to the access size. */
3170 unsigned int align;
3171 unsigned int ref_size;
3172
3173 if (CONSTANT_POOL_ADDRESS_P (sym))
3174 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3175 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3176 {
3177 tree exp = SYMBOL_REF_DECL (sym);
3178 align = TYPE_ALIGN (TREE_TYPE (exp));
3179 align = CONSTANT_ALIGNMENT (exp, align);
3180 }
3181 else if (SYMBOL_REF_DECL (sym))
3182 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3183 else
3184 align = BITS_PER_UNIT;
3185
3186 ref_size = GET_MODE_SIZE (mode);
3187 if (ref_size == 0)
3188 ref_size = GET_MODE_SIZE (DImode);
3189
3190 return ((INTVAL (offs) & (ref_size - 1)) == 0
3191 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3192 }
3193 }
3194 return false;
3195
3196 default:
3197 return false;
3198 }
3199}
3200
3201bool
3202aarch64_symbolic_address_p (rtx x)
3203{
3204 rtx offset;
3205
3206 split_const (x, &x, &offset);
3207 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3208}
3209
3210/* Classify the base of symbolic expression X, given that X appears in
3211 context CONTEXT. */
da4f13a4
MS
3212
3213enum aarch64_symbol_type
3214aarch64_classify_symbolic_expression (rtx x,
3215 enum aarch64_symbol_context context)
43e9d192
IB
3216{
3217 rtx offset;
da4f13a4 3218
43e9d192
IB
3219 split_const (x, &x, &offset);
3220 return aarch64_classify_symbol (x, context);
3221}
3222
3223
3224/* Return TRUE if X is a legitimate address for accessing memory in
3225 mode MODE. */
3226static bool
3227aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3228{
3229 struct aarch64_address_info addr;
3230
3231 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3232}
3233
3234/* Return TRUE if X is a legitimate address for accessing memory in
3235 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3236 pair operation. */
3237bool
3238aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3239 RTX_CODE outer_code, bool strict_p)
3240{
3241 struct aarch64_address_info addr;
3242
3243 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3244}
3245
3246/* Return TRUE if rtx X is the immediate constant 0.0. */
3247bool
3520f7cc 3248aarch64_float_const_zero_rtx_p (rtx x)
43e9d192
IB
3249{
3250 REAL_VALUE_TYPE r;
3251
3252 if (GET_MODE (x) == VOIDmode)
3253 return false;
3254
3255 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3256 if (REAL_VALUE_MINUS_ZERO (r))
3257 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3258 return REAL_VALUES_EQUAL (r, dconst0);
3259}
3260
70f09188
AP
3261/* Return the fixed registers used for condition codes. */
3262
3263static bool
3264aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3265{
3266 *p1 = CC_REGNUM;
3267 *p2 = INVALID_REGNUM;
3268 return true;
3269}
3270
43e9d192
IB
3271enum machine_mode
3272aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3273{
3274 /* All floating point compares return CCFP if it is an equality
3275 comparison, and CCFPE otherwise. */
3276 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3277 {
3278 switch (code)
3279 {
3280 case EQ:
3281 case NE:
3282 case UNORDERED:
3283 case ORDERED:
3284 case UNLT:
3285 case UNLE:
3286 case UNGT:
3287 case UNGE:
3288 case UNEQ:
3289 case LTGT:
3290 return CCFPmode;
3291
3292 case LT:
3293 case LE:
3294 case GT:
3295 case GE:
3296 return CCFPEmode;
3297
3298 default:
3299 gcc_unreachable ();
3300 }
3301 }
3302
3303 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3304 && y == const0_rtx
3305 && (code == EQ || code == NE || code == LT || code == GE)
b056c910
N
3306 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3307 || GET_CODE (x) == NEG))
43e9d192
IB
3308 return CC_NZmode;
3309
274b2532 3310 /* A compare with a shifted or negated operand. Because of canonicalization,
43e9d192
IB
3311 the comparison will have to be swapped when we emit the assembly
3312 code. */
3313 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3314 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3315 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3316 || GET_CODE (x) == LSHIFTRT
274b2532
KT
3317 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND
3318 || GET_CODE (x) == NEG))
43e9d192
IB
3319 return CC_SWPmode;
3320
3321 /* A compare of a mode narrower than SI mode against zero can be done
3322 by extending the value in the comparison. */
3323 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3324 && y == const0_rtx)
3325 /* Only use sign-extension if we really need it. */
3326 return ((code == GT || code == GE || code == LE || code == LT)
3327 ? CC_SESWPmode : CC_ZESWPmode);
3328
3329 /* For everything else, return CCmode. */
3330 return CCmode;
3331}
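
   As a rough illustration (the exact RTL depends on how combine merges the
   comparison), the hypothetical functions below produce compare shapes for
   which the selector above would pick CC_NZmode (flags reused from the
   addition), CC_SWPmode (shifted first operand, so the comparison is
   emitted swapped) and plain CCmode.

int
cmp_nz (long a, long b)
{
  return (a + b) < 0;           /* (compare (plus a b) 0), LT -> CC_NZmode */
}

int
cmp_swp (long a, long b)
{
  return (a << 2) == b;         /* shifted operand on the left -> CC_SWPmode */
}

int
cmp_plain (unsigned long a, unsigned long b)
{
  return a > b;                 /* ordinary register compare -> CCmode */
}
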
3332
3333static unsigned
3334aarch64_get_condition_code (rtx x)
3335{
3336 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3337 enum rtx_code comp_code = GET_CODE (x);
3338
3339 if (GET_MODE_CLASS (mode) != MODE_CC)
3340 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3341
3342 switch (mode)
3343 {
3344 case CCFPmode:
3345 case CCFPEmode:
3346 switch (comp_code)
3347 {
3348 case GE: return AARCH64_GE;
3349 case GT: return AARCH64_GT;
3350 case LE: return AARCH64_LS;
3351 case LT: return AARCH64_MI;
3352 case NE: return AARCH64_NE;
3353 case EQ: return AARCH64_EQ;
3354 case ORDERED: return AARCH64_VC;
3355 case UNORDERED: return AARCH64_VS;
3356 case UNLT: return AARCH64_LT;
3357 case UNLE: return AARCH64_LE;
3358 case UNGT: return AARCH64_HI;
3359 case UNGE: return AARCH64_PL;
3360 default: gcc_unreachable ();
3361 }
3362 break;
3363
3364 case CCmode:
3365 switch (comp_code)
3366 {
3367 case NE: return AARCH64_NE;
3368 case EQ: return AARCH64_EQ;
3369 case GE: return AARCH64_GE;
3370 case GT: return AARCH64_GT;
3371 case LE: return AARCH64_LE;
3372 case LT: return AARCH64_LT;
3373 case GEU: return AARCH64_CS;
3374 case GTU: return AARCH64_HI;
3375 case LEU: return AARCH64_LS;
3376 case LTU: return AARCH64_CC;
3377 default: gcc_unreachable ();
3378 }
3379 break;
3380
3381 case CC_SWPmode:
3382 case CC_ZESWPmode:
3383 case CC_SESWPmode:
3384 switch (comp_code)
3385 {
3386 case NE: return AARCH64_NE;
3387 case EQ: return AARCH64_EQ;
3388 case GE: return AARCH64_LE;
3389 case GT: return AARCH64_LT;
3390 case LE: return AARCH64_GE;
3391 case LT: return AARCH64_GT;
3392 case GEU: return AARCH64_LS;
3393 case GTU: return AARCH64_CC;
3394 case LEU: return AARCH64_CS;
3395 case LTU: return AARCH64_HI;
3396 default: gcc_unreachable ();
3397 }
3398 break;
3399
3400 case CC_NZmode:
3401 switch (comp_code)
3402 {
3403 case NE: return AARCH64_NE;
3404 case EQ: return AARCH64_EQ;
3405 case GE: return AARCH64_PL;
3406 case LT: return AARCH64_MI;
3407 default: gcc_unreachable ();
3408 }
3409 break;
3410
3411 default:
3412 gcc_unreachable ();
3413 break;
3414 }
3415}
3416
3417static unsigned
3418bit_count (unsigned HOST_WIDE_INT value)
3419{
3420 unsigned count = 0;
3421
3422 while (value)
3423 {
3424 count++;
3425 value &= value - 1;
3426 }
3427
3428 return count;
3429}
3430
3431void
3432aarch64_print_operand (FILE *f, rtx x, char code)
3433{
3434 switch (code)
3435 {
f541a481
KT
3436 /* An integer or symbol address without a preceding # sign. */
3437 case 'c':
3438 switch (GET_CODE (x))
3439 {
3440 case CONST_INT:
3441 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3442 break;
3443
3444 case SYMBOL_REF:
3445 output_addr_const (f, x);
3446 break;
3447
3448 case CONST:
3449 if (GET_CODE (XEXP (x, 0)) == PLUS
3450 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3451 {
3452 output_addr_const (f, x);
3453 break;
3454 }
3455 /* Fall through. */
3456
3457 default:
3458 output_operand_lossage ("Unsupported operand for code '%c'", code);
3459 }
3460 break;
3461
43e9d192
IB
3462 case 'e':
3463 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3464 {
3465 int n;
3466
3467 if (GET_CODE (x) != CONST_INT
3468 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3469 {
3470 output_operand_lossage ("invalid operand for '%%%c'", code);
3471 return;
3472 }
3473
3474 switch (n)
3475 {
3476 case 3:
3477 fputc ('b', f);
3478 break;
3479 case 4:
3480 fputc ('h', f);
3481 break;
3482 case 5:
3483 fputc ('w', f);
3484 break;
3485 default:
3486 output_operand_lossage ("invalid operand for '%%%c'", code);
3487 return;
3488 }
3489 }
3490 break;
3491
3492 case 'p':
3493 {
3494 int n;
3495
3496 /* Print N such that 2^N == X. */
3497 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3498 {
3499 output_operand_lossage ("invalid operand for '%%%c'", code);
3500 return;
3501 }
3502
3503 asm_fprintf (f, "%d", n);
3504 }
3505 break;
3506
3507 case 'P':
3508 /* Print the number of non-zero bits in X (a const_int). */
3509 if (GET_CODE (x) != CONST_INT)
3510 {
3511 output_operand_lossage ("invalid operand for '%%%c'", code);
3512 return;
3513 }
3514
3515 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3516 break;
3517
3518 case 'H':
3519 /* Print the higher numbered register of a pair (TImode) of regs. */
3520 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3521 {
3522 output_operand_lossage ("invalid operand for '%%%c'", code);
3523 return;
3524 }
3525
01a3a324 3526 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
3527 break;
3528
43e9d192
IB
3529 case 'm':
3530 /* Print a condition (eq, ne, etc). */
3531
3532 /* CONST_TRUE_RTX means always -- that's the default. */
3533 if (x == const_true_rtx)
3534 return;
3535
3536 if (!COMPARISON_P (x))
3537 {
3538 output_operand_lossage ("invalid operand for '%%%c'", code);
3539 return;
3540 }
3541
3542 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3543 break;
3544
3545 case 'M':
3546 /* Print the inverse of a condition (eq <-> ne, etc). */
3547
3548 /* CONST_TRUE_RTX means never -- that's the default. */
3549 if (x == const_true_rtx)
3550 {
3551 fputs ("nv", f);
3552 return;
3553 }
3554
3555 if (!COMPARISON_P (x))
3556 {
3557 output_operand_lossage ("invalid operand for '%%%c'", code);
3558 return;
3559 }
3560
3561 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3562 (aarch64_get_condition_code (x))], f);
3563 break;
3564
3565 case 'b':
3566 case 'h':
3567 case 's':
3568 case 'd':
3569 case 'q':
3570 /* Print a scalar FP/SIMD register name. */
3571 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3572 {
3573 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3574 return;
3575 }
50ce6f88 3576 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
3577 break;
3578
3579 case 'S':
3580 case 'T':
3581 case 'U':
3582 case 'V':
3583 /* Print the first FP/SIMD register name in a list. */
3584 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3585 {
3586 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3587 return;
3588 }
50ce6f88 3589 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
3590 break;
3591
a05c0ddf 3592 case 'X':
50d38551 3593 /* Print bottom 16 bits of integer constant in hex. */
a05c0ddf
IB
3594 if (GET_CODE (x) != CONST_INT)
3595 {
3596 output_operand_lossage ("invalid operand for '%%%c'", code);
3597 return;
3598 }
50d38551 3599 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
3600 break;
3601
43e9d192
IB
3602 case 'w':
3603 case 'x':
3604 /* Print a general register name or the zero register (32-bit or
3605 64-bit). */
3520f7cc
JG
3606 if (x == const0_rtx
3607 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 3608 {
50ce6f88 3609 asm_fprintf (f, "%czr", code);
43e9d192
IB
3610 break;
3611 }
3612
3613 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3614 {
50ce6f88 3615 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
3616 break;
3617 }
3618
3619 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3620 {
50ce6f88 3621 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
3622 break;
3623 }
3624
3625 /* Fall through */
3626
3627 case 0:
3628 /* Print a normal operand; if it's a general register, then we
3629 assume DImode. */
3630 if (x == NULL)
3631 {
3632 output_operand_lossage ("missing operand");
3633 return;
3634 }
3635
3636 switch (GET_CODE (x))
3637 {
3638 case REG:
01a3a324 3639 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
3640 break;
3641
3642 case MEM:
3643 aarch64_memory_reference_mode = GET_MODE (x);
3644 output_address (XEXP (x, 0));
3645 break;
3646
3647 case LABEL_REF:
3648 case SYMBOL_REF:
3649 output_addr_const (asm_out_file, x);
3650 break;
3651
3652 case CONST_INT:
3653 asm_fprintf (f, "%wd", INTVAL (x));
3654 break;
3655
3656 case CONST_VECTOR:
3520f7cc
JG
3657 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3658 {
3659 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3660 HOST_WIDE_INT_MIN,
3661 HOST_WIDE_INT_MAX));
3662 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3663 }
3664 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3665 {
3666 fputc ('0', f);
3667 }
3668 else
3669 gcc_unreachable ();
43e9d192
IB
3670 break;
3671
3520f7cc
JG
3672 case CONST_DOUBLE:
3673 /* CONST_DOUBLE can represent a double-width integer.
3674 In this case, the mode of x is VOIDmode. */
3675 if (GET_MODE (x) == VOIDmode)
3676 ; /* Do Nothing. */
3677 else if (aarch64_float_const_zero_rtx_p (x))
3678 {
3679 fputc ('0', f);
3680 break;
3681 }
3682 else if (aarch64_float_const_representable_p (x))
3683 {
3684#define buf_size 20
3685 char float_buf[buf_size] = {'\0'};
3686 REAL_VALUE_TYPE r;
3687 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3688 real_to_decimal_for_mode (float_buf, &r,
3689 buf_size, buf_size,
3690 1, GET_MODE (x));
3691 asm_fprintf (asm_out_file, "%s", float_buf);
3692 break;
3693#undef buf_size
3694 }
3695 output_operand_lossage ("invalid constant");
3696 return;
43e9d192
IB
3697 default:
3698 output_operand_lossage ("invalid operand");
3699 return;
3700 }
3701 break;
3702
3703 case 'A':
3704 if (GET_CODE (x) == HIGH)
3705 x = XEXP (x, 0);
3706
3707 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3708 {
3709 case SYMBOL_SMALL_GOT:
3710 asm_fprintf (asm_out_file, ":got:");
3711 break;
3712
3713 case SYMBOL_SMALL_TLSGD:
3714 asm_fprintf (asm_out_file, ":tlsgd:");
3715 break;
3716
3717 case SYMBOL_SMALL_TLSDESC:
3718 asm_fprintf (asm_out_file, ":tlsdesc:");
3719 break;
3720
3721 case SYMBOL_SMALL_GOTTPREL:
3722 asm_fprintf (asm_out_file, ":gottprel:");
3723 break;
3724
3725 case SYMBOL_SMALL_TPREL:
3726 asm_fprintf (asm_out_file, ":tprel:");
3727 break;
3728
87dd8ab0
MS
3729 case SYMBOL_TINY_GOT:
3730 gcc_unreachable ();
3731 break;
3732
43e9d192
IB
3733 default:
3734 break;
3735 }
3736 output_addr_const (asm_out_file, x);
3737 break;
3738
3739 case 'L':
3740 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3741 {
3742 case SYMBOL_SMALL_GOT:
3743 asm_fprintf (asm_out_file, ":lo12:");
3744 break;
3745
3746 case SYMBOL_SMALL_TLSGD:
3747 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3748 break;
3749
3750 case SYMBOL_SMALL_TLSDESC:
3751 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3752 break;
3753
3754 case SYMBOL_SMALL_GOTTPREL:
3755 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3756 break;
3757
3758 case SYMBOL_SMALL_TPREL:
3759 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3760 break;
3761
87dd8ab0
MS
3762 case SYMBOL_TINY_GOT:
3763 asm_fprintf (asm_out_file, ":got:");
3764 break;
3765
43e9d192
IB
3766 default:
3767 break;
3768 }
3769 output_addr_const (asm_out_file, x);
3770 break;
3771
3772 case 'G':
3773
3774 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3775 {
3776 case SYMBOL_SMALL_TPREL:
3777 asm_fprintf (asm_out_file, ":tprel_hi12:");
3778 break;
3779 default:
3780 break;
3781 }
3782 output_addr_const (asm_out_file, x);
3783 break;
3784
3785 default:
3786 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3787 return;
3788 }
3789}
3790
3791void
3792aarch64_print_operand_address (FILE *f, rtx x)
3793{
3794 struct aarch64_address_info addr;
3795
3796 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3797 MEM, true))
3798 switch (addr.type)
3799 {
3800 case ADDRESS_REG_IMM:
3801 if (addr.offset == const0_rtx)
01a3a324 3802 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43e9d192 3803 else
01a3a324 3804 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
43e9d192
IB
3805 INTVAL (addr.offset));
3806 return;
3807
3808 case ADDRESS_REG_REG:
3809 if (addr.shift == 0)
01a3a324
N
3810 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3811 reg_names [REGNO (addr.offset)]);
43e9d192 3812 else
01a3a324
N
3813 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3814 reg_names [REGNO (addr.offset)], addr.shift);
43e9d192
IB
3815 return;
3816
3817 case ADDRESS_REG_UXTW:
3818 if (addr.shift == 0)
01a3a324 3819 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
3820 REGNO (addr.offset) - R0_REGNUM);
3821 else
01a3a324 3822 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
3823 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3824 return;
3825
3826 case ADDRESS_REG_SXTW:
3827 if (addr.shift == 0)
01a3a324 3828 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
3829 REGNO (addr.offset) - R0_REGNUM);
3830 else
01a3a324 3831 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
3832 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3833 return;
3834
3835 case ADDRESS_REG_WB:
3836 switch (GET_CODE (x))
3837 {
3838 case PRE_INC:
01a3a324 3839 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
3840 GET_MODE_SIZE (aarch64_memory_reference_mode));
3841 return;
3842 case POST_INC:
01a3a324 3843 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
43e9d192
IB
3844 GET_MODE_SIZE (aarch64_memory_reference_mode));
3845 return;
3846 case PRE_DEC:
01a3a324 3847 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
3848 GET_MODE_SIZE (aarch64_memory_reference_mode));
3849 return;
3850 case POST_DEC:
01a3a324 3851 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
43e9d192
IB
3852 GET_MODE_SIZE (aarch64_memory_reference_mode));
3853 return;
3854 case PRE_MODIFY:
01a3a324 3855 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
43e9d192
IB
3856 INTVAL (addr.offset));
3857 return;
3858 case POST_MODIFY:
01a3a324 3859 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
43e9d192
IB
3860 INTVAL (addr.offset));
3861 return;
3862 default:
3863 break;
3864 }
3865 break;
3866
3867 case ADDRESS_LO_SUM:
01a3a324 3868 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
3869 output_addr_const (f, addr.offset);
3870 asm_fprintf (f, "]");
3871 return;
3872
3873 case ADDRESS_SYMBOLIC:
3874 break;
3875 }
3876
3877 output_addr_const (f, x);
3878}
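/* For illustration, the formats above produce output such as
   (register numbers and offsets are hypothetical):
     ADDRESS_REG_IMM              [x0]  or  [x0,16]
     ADDRESS_REG_REG, shift 3     [x0,x1,lsl 3]
     ADDRESS_REG_UXTW, shift 2    [x0,w1,uxtw 2]
     ADDRESS_REG_WB, PRE_INC      [x0,8]!   (DImode access)
     ADDRESS_LO_SUM               [x0,#:lo12:sym]  */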
3879
43e9d192
IB
3880bool
3881aarch64_label_mentioned_p (rtx x)
3882{
3883 const char *fmt;
3884 int i;
3885
3886 if (GET_CODE (x) == LABEL_REF)
3887 return true;
3888
3889 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3890 referencing instruction, but they are constant offsets, not
3891 symbols. */
3892 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3893 return false;
3894
3895 fmt = GET_RTX_FORMAT (GET_CODE (x));
3896 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3897 {
3898 if (fmt[i] == 'E')
3899 {
3900 int j;
3901
3902 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3903 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3904 return 1;
3905 }
3906 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3907 return 1;
3908 }
3909
3910 return 0;
3911}
3912
3913/* Implement REGNO_REG_CLASS. */
3914
3915enum reg_class
3916aarch64_regno_regclass (unsigned regno)
3917{
3918 if (GP_REGNUM_P (regno))
3919 return CORE_REGS;
3920
3921 if (regno == SP_REGNUM)
3922 return STACK_REG;
3923
3924 if (regno == FRAME_POINTER_REGNUM
3925 || regno == ARG_POINTER_REGNUM)
f24bb080 3926 return POINTER_REGS;
43e9d192
IB
3927
3928 if (FP_REGNUM_P (regno))
3929 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3930
3931 return NO_REGS;
3932}
3933
3934/* Try a machine-dependent way of reloading an illegitimate address
3935 operand. If we find one, push the reload and return the new rtx. */
3936
3937rtx
3938aarch64_legitimize_reload_address (rtx *x_p,
3939 enum machine_mode mode,
3940 int opnum, int type,
3941 int ind_levels ATTRIBUTE_UNUSED)
3942{
3943 rtx x = *x_p;
3944
3945 /* Do not allow mem (plus (reg, const)) if vector mode. */
3946 if (aarch64_vector_mode_p (mode)
3947 && GET_CODE (x) == PLUS
3948 && REG_P (XEXP (x, 0))
3949 && CONST_INT_P (XEXP (x, 1)))
3950 {
3951 rtx orig_rtx = x;
3952 x = copy_rtx (x);
3953 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3954 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3955 opnum, (enum reload_type) type);
3956 return x;
3957 }
3958
3959 /* We must recognize output that we have already generated ourselves. */
3960 if (GET_CODE (x) == PLUS
3961 && GET_CODE (XEXP (x, 0)) == PLUS
3962 && REG_P (XEXP (XEXP (x, 0), 0))
3963 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3964 && CONST_INT_P (XEXP (x, 1)))
3965 {
3966 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3967 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3968 opnum, (enum reload_type) type);
3969 return x;
3970 }
3971
3972 /* We wish to handle large displacements off a base register by splitting
3973 the addend across an add and the mem insn. This can cut the number of
3974 extra insns needed from 3 to 1. It is only useful for load/store of a
3975 single register with a 12-bit offset field. */
3976 if (GET_CODE (x) == PLUS
3977 && REG_P (XEXP (x, 0))
3978 && CONST_INT_P (XEXP (x, 1))
3979 && HARD_REGISTER_P (XEXP (x, 0))
3980 && mode != TImode
3981 && mode != TFmode
3982 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
3983 {
3984 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3985 HOST_WIDE_INT low = val & 0xfff;
3986 HOST_WIDE_INT high = val - low;
3987 HOST_WIDE_INT offs;
3988 rtx cst;
28514dda
YZ
3989 enum machine_mode xmode = GET_MODE (x);
3990
3991 /* In ILP32, xmode can be either DImode or SImode. */
3992 gcc_assert (xmode == DImode || xmode == SImode);
43e9d192
IB
3993
3994 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
3995 BLKmode alignment. */
3996 if (GET_MODE_SIZE (mode) == 0)
3997 return NULL_RTX;
3998
3999 offs = low % GET_MODE_SIZE (mode);
4000
4001 /* Align misaligned offset by adjusting high part to compensate. */
4002 if (offs != 0)
4003 {
4004 if (aarch64_uimm12_shift (high + offs))
4005 {
4006 /* Align down. */
4007 low = low - offs;
4008 high = high + offs;
4009 }
4010 else
4011 {
4012 /* Align up. */
4013 offs = GET_MODE_SIZE (mode) - offs;
4014 low = low + offs;
4015 high = high + (low & 0x1000) - offs;
4016 low &= 0xfff;
4017 }
4018 }
4019
4020 /* Check for overflow. */
4021 if (high + low != val)
4022 return NULL_RTX;
4023
4024 cst = GEN_INT (high);
4025 if (!aarch64_uimm12_shift (high))
28514dda 4026 cst = force_const_mem (xmode, cst);
43e9d192
IB
4027
4028 /* Reload high part into base reg, leaving the low part
acf67b8e
KT
4029 in the mem instruction.
4030 Note that replacing this gen_rtx_PLUS with plus_constant is
4031 wrong in this case because we rely on the
4032 (plus (plus reg c1) c2) structure being preserved so that
4033 XEXP (x, 0) in push_reload below uses the correct term. */
4d0e5b54
JG
4034 x = gen_rtx_PLUS (xmode,
4035 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4036 GEN_INT (low));
43e9d192
IB
4037
4038 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
28514dda 4039 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
43e9d192
IB
4040 opnum, (enum reload_type) type);
4041 return x;
4042 }
4043
4044 return NULL_RTX;
4045}
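/* A worked example of the split above, assuming an SImode access at
   base + 0x12344 (hypothetical values):
     low  = 0x12344 & 0xfff  = 0x344    (left in the mem insn)
     high = 0x12344 - 0x344  = 0x12000  (a 12-bit immediate shifted by 12)
   so reload can emit roughly
     add  x1, x0, 0x12000
     ldr  w2, [x1, 0x344]
   instead of materialising the whole constant separately.  */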
4046
4047
4048static reg_class_t
4049aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4050 reg_class_t rclass,
4051 enum machine_mode mode,
4052 secondary_reload_info *sri)
4053{
43e9d192
IB
4054 /* Without the TARGET_SIMD instructions we cannot move a Q register
4055 to a Q register directly. We need a scratch. */
4056 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4057 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4058 && reg_class_subset_p (rclass, FP_REGS))
4059 {
4060 if (mode == TFmode)
4061 sri->icode = CODE_FOR_aarch64_reload_movtf;
4062 else if (mode == TImode)
4063 sri->icode = CODE_FOR_aarch64_reload_movti;
4064 return NO_REGS;
4065 }
4066
4067 /* A TFmode or TImode memory access should be handled via FP_REGS
4068 because AArch64 has richer addressing modes for LDR/STR instructions
4069 than LDP/STP instructions. */
4070 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4071 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4072 return FP_REGS;
4073
4074 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P (x))
4075 return CORE_REGS;
4076
4077 return NO_REGS;
4078}
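/* For example, without TARGET_SIMD a TImode copy between two Q registers
   takes the first path above: the aarch64_reload_movti expander is
   requested and NO_REGS is returned, so reload supplies a general-register
   scratch for the transfer.  A 16-byte memory access requested in
   CORE_REGS is instead steered to FP_REGS to use the richer LDR/STR
   addressing modes.  */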
4079
4080static bool
4081aarch64_can_eliminate (const int from, const int to)
4082{
4083 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4084 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4085
4086 if (frame_pointer_needed)
4087 {
4088 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4089 return true;
4090 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4091 return false;
4092 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4093 && !cfun->calls_alloca)
4094 return true;
4095 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4096 return true;
4097 return false;
4098 }
4099 else
4100 {
777e6976
IB
4101 /* If we decided that we didn't need a leaf frame pointer but then used
4102 LR in the function, then we'll want a frame pointer after all, so
4103 prevent this elimination to ensure a frame pointer is used.
4104
4105 NOTE: the original value of flag_omit_frame_pointer gets trashed
4106 IFF flag_omit_leaf_frame_pointer is true, so we check the value
4107 of faked_omit_frame_pointer here (which is true when we always
4108 wish to keep non-leaf frame pointers but only wish to keep leaf frame
4109 pointers when LR is clobbered). */
43e9d192 4110 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
777e6976
IB
4111 && df_regs_ever_live_p (LR_REGNUM)
4112 && faked_omit_frame_pointer)
43e9d192
IB
4113 return false;
4114 }
777e6976 4115
43e9d192
IB
4116 return true;
4117}
4118
4119HOST_WIDE_INT
4120aarch64_initial_elimination_offset (unsigned from, unsigned to)
4121{
4122 HOST_WIDE_INT frame_size;
4123 HOST_WIDE_INT offset;
4124
4125 aarch64_layout_frame ();
4126 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4127 + crtl->outgoing_args_size
4128 + cfun->machine->saved_varargs_size);
4129
4130 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4131 offset = frame_size;
4132
4133 if (to == HARD_FRAME_POINTER_REGNUM)
4134 {
4135 if (from == ARG_POINTER_REGNUM)
4136 return offset - crtl->outgoing_args_size;
4137
4138 if (from == FRAME_POINTER_REGNUM)
6991c977 4139 return cfun->machine->frame.saved_regs_size + get_frame_size ();
43e9d192
IB
4140 }
4141
4142 if (to == STACK_POINTER_REGNUM)
4143 {
4144 if (from == FRAME_POINTER_REGNUM)
4145 {
4146 HOST_WIDE_INT elim = crtl->outgoing_args_size
4147 + cfun->machine->frame.saved_regs_size
6991c977 4148 + get_frame_size ()
43e9d192
IB
4149 - cfun->machine->frame.fp_lr_offset;
4150 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4151 return elim;
4152 }
4153 }
4154
4155 return offset;
4156}
4157
4158
4159/* Implement RETURN_ADDR_RTX. We do not support moving back to a
4160 previous frame. */
4161
4162rtx
4163aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4164{
4165 if (count != 0)
4166 return const0_rtx;
4167 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4168}
4169
4170
4171static void
4172aarch64_asm_trampoline_template (FILE *f)
4173{
28514dda
YZ
4174 if (TARGET_ILP32)
4175 {
4176 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4177 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4178 }
4179 else
4180 {
4181 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4182 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4183 }
01a3a324 4184 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 4185 assemble_aligned_integer (4, const0_rtx);
28514dda
YZ
4186 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4187 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
4188}
4189
4190static void
4191aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4192{
4193 rtx fnaddr, mem, a_tramp;
28514dda 4194 const int tramp_code_sz = 16;
43e9d192
IB
4195
4196 /* We don't need to copy the trailing D-words; we fill those in below. */
4197 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
4198 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4199 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 4200 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
4201 if (GET_MODE (fnaddr) != ptr_mode)
4202 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
4203 emit_move_insn (mem, fnaddr);
4204
28514dda 4205 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
4206 emit_move_insn (mem, chain_value);
4207
4208 /* XXX We should really define a "clear_cache" pattern and use
4209 gen_clear_cache(). */
4210 a_tramp = XEXP (m_tramp, 0);
4211 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
28514dda
YZ
4212 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4213 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4214 ptr_mode);
43e9d192
IB
4215}
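/* The finished trampoline is therefore 16 bytes of code (the template
   above) followed by two pointer-sized data slots: the target function
   address at offset tramp_code_sz and the static chain value at
   tramp_code_sz + POINTER_BYTES, both read PC-relatively by the LDRs in
   the template.  */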
4216
4217static unsigned char
4218aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4219{
4220 switch (regclass)
4221 {
4222 case CORE_REGS:
4223 case POINTER_REGS:
4224 case GENERAL_REGS:
4225 case ALL_REGS:
4226 case FP_REGS:
4227 case FP_LO_REGS:
4228 return
4229 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4230 (GET_MODE_SIZE (mode) + 7) / 8;
4231 case STACK_REG:
4232 return 1;
4233
4234 case NO_REGS:
4235 return 0;
4236
4237 default:
4238 break;
4239 }
4240 gcc_unreachable ();
4241}
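/* For example, with the calculation above a V4SImode value (16 bytes,
   SIMD enabled) occupies (16 + 15) / 16 = 1 register of FP_REGS, while a
   TImode value in CORE_REGS occupies (16 + 7) / 8 = 2 registers.  */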
4242
4243static reg_class_t
78d8b9f0 4244aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 4245{
51bb310d 4246 if (regclass == POINTER_REGS)
78d8b9f0
IB
4247 return GENERAL_REGS;
4248
51bb310d
MS
4249 if (regclass == STACK_REG)
4250 {
4251 if (REG_P (x)
4252 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4253 return regclass;
4254
4255 return NO_REGS;
4256 }
4257
78d8b9f0
IB
4258 /* If it's an integer immediate that MOVI can't handle, then
4259 FP_REGS is not an option, so we return NO_REGS instead. */
4260 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4261 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4262 return NO_REGS;
4263
27bd251b
IB
4264 /* Register elimination can result in a request for
4265 SP+constant->FP_REGS. We cannot support such operations, which
4266 use SP as source and an FP_REG as destination, so reject them
4267 outright here. */
4268 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4269 {
4270 rtx lhs = XEXP (x, 0);
4271
4272 /* Look through a possible SUBREG introduced by ILP32. */
4273 if (GET_CODE (lhs) == SUBREG)
4274 lhs = SUBREG_REG (lhs);
4275
4276 gcc_assert (REG_P (lhs));
4277 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4278 POINTER_REGS));
4279 return NO_REGS;
4280 }
4281
78d8b9f0 4282 return regclass;
43e9d192
IB
4283}
4284
4285void
4286aarch64_asm_output_labelref (FILE* f, const char *name)
4287{
4288 asm_fprintf (f, "%U%s", name);
4289}
4290
4291static void
4292aarch64_elf_asm_constructor (rtx symbol, int priority)
4293{
4294 if (priority == DEFAULT_INIT_PRIORITY)
4295 default_ctor_section_asm_out_constructor (symbol, priority);
4296 else
4297 {
4298 section *s;
4299 char buf[18];
4300 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4301 s = get_section (buf, SECTION_WRITE, NULL);
4302 switch_to_section (s);
4303 assemble_align (POINTER_SIZE);
28514dda 4304 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
4305 }
4306}
4307
4308static void
4309aarch64_elf_asm_destructor (rtx symbol, int priority)
4310{
4311 if (priority == DEFAULT_INIT_PRIORITY)
4312 default_dtor_section_asm_out_destructor (symbol, priority);
4313 else
4314 {
4315 section *s;
4316 char buf[18];
4317 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4318 s = get_section (buf, SECTION_WRITE, NULL);
4319 switch_to_section (s);
4320 assemble_align (POINTER_SIZE);
28514dda 4321 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
4322 }
4323}
4324
4325const char*
4326aarch64_output_casesi (rtx *operands)
4327{
4328 char buf[100];
4329 char label[100];
592a16fc 4330 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
43e9d192
IB
4331 int index;
4332 static const char *const patterns[4][2] =
4333 {
4334 {
4335 "ldrb\t%w3, [%0,%w1,uxtw]",
4336 "add\t%3, %4, %w3, sxtb #2"
4337 },
4338 {
4339 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4340 "add\t%3, %4, %w3, sxth #2"
4341 },
4342 {
4343 "ldr\t%w3, [%0,%w1,uxtw #2]",
4344 "add\t%3, %4, %w3, sxtw #2"
4345 },
4346 /* We assume that DImode is only generated when not optimizing and
4347 that we don't really need 64-bit address offsets. That would
4348 imply an object file with 8GB of code in a single function! */
4349 {
4350 "ldr\t%w3, [%0,%w1,uxtw #2]",
4351 "add\t%3, %4, %w3, sxtw #2"
4352 }
4353 };
4354
4355 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4356
4357 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4358
4359 gcc_assert (index >= 0 && index <= 3);
4360
4361 /* Need to implement table size reduction, by changing the code below. */
4362 output_asm_insn (patterns[index][0], operands);
4363 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4364 snprintf (buf, sizeof (buf),
4365 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4366 output_asm_insn (buf, operands);
4367 output_asm_insn (patterns[index][1], operands);
4368 output_asm_insn ("br\t%3", operands);
4369 assemble_label (asm_out_file, label);
4370 return "";
4371}
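/* For a HImode dispatch table (index == 1 above) the emitted sequence is
   roughly (register numbers and the Lrtx label number are illustrative):
     ldrh  w3, [x0,w1,uxtw #1]
     adr   x4, .Lrtx<N>
     add   x3, x4, w3, sxth #2
     br    x3
   with the ADDR_DIFF_VEC jump table assembled at .Lrtx<N>.  */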
4372
4373
4374/* Return size in bits of an arithmetic operand which is shifted/scaled and
4375 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4376 operator. */
4377
4378int
4379aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4380{
4381 if (shift >= 0 && shift <= 3)
4382 {
4383 int size;
4384 for (size = 8; size <= 32; size *= 2)
4385 {
4386 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4387 if (mask == bits << shift)
4388 return size;
4389 }
4390 }
4391 return 0;
4392}
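/* Two worked examples of the check above: aarch64_uxt_size (1, 0x1fe)
   returns 8, since 0x1fe == 0xff << 1 (a byte-sized field shifted by one,
   i.e. usable as a UXTB operand), whereas aarch64_uxt_size (0, 0x3c)
   returns 0 because 0x3c is not a full 8-, 16- or 32-bit mask.  */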
4393
4394static bool
4395aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4396 const_rtx x ATTRIBUTE_UNUSED)
4397{
4398 /* We can't use blocks for constants when we're using a per-function
4399 constant pool. */
4400 return false;
4401}
4402
4403static section *
4404aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4405 rtx x ATTRIBUTE_UNUSED,
4406 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4407{
4408 /* Force all constant pool entries into the current function section. */
4409 return function_section (current_function_decl);
4410}
4411
4412
4413/* Costs. */
4414
4415/* Helper function for rtx cost calculation. Strip a shift expression
4416 from X. Returns the inner operand if successful, or the original
4417 expression on failure. */
4418static rtx
4419aarch64_strip_shift (rtx x)
4420{
4421 rtx op = x;
4422
4423 if ((GET_CODE (op) == ASHIFT
4424 || GET_CODE (op) == ASHIFTRT
4425 || GET_CODE (op) == LSHIFTRT)
4426 && CONST_INT_P (XEXP (op, 1)))
4427 return XEXP (op, 0);
4428
4429 if (GET_CODE (op) == MULT
4430 && CONST_INT_P (XEXP (op, 1))
4431 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4432 return XEXP (op, 0);
4433
4434 return x;
4435}
4436
4437/* Helper function for rtx cost calculation. Strip a shift or extend
4438 expression from X. Returns the inner operand if successful, or the
4439 original expression on failure. We deal with a number of possible
4440 canonicalization variations here. */
4441static rtx
4442aarch64_strip_shift_or_extend (rtx x)
4443{
4444 rtx op = x;
4445
4446 /* Zero and sign extraction of a widened value. */
4447 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4448 && XEXP (op, 2) == const0_rtx
4449 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4450 XEXP (op, 1)))
4451 return XEXP (XEXP (op, 0), 0);
4452
4453 /* It can also be represented (for zero-extend) as an AND with an
4454 immediate. */
4455 if (GET_CODE (op) == AND
4456 && GET_CODE (XEXP (op, 0)) == MULT
4457 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4458 && CONST_INT_P (XEXP (op, 1))
4459 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4460 INTVAL (XEXP (op, 1))) != 0)
4461 return XEXP (XEXP (op, 0), 0);
4462
4463 /* Now handle extended register, as this may also have an optional
4464 left shift by 1..4. */
4465 if (GET_CODE (op) == ASHIFT
4466 && CONST_INT_P (XEXP (op, 1))
4467 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4468 op = XEXP (op, 0);
4469
4470 if (GET_CODE (op) == ZERO_EXTEND
4471 || GET_CODE (op) == SIGN_EXTEND)
4472 op = XEXP (op, 0);
4473
4474 if (op != x)
4475 return op;
4476
4477 return aarch64_strip_shift (x);
4478}
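/* For example, a (mult (reg) (const_int 8)) operand strips to (reg),
   since 8 is a power of two and the multiply is really a shift, and an
   (ashift (reg) (const_int 3)) operand likewise strips to (reg); the
   stripped operand is then costed as a plain register.  */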
4479
4480/* Calculate the cost of calculating X, storing it in *COST. Result
4481 is true if the total cost of the operation has now been calculated. */
4482static bool
4483aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4484 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4485{
4486 rtx op0, op1;
73250c4c 4487 const struct cpu_cost_table *extra_cost
43e9d192
IB
4488 = aarch64_tune_params->insn_extra_cost;
4489
4490 switch (code)
4491 {
4492 case SET:
4493 op0 = SET_DEST (x);
4494 op1 = SET_SRC (x);
4495
4496 switch (GET_CODE (op0))
4497 {
4498 case MEM:
4499 if (speed)
73250c4c 4500 *cost += extra_cost->ldst.store;
43e9d192
IB
4501
4502 if (op1 != const0_rtx)
4503 *cost += rtx_cost (op1, SET, 1, speed);
4504 return true;
4505
4506 case SUBREG:
4507 if (! REG_P (SUBREG_REG (op0)))
4508 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4509 /* Fall through. */
4510 case REG:
4511 /* Cost is just the cost of the RHS of the set. */
4512 *cost += rtx_cost (op1, SET, 1, true);
4513 return true;
4514
4515 case ZERO_EXTRACT: /* Bit-field insertion. */
4516 case SIGN_EXTRACT:
4517 /* Strip any redundant widening of the RHS to meet the width of
4518 the target. */
4519 if (GET_CODE (op1) == SUBREG)
4520 op1 = SUBREG_REG (op1);
4521 if ((GET_CODE (op1) == ZERO_EXTEND
4522 || GET_CODE (op1) == SIGN_EXTEND)
4523 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4524 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4525 >= INTVAL (XEXP (op0, 1))))
4526 op1 = XEXP (op1, 0);
4527 *cost += rtx_cost (op1, SET, 1, speed);
4528 return true;
4529
4530 default:
4531 break;
4532 }
4533 return false;
4534
4535 case MEM:
4536 if (speed)
73250c4c 4537 *cost += extra_cost->ldst.load;
43e9d192
IB
4538
4539 return true;
4540
4541 case NEG:
4542 op0 = CONST0_RTX (GET_MODE (x));
4543 op1 = XEXP (x, 0);
4544 goto cost_minus;
4545
4546 case COMPARE:
4547 op0 = XEXP (x, 0);
4548 op1 = XEXP (x, 1);
4549
4550 if (op1 == const0_rtx
4551 && GET_CODE (op0) == AND)
4552 {
4553 x = op0;
4554 goto cost_logic;
4555 }
4556
4557 /* Comparisons can work if the order is swapped.
4558 Canonicalization puts the more complex operation first, but
4559 we want it in op1. */
4560 if (! (REG_P (op0)
4561 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4562 {
4563 op0 = XEXP (x, 1);
4564 op1 = XEXP (x, 0);
4565 }
4566 goto cost_minus;
4567
4568 case MINUS:
4569 op0 = XEXP (x, 0);
4570 op1 = XEXP (x, 1);
4571
4572 cost_minus:
4573 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4574 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4575 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4576 {
4577 if (op0 != const0_rtx)
4578 *cost += rtx_cost (op0, MINUS, 0, speed);
4579
4580 if (CONST_INT_P (op1))
4581 {
4582 if (!aarch64_uimm12_shift (INTVAL (op1)))
4583 *cost += rtx_cost (op1, MINUS, 1, speed);
4584 }
4585 else
4586 {
4587 op1 = aarch64_strip_shift_or_extend (op1);
4588 *cost += rtx_cost (op1, MINUS, 1, speed);
4589 }
4590 return true;
4591 }
4592
4593 return false;
4594
4595 case PLUS:
4596 op0 = XEXP (x, 0);
4597 op1 = XEXP (x, 1);
4598
4599 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4600 {
4601 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4602 {
4603 *cost += rtx_cost (op0, PLUS, 0, speed);
4604 }
4605 else
4606 {
4607 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4608
4609 if (new_op0 == op0
4610 && GET_CODE (op0) == MULT)
4611 {
4612 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4613 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4614 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4615 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4616 {
4617 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4618 speed)
4619 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4620 speed)
4621 + rtx_cost (op1, PLUS, 1, speed));
4622 if (speed)
73250c4c
KT
4623 *cost +=
4624 extra_cost->mult[GET_MODE (x) == DImode].extend_add;
43e9d192
IB
4625 return true;
4626 }
4627 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4628 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4629 + rtx_cost (op1, PLUS, 1, speed));
4630
4631 if (speed)
73250c4c 4632 *cost += extra_cost->mult[GET_MODE (x) == DImode].add;
43e9d192
IB
4633 }
4634
4635 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4636 + rtx_cost (op1, PLUS, 1, speed));
4637 }
4638 return true;
4639 }
4640
4641 return false;
4642
4643 case IOR:
4644 case XOR:
4645 case AND:
4646 cost_logic:
4647 op0 = XEXP (x, 0);
4648 op1 = XEXP (x, 1);
4649
4650 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4651 {
4652 if (CONST_INT_P (op1)
4653 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4654 {
4655 *cost += rtx_cost (op0, AND, 0, speed);
4656 }
4657 else
4658 {
4659 if (GET_CODE (op0) == NOT)
4660 op0 = XEXP (op0, 0);
4661 op0 = aarch64_strip_shift (op0);
4662 *cost += (rtx_cost (op0, AND, 0, speed)
4663 + rtx_cost (op1, AND, 1, speed));
4664 }
4665 return true;
4666 }
4667 return false;
4668
4669 case ZERO_EXTEND:
4670 if ((GET_MODE (x) == DImode
4671 && GET_MODE (XEXP (x, 0)) == SImode)
4672 || GET_CODE (XEXP (x, 0)) == MEM)
4673 {
4674 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4675 return true;
4676 }
4677 return false;
4678
4679 case SIGN_EXTEND:
4680 if (GET_CODE (XEXP (x, 0)) == MEM)
4681 {
4682 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4683 return true;
4684 }
4685 return false;
4686
4687 case ROTATE:
4688 if (!CONST_INT_P (XEXP (x, 1)))
4689 *cost += COSTS_N_INSNS (2);
4690 /* Fall through. */
4691 case ROTATERT:
4692 case LSHIFTRT:
4693 case ASHIFT:
4694 case ASHIFTRT:
4695
4696 /* Shifting by a register often takes an extra cycle. */
4697 if (speed && !CONST_INT_P (XEXP (x, 1)))
73250c4c 4698 *cost += extra_cost->alu.arith_shift_reg;
43e9d192
IB
4699
4700 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4701 return true;
4702
4703 case HIGH:
4704 if (!CONSTANT_P (XEXP (x, 0)))
4705 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4706 return true;
4707
4708 case LO_SUM:
4709 if (!CONSTANT_P (XEXP (x, 1)))
4710 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4711 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4712 return true;
4713
4714 case ZERO_EXTRACT:
4715 case SIGN_EXTRACT:
4716 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4717 return true;
4718
4719 case MULT:
4720 op0 = XEXP (x, 0);
4721 op1 = XEXP (x, 1);
4722
4723 *cost = COSTS_N_INSNS (1);
4724 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4725 {
4726 if (CONST_INT_P (op1)
4727 && exact_log2 (INTVAL (op1)) > 0)
4728 {
4729 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4730 return true;
4731 }
4732
4733 if ((GET_CODE (op0) == ZERO_EXTEND
4734 && GET_CODE (op1) == ZERO_EXTEND)
4735 || (GET_CODE (op0) == SIGN_EXTEND
4736 && GET_CODE (op1) == SIGN_EXTEND))
4737 {
4738 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4739 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4740 if (speed)
73250c4c 4741 *cost += extra_cost->mult[GET_MODE (x) == DImode].extend;
43e9d192
IB
4742 return true;
4743 }
4744
4745 if (speed)
73250c4c 4746 *cost += extra_cost->mult[GET_MODE (x) == DImode].simple;
43e9d192
IB
4747 }
4748 else if (speed)
4749 {
4750 if (GET_MODE (x) == DFmode)
73250c4c 4751 *cost += extra_cost->fp[1].mult;
43e9d192 4752 else if (GET_MODE (x) == SFmode)
73250c4c 4753 *cost += extra_cost->fp[0].mult;
43e9d192
IB
4754 }
4755
4756 return false; /* All arguments need to be in registers. */
4757
4758 case MOD:
4759 case UMOD:
4760 *cost = COSTS_N_INSNS (2);
4761 if (speed)
4762 {
4763 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
73250c4c
KT
4764 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
4765 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
43e9d192 4766 else if (GET_MODE (x) == DFmode)
73250c4c
KT
4767 *cost += (extra_cost->fp[1].mult
4768 + extra_cost->fp[1].div);
43e9d192 4769 else if (GET_MODE (x) == SFmode)
73250c4c
KT
4770 *cost += (extra_cost->fp[0].mult
4771 + extra_cost->fp[0].div);
43e9d192
IB
4772 }
4773 return false; /* All arguments need to be in registers. */
4774
4775 case DIV:
4776 case UDIV:
4777 *cost = COSTS_N_INSNS (1);
4778 if (speed)
4779 {
4780 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
73250c4c 4781 *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
43e9d192 4782 else if (GET_MODE (x) == DFmode)
73250c4c 4783 *cost += extra_cost->fp[1].div;
43e9d192 4784 else if (GET_MODE (x) == SFmode)
73250c4c 4785 *cost += extra_cost->fp[0].div;
43e9d192
IB
4786 }
4787 return false; /* All arguments need to be in registers. */
4788
4789 default:
4790 break;
4791 }
4792 return false;
4793}
4794
4795static int
4796aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4797 enum machine_mode mode ATTRIBUTE_UNUSED,
4798 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4799{
4800 enum rtx_code c = GET_CODE (x);
4801 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4802
4803 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4804 return addr_cost->pre_modify;
4805
4806 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4807 return addr_cost->post_modify;
4808
4809 if (c == PLUS)
4810 {
4811 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4812 return addr_cost->imm_offset;
4813 else if (GET_CODE (XEXP (x, 0)) == MULT
4814 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4815 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4816 return addr_cost->register_extend;
4817
4818 return addr_cost->register_offset;
4819 }
4820 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4821 return addr_cost->imm_offset;
4822
4823 return 0;
4824}
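/* As a rough guide to the classification above: an address of the form
   (plus (reg) (const_int 16)) is charged imm_offset, one of the form
   (plus (mult (reg) (const_int 4)) (reg)) is charged register_extend,
   and a bare (reg) falls through to the final return of 0.  */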
4825
4826static int
4827aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4828 reg_class_t from, reg_class_t to)
4829{
4830 const struct cpu_regmove_cost *regmove_cost
4831 = aarch64_tune_params->regmove_cost;
4832
4833 if (from == GENERAL_REGS && to == GENERAL_REGS)
4834 return regmove_cost->GP2GP;
4835 else if (from == GENERAL_REGS)
4836 return regmove_cost->GP2FP;
4837 else if (to == GENERAL_REGS)
4838 return regmove_cost->FP2GP;
4839
4840 /* When AdvSIMD instructions are disabled it is not possible to move
4841 a 128-bit value directly between Q registers. This is handled in
4842 secondary reload. A general register is used as a scratch to move
4843 the upper DI value, and the lower DI value is moved directly;
4844 hence the cost is the sum of three moves. */
4845
4846 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
4847 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4848
4849 return regmove_cost->FP2FP;
4850}
4851
4852static int
4853aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4854 reg_class_t rclass ATTRIBUTE_UNUSED,
4855 bool in ATTRIBUTE_UNUSED)
4856{
4857 return aarch64_tune_params->memmov_cost;
4858}
4859
8990e73a
TB
4860/* Vectorizer cost model target hooks. */
4861
4862/* Implement targetm.vectorize.builtin_vectorization_cost. */
4863static int
4864aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4865 tree vectype,
4866 int misalign ATTRIBUTE_UNUSED)
4867{
4868 unsigned elements;
4869
4870 switch (type_of_cost)
4871 {
4872 case scalar_stmt:
4873 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
4874
4875 case scalar_load:
4876 return aarch64_tune_params->vec_costs->scalar_load_cost;
4877
4878 case scalar_store:
4879 return aarch64_tune_params->vec_costs->scalar_store_cost;
4880
4881 case vector_stmt:
4882 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4883
4884 case vector_load:
4885 return aarch64_tune_params->vec_costs->vec_align_load_cost;
4886
4887 case vector_store:
4888 return aarch64_tune_params->vec_costs->vec_store_cost;
4889
4890 case vec_to_scalar:
4891 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
4892
4893 case scalar_to_vec:
4894 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
4895
4896 case unaligned_load:
4897 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
4898
4899 case unaligned_store:
4900 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
4901
4902 case cond_branch_taken:
4903 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
4904
4905 case cond_branch_not_taken:
4906 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
4907
4908 case vec_perm:
4909 case vec_promote_demote:
4910 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4911
4912 case vec_construct:
4913 elements = TYPE_VECTOR_SUBPARTS (vectype);
4914 return elements / 2 + 1;
4915
4916 default:
4917 gcc_unreachable ();
4918 }
4919}
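/* For instance, a vec_construct of a V4SI vector gives elements == 4 and
   hence a cost of 4 / 2 + 1 = 3; the other kinds are read straight from
   the tuning structure.  */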
4920
4921/* Implement targetm.vectorize.add_stmt_cost. */
4922static unsigned
4923aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4924 struct _stmt_vec_info *stmt_info, int misalign,
4925 enum vect_cost_model_location where)
4926{
4927 unsigned *cost = (unsigned *) data;
4928 unsigned retval = 0;
4929
4930 if (flag_vect_cost_model)
4931 {
4932 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4933 int stmt_cost =
4934 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
4935
4936 /* Statements in an inner loop relative to the loop being
4937 vectorized are weighted more heavily. The value here is
4938 a function (linear for now) of the loop nest level. */
4939 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4940 {
4941 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4942 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
4943 unsigned nest_level = loop_depth (loop);
4944
4945 count *= nest_level;
4946 }
4947
4948 retval = (unsigned) (count * stmt_cost);
4949 cost[where] += retval;
4950 }
4951
4952 return retval;
4953}
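/* A worked example of the weighting above (hypothetical numbers, cost
   model enabled): a vector statement with stmt_cost 1 and count 4 that
   sits in a loop at depth 2 inside the loop being vectorized has its
   count scaled to 4 * 2 = 8, and 8 is both returned and accumulated into
   cost[vect_body].  */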
4954
43e9d192
IB
4955static void initialize_aarch64_code_model (void);
4956
4957/* Parse the architecture extension string. */
4958
4959static void
4960aarch64_parse_extension (char *str)
4961{
4962 /* The extension string is parsed left to right. */
4963 const struct aarch64_option_extension *opt = NULL;
4964
4965 /* Flag to say whether we are adding or removing an extension. */
4966 int adding_ext = -1;
4967
4968 while (str != NULL && *str != 0)
4969 {
4970 char *ext;
4971 size_t len;
4972
4973 str++;
4974 ext = strchr (str, '+');
4975
4976 if (ext != NULL)
4977 len = ext - str;
4978 else
4979 len = strlen (str);
4980
4981 if (len >= 2 && strncmp (str, "no", 2) == 0)
4982 {
4983 adding_ext = 0;
4984 len -= 2;
4985 str += 2;
4986 }
4987 else if (len > 0)
4988 adding_ext = 1;
4989
4990 if (len == 0)
4991 {
4992 error ("missing feature modifier after %qs", "+no");
4993 return;
4994 }
4995
4996 /* Scan over the extensions table trying to find an exact match. */
4997 for (opt = all_extensions; opt->name != NULL; opt++)
4998 {
4999 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
5000 {
5001 /* Add or remove the extension. */
5002 if (adding_ext)
5003 aarch64_isa_flags |= opt->flags_on;
5004 else
5005 aarch64_isa_flags &= ~(opt->flags_off);
5006 break;
5007 }
5008 }
5009
5010 if (opt->name == NULL)
5011 {
5012 /* Extension not found in list. */
5013 error ("unknown feature modifier %qs", str);
5014 return;
5015 }
5016
5017 str = ext;
5018 };
5019
5020 return;
5021}
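/* For example, with -mcpu=<cpu>+simd+nofp the string passed here is
   "+simd+nofp": the first iteration matches "simd" and ORs in its
   flags_on bits, the second sees the "no" prefix, matches "fp" and
   clears its flags_off bits.  (The names come from the all_extensions
   table.)  */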
5022
5023/* Parse the ARCH string. */
5024
5025static void
5026aarch64_parse_arch (void)
5027{
5028 char *ext;
5029 const struct processor *arch;
5030 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
5031 size_t len;
5032
5033 strcpy (str, aarch64_arch_string);
5034
5035 ext = strchr (str, '+');
5036
5037 if (ext != NULL)
5038 len = ext - str;
5039 else
5040 len = strlen (str);
5041
5042 if (len == 0)
5043 {
5044 error ("missing arch name in -march=%qs", str);
5045 return;
5046 }
5047
5048 /* Loop through the list of supported ARCHs to find a match. */
5049 for (arch = all_architectures; arch->name != NULL; arch++)
5050 {
5051 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5052 {
5053 selected_arch = arch;
5054 aarch64_isa_flags = selected_arch->flags;
5055 selected_cpu = &all_cores[selected_arch->core];
5056
5057 if (ext != NULL)
5058 {
5059 /* ARCH string contains at least one extension. */
5060 aarch64_parse_extension (ext);
5061 }
5062
5063 return;
5064 }
5065 }
5066
5067 /* ARCH name not found in list. */
5068 error ("unknown value %qs for -march", str);
5069 return;
5070}
5071
5072/* Parse the CPU string. */
5073
5074static void
5075aarch64_parse_cpu (void)
5076{
5077 char *ext;
5078 const struct processor *cpu;
5079 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5080 size_t len;
5081
5082 strcpy (str, aarch64_cpu_string);
5083
5084 ext = strchr (str, '+');
5085
5086 if (ext != NULL)
5087 len = ext - str;
5088 else
5089 len = strlen (str);
5090
5091 if (len == 0)
5092 {
5093 error ("missing cpu name in -mcpu=%qs", str);
5094 return;
5095 }
5096
5097 /* Loop through the list of supported CPUs to find a match. */
5098 for (cpu = all_cores; cpu->name != NULL; cpu++)
5099 {
5100 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5101 {
5102 selected_cpu = cpu;
5103 aarch64_isa_flags = selected_cpu->flags;
5104
5105 if (ext != NULL)
5106 {
5107 /* CPU string contains at least one extension. */
5108 aarch64_parse_extension (ext);
5109 }
5110
5111 return;
5112 }
5113 }
5114
5115 /* CPU name not found in list. */
5116 error ("unknown value %qs for -mcpu", str);
5117 return;
5118}
5119
5120/* Parse the TUNE string. */
5121
5122static void
5123aarch64_parse_tune (void)
5124{
5125 const struct processor *cpu;
5126 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5127 strcpy (str, aarch64_tune_string);
5128
5129 /* Loop through the list of supported CPUs to find a match. */
5130 for (cpu = all_cores; cpu->name != NULL; cpu++)
5131 {
5132 if (strcmp (cpu->name, str) == 0)
5133 {
5134 selected_tune = cpu;
5135 return;
5136 }
5137 }
5138
5139 /* CPU name not found in list. */
5140 error ("unknown value %qs for -mtune", str);
5141 return;
5142}
5143
5144
5145/* Implement TARGET_OPTION_OVERRIDE. */
5146
5147static void
5148aarch64_override_options (void)
5149{
5150 /* march wins over mcpu, so when march is defined, mcpu takes the same value,
5151 otherwise march remains undefined. mtune can be used with either march or
5152 mcpu. */
5153
5154 if (aarch64_arch_string)
5155 {
5156 aarch64_parse_arch ();
5157 aarch64_cpu_string = NULL;
5158 }
5159
5160 if (aarch64_cpu_string)
5161 {
5162 aarch64_parse_cpu ();
5163 selected_arch = NULL;
5164 }
5165
5166 if (aarch64_tune_string)
5167 {
5168 aarch64_parse_tune ();
5169 }
5170
5171 initialize_aarch64_code_model ();
5172
5173 aarch64_build_bitmask_table ();
5174
5175 /* This target defaults to strict volatile bitfields. */
5176 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5177 flag_strict_volatile_bitfields = 1;
5178
5179 /* If the user did not specify a processor, choose the default
5180 one for them. This will be the CPU set during configuration using
02fdbd5b 5181 --with-cpu, otherwise it is "cortex-a53". */
43e9d192
IB
5182 if (!selected_cpu)
5183 {
5184 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5185 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5186 }
5187
5188 gcc_assert (selected_cpu);
5189
5190 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
5191 if (!selected_tune)
5192 selected_tune = &all_cores[selected_cpu->core];
5193
5194 aarch64_tune_flags = selected_tune->flags;
5195 aarch64_tune = selected_tune->core;
5196 aarch64_tune_params = selected_tune->tune;
5197
5198 aarch64_override_options_after_change ();
5199}
5200
5201/* Implement targetm.override_options_after_change. */
5202
5203static void
5204aarch64_override_options_after_change (void)
5205{
5206 faked_omit_frame_pointer = false;
5207
5208 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
5209 that aarch64_frame_pointer_required will be called. We need to remember
5210 whether flag_omit_frame_pointer was turned on normally or just faked. */
5211
5212 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
5213 {
5214 flag_omit_frame_pointer = true;
5215 faked_omit_frame_pointer = true;
5216 }
5217}
5218
5219static struct machine_function *
5220aarch64_init_machine_status (void)
5221{
5222 struct machine_function *machine;
5223 machine = ggc_alloc_cleared_machine_function ();
5224 return machine;
5225}
5226
5227void
5228aarch64_init_expanders (void)
5229{
5230 init_machine_status = aarch64_init_machine_status;
5231}
5232
5233/* Resolve aarch64_cmodel_var and flag_pic into the code model actually
used, rejecting combinations that are not implemented. */
5234static void
5235initialize_aarch64_code_model (void)
5236{
5237 if (flag_pic)
5238 {
5239 switch (aarch64_cmodel_var)
5240 {
5241 case AARCH64_CMODEL_TINY:
5242 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5243 break;
5244 case AARCH64_CMODEL_SMALL:
5245 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5246 break;
5247 case AARCH64_CMODEL_LARGE:
5248 sorry ("code model %qs with -f%s", "large",
5249 flag_pic > 1 ? "PIC" : "pic");
5250 default:
5251 gcc_unreachable ();
5252 }
5253 }
5254 else
5255 aarch64_cmodel = aarch64_cmodel_var;
5256}
5257
5258/* Return true if SYMBOL_REF X binds locally. */
5259
5260static bool
5261aarch64_symbol_binds_local_p (const_rtx x)
5262{
5263 return (SYMBOL_REF_DECL (x)
5264 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5265 : SYMBOL_REF_LOCAL_P (x));
5266}
5267
5268/* Return true if SYMBOL_REF X is thread local. */
5269static bool
5270aarch64_tls_symbol_p (rtx x)
5271{
5272 if (! TARGET_HAVE_TLS)
5273 return false;
5274
5275 if (GET_CODE (x) != SYMBOL_REF)
5276 return false;
5277
5278 return SYMBOL_REF_TLS_MODEL (x) != 0;
5279}
5280
5281/* Classify a TLS symbol into one of the TLS kinds. */
5282enum aarch64_symbol_type
5283aarch64_classify_tls_symbol (rtx x)
5284{
5285 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5286
5287 switch (tls_kind)
5288 {
5289 case TLS_MODEL_GLOBAL_DYNAMIC:
5290 case TLS_MODEL_LOCAL_DYNAMIC:
5291 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5292
5293 case TLS_MODEL_INITIAL_EXEC:
5294 return SYMBOL_SMALL_GOTTPREL;
5295
5296 case TLS_MODEL_LOCAL_EXEC:
5297 return SYMBOL_SMALL_TPREL;
5298
5299 case TLS_MODEL_EMULATED:
5300 case TLS_MODEL_NONE:
5301 return SYMBOL_FORCE_TO_MEM;
5302
5303 default:
5304 gcc_unreachable ();
5305 }
5306}
5307
5308/* Return the method that should be used to access SYMBOL_REF or
5309 LABEL_REF X in context CONTEXT. */
17f4d4bf 5310
43e9d192
IB
5311enum aarch64_symbol_type
5312aarch64_classify_symbol (rtx x,
5313 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5314{
5315 if (GET_CODE (x) == LABEL_REF)
5316 {
5317 switch (aarch64_cmodel)
5318 {
5319 case AARCH64_CMODEL_LARGE:
5320 return SYMBOL_FORCE_TO_MEM;
5321
5322 case AARCH64_CMODEL_TINY_PIC:
5323 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
5324 return SYMBOL_TINY_ABSOLUTE;
5325
43e9d192
IB
5326 case AARCH64_CMODEL_SMALL_PIC:
5327 case AARCH64_CMODEL_SMALL:
5328 return SYMBOL_SMALL_ABSOLUTE;
5329
5330 default:
5331 gcc_unreachable ();
5332 }
5333 }
5334
17f4d4bf 5335 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 5336 {
17f4d4bf
CSS
5337 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
5338 || CONSTANT_POOL_ADDRESS_P (x))
43e9d192
IB
5339 return SYMBOL_FORCE_TO_MEM;
5340
5341 if (aarch64_tls_symbol_p (x))
5342 return aarch64_classify_tls_symbol (x);
5343
17f4d4bf
CSS
5344 switch (aarch64_cmodel)
5345 {
5346 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
5347 if (SYMBOL_REF_WEAK (x))
5348 return SYMBOL_FORCE_TO_MEM;
5349 return SYMBOL_TINY_ABSOLUTE;
5350
17f4d4bf
CSS
5351 case AARCH64_CMODEL_SMALL:
5352 if (SYMBOL_REF_WEAK (x))
5353 return SYMBOL_FORCE_TO_MEM;
5354 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 5355
17f4d4bf 5356 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 5357 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 5358 return SYMBOL_TINY_GOT;
38e6c9a6
MS
5359 return SYMBOL_TINY_ABSOLUTE;
5360
17f4d4bf
CSS
5361 case AARCH64_CMODEL_SMALL_PIC:
5362 if (!aarch64_symbol_binds_local_p (x))
5363 return SYMBOL_SMALL_GOT;
5364 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 5365
17f4d4bf
CSS
5366 default:
5367 gcc_unreachable ();
5368 }
43e9d192 5369 }
17f4d4bf 5370
43e9d192
IB
5371 /* By default push everything into the constant pool. */
5372 return SYMBOL_FORCE_TO_MEM;
5373}
5374
43e9d192
IB
5375bool
5376aarch64_constant_address_p (rtx x)
5377{
5378 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5379}
5380
5381bool
5382aarch64_legitimate_pic_operand_p (rtx x)
5383{
5384 if (GET_CODE (x) == SYMBOL_REF
5385 || (GET_CODE (x) == CONST
5386 && GET_CODE (XEXP (x, 0)) == PLUS
5387 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5388 return false;
5389
5390 return true;
5391}
5392
3520f7cc
JG
5393/* Return true if X holds either a quarter-precision or
5394 floating-point +0.0 constant. */
5395static bool
5396aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5397{
5398 if (!CONST_DOUBLE_P (x))
5399 return false;
5400
5401 /* TODO: We could handle moving 0.0 to a TFmode register,
5402 but first we would like to refactor the movtf_aarch64
5403 to be more amenable to splitting moves properly and
5404 correctly gating on TARGET_SIMD. For now, reject all
5405 constants which are not destined for SFmode or DFmode registers. */
5406 if (!(mode == SFmode || mode == DFmode))
5407 return false;
5408
5409 if (aarch64_float_const_zero_rtx_p (x))
5410 return true;
5411 return aarch64_float_const_representable_p (x);
5412}
5413
43e9d192
IB
5414static bool
5415aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5416{
5417 /* Do not allow vector struct mode constants. We could support
5418 0 and -1 easily, but they need support in aarch64-simd.md. */
5419 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5420 return false;
5421
5422 /* This could probably go away because
5423 we now decompose CONST_INTs according to expand_mov_immediate. */
5424 if ((GET_CODE (x) == CONST_VECTOR
48063b9d 5425 && aarch64_simd_valid_immediate (x, mode, false, NULL))
3520f7cc
JG
5426 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5427 return !targetm.cannot_force_const_mem (mode, x);
43e9d192
IB
5428
5429 if (GET_CODE (x) == HIGH
5430 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5431 return true;
5432
5433 return aarch64_constant_address_p (x);
5434}
5435
a5bc806c 5436rtx
43e9d192
IB
5437aarch64_load_tp (rtx target)
5438{
5439 if (!target
5440 || GET_MODE (target) != Pmode
5441 || !register_operand (target, Pmode))
5442 target = gen_reg_rtx (Pmode);
5443
5444 /* Can return in any reg. */
5445 emit_insn (gen_aarch64_load_tp_hard (target));
5446 return target;
5447}
5448
43e9d192
IB
5449/* On AAPCS systems, this is the "struct __va_list". */
5450static GTY(()) tree va_list_type;
5451
5452/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5453 Return the type to use as __builtin_va_list.
5454
5455 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5456
5457 struct __va_list
5458 {
5459 void *__stack;
5460 void *__gr_top;
5461 void *__vr_top;
5462 int __gr_offs;
5463 int __vr_offs;
5464 }; */
5465
5466static tree
5467aarch64_build_builtin_va_list (void)
5468{
5469 tree va_list_name;
5470 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5471
5472 /* Create the type. */
5473 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5474 /* Give it the required name. */
5475 va_list_name = build_decl (BUILTINS_LOCATION,
5476 TYPE_DECL,
5477 get_identifier ("__va_list"),
5478 va_list_type);
5479 DECL_ARTIFICIAL (va_list_name) = 1;
5480 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 5481 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
5482
5483 /* Create the fields. */
5484 f_stack = build_decl (BUILTINS_LOCATION,
5485 FIELD_DECL, get_identifier ("__stack"),
5486 ptr_type_node);
5487 f_grtop = build_decl (BUILTINS_LOCATION,
5488 FIELD_DECL, get_identifier ("__gr_top"),
5489 ptr_type_node);
5490 f_vrtop = build_decl (BUILTINS_LOCATION,
5491 FIELD_DECL, get_identifier ("__vr_top"),
5492 ptr_type_node);
5493 f_groff = build_decl (BUILTINS_LOCATION,
5494 FIELD_DECL, get_identifier ("__gr_offs"),
5495 integer_type_node);
5496 f_vroff = build_decl (BUILTINS_LOCATION,
5497 FIELD_DECL, get_identifier ("__vr_offs"),
5498 integer_type_node);
5499
5500 DECL_ARTIFICIAL (f_stack) = 1;
5501 DECL_ARTIFICIAL (f_grtop) = 1;
5502 DECL_ARTIFICIAL (f_vrtop) = 1;
5503 DECL_ARTIFICIAL (f_groff) = 1;
5504 DECL_ARTIFICIAL (f_vroff) = 1;
5505
5506 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5507 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5508 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5509 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5510 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5511
5512 TYPE_FIELDS (va_list_type) = f_stack;
5513 DECL_CHAIN (f_stack) = f_grtop;
5514 DECL_CHAIN (f_grtop) = f_vrtop;
5515 DECL_CHAIN (f_vrtop) = f_groff;
5516 DECL_CHAIN (f_groff) = f_vroff;
5517
5518 /* Compute its layout. */
5519 layout_type (va_list_type);
5520
5521 return va_list_type;
5522}
5523
5524/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5525static void
5526aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5527{
5528 const CUMULATIVE_ARGS *cum;
5529 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5530 tree stack, grtop, vrtop, groff, vroff;
5531 tree t;
5532 int gr_save_area_size;
5533 int vr_save_area_size;
5534 int vr_offset;
5535
5536 cum = &crtl->args.info;
5537 gr_save_area_size
5538 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5539 vr_save_area_size
5540 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5541
5542 if (TARGET_GENERAL_REGS_ONLY)
5543 {
5544 if (cum->aapcs_nvrn > 0)
5545 sorry ("%qs and floating point or vector arguments",
5546 "-mgeneral-regs-only");
5547 vr_save_area_size = 0;
5548 }
5549
5550 f_stack = TYPE_FIELDS (va_list_type_node);
5551 f_grtop = DECL_CHAIN (f_stack);
5552 f_vrtop = DECL_CHAIN (f_grtop);
5553 f_groff = DECL_CHAIN (f_vrtop);
5554 f_vroff = DECL_CHAIN (f_groff);
5555
5556 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5557 NULL_TREE);
5558 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5559 NULL_TREE);
5560 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5561 NULL_TREE);
5562 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5563 NULL_TREE);
5564 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5565 NULL_TREE);
5566
5567 /* Emit code to initialize STACK, which points to the next varargs stack
5568 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5569 by named arguments. STACK is 8-byte aligned. */
5570 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5571 if (cum->aapcs_stack_size > 0)
5572 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5573 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5574 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5575
5576 /* Emit code to initialize GRTOP, the top of the GR save area.
5577 virtual_incoming_args_rtx should have been 16 byte aligned. */
5578 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5579 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5580 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5581
5582 /* Emit code to initialize VRTOP, the top of the VR save area.
5583 This address is gr_save_area_bytes below GRTOP, rounded
5584 down to the next 16-byte boundary. */
5585 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5586 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5587 STACK_BOUNDARY / BITS_PER_UNIT);
5588
5589 if (vr_offset)
5590 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5591 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5592 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5593
5594 /* Emit code to initialize GROFF, the offset from GRTOP of the
5595 next GPR argument. */
5596 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5597 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5598 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5599
5600 /* Likewise emit code to initialize VROFF, the offset from FTOP
5601 of the next VR argument. */
5602 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5603 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5604 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5605}
5606
5607/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5608
5609static tree
5610aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5611 gimple_seq *post_p ATTRIBUTE_UNUSED)
5612{
5613 tree addr;
5614 bool indirect_p;
5615 bool is_ha; /* is HFA or HVA. */
5616 bool dw_align; /* double-word align. */
5617 enum machine_mode ag_mode = VOIDmode;
5618 int nregs;
5619 enum machine_mode mode;
5620
5621 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5622 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5623 HOST_WIDE_INT size, rsize, adjust, align;
5624 tree t, u, cond1, cond2;
5625
5626 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5627 if (indirect_p)
5628 type = build_pointer_type (type);
5629
5630 mode = TYPE_MODE (type);
5631
5632 f_stack = TYPE_FIELDS (va_list_type_node);
5633 f_grtop = DECL_CHAIN (f_stack);
5634 f_vrtop = DECL_CHAIN (f_grtop);
5635 f_groff = DECL_CHAIN (f_vrtop);
5636 f_vroff = DECL_CHAIN (f_groff);
5637
5638 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5639 f_stack, NULL_TREE);
5640 size = int_size_in_bytes (type);
5641 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5642
5643 dw_align = false;
5644 adjust = 0;
5645 if (aarch64_vfp_is_call_or_return_candidate (mode,
5646 type,
5647 &ag_mode,
5648 &nregs,
5649 &is_ha))
5650 {
5651 /* TYPE passed in fp/simd registers. */
5652 if (TARGET_GENERAL_REGS_ONLY)
5653 sorry ("%qs and floating point or vector arguments",
5654 "-mgeneral-regs-only");
5655
5656 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5657 unshare_expr (valist), f_vrtop, NULL_TREE);
5658 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5659 unshare_expr (valist), f_vroff, NULL_TREE);
5660
5661 rsize = nregs * UNITS_PER_VREG;
5662
5663 if (is_ha)
5664 {
5665 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5666 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5667 }
5668 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5669 && size < UNITS_PER_VREG)
5670 {
5671 adjust = UNITS_PER_VREG - size;
5672 }
5673 }
5674 else
5675 {
5676 /* TYPE passed in general registers. */
5677 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5678 unshare_expr (valist), f_grtop, NULL_TREE);
5679 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5680 unshare_expr (valist), f_groff, NULL_TREE);
5681 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5682 nregs = rsize / UNITS_PER_WORD;
5683
5684 if (align > 8)
5685 dw_align = true;
5686
5687 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5688 && size < UNITS_PER_WORD)
5689 {
5690 adjust = UNITS_PER_WORD - size;
5691 }
5692 }
5693
5694 /* Get a local temporary for the field value. */
5695 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5696
5697 /* Emit code to branch if off >= 0. */
5698 t = build2 (GE_EXPR, boolean_type_node, off,
5699 build_int_cst (TREE_TYPE (off), 0));
5700 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5701
5702 if (dw_align)
5703 {
5704 /* Emit: offs = (offs + 15) & -16. */
5705 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5706 build_int_cst (TREE_TYPE (off), 15));
5707 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5708 build_int_cst (TREE_TYPE (off), -16));
5709 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5710 }
5711 else
5712 roundup = NULL;
5713
5714 /* Update ap.__[g|v]r_offs */
5715 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5716 build_int_cst (TREE_TYPE (off), rsize));
5717 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5718
5719 /* String up. */
5720 if (roundup)
5721 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5722
5723 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5724 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5725 build_int_cst (TREE_TYPE (f_off), 0));
5726 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5727
5728 /* String up: make sure the assignment happens before the use. */
5729 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5730 COND_EXPR_ELSE (cond1) = t;
5731
5732 /* Prepare the trees handling the argument that is passed on the stack;
5733 the top level node will store in ON_STACK. */
5734 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5735 if (align > 8)
5736 {
5737 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5738 t = fold_convert (intDI_type_node, arg);
5739 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5740 build_int_cst (TREE_TYPE (t), 15));
5741 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5742 build_int_cst (TREE_TYPE (t), -16));
5743 t = fold_convert (TREE_TYPE (arg), t);
5744 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5745 }
5746 else
5747 roundup = NULL;
5748 /* Advance ap.__stack */
5749 t = fold_convert (intDI_type_node, arg);
5750 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5751 build_int_cst (TREE_TYPE (t), size + 7));
5752 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5753 build_int_cst (TREE_TYPE (t), -8));
5754 t = fold_convert (TREE_TYPE (arg), t);
5755 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5756 /* String up roundup and advance. */
5757 if (roundup)
5758 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5759 /* String up with arg */
5760 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5761 /* Big-endianness related address adjustment. */
5762 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5763 && size < UNITS_PER_WORD)
5764 {
5765 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5766 size_int (UNITS_PER_WORD - size));
5767 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5768 }
5769
5770 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5771 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5772
5773 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5774 t = off;
5775 if (adjust)
5776 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5777 build_int_cst (TREE_TYPE (off), adjust));
5778
5779 t = fold_convert (sizetype, t);
5780 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5781
5782 if (is_ha)
5783 {
5784 /* type ha; // treat as "struct {ftype field[n];}"
5785 ... [computing offs]
5786	 for (i = 0; i < nregs; ++i, offs += 16)
5787 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5788 return ha; */
5789 int i;
5790 tree tmp_ha, field_t, field_ptr_t;
5791
5792 /* Declare a local variable. */
5793 tmp_ha = create_tmp_var_raw (type, "ha");
5794 gimple_add_tmp_var (tmp_ha);
5795
5796 /* Establish the base type. */
5797 switch (ag_mode)
5798 {
5799 case SFmode:
5800 field_t = float_type_node;
5801 field_ptr_t = float_ptr_type_node;
5802 break;
5803 case DFmode:
5804 field_t = double_type_node;
5805 field_ptr_t = double_ptr_type_node;
5806 break;
5807 case TFmode:
5808 field_t = long_double_type_node;
5809 field_ptr_t = long_double_ptr_type_node;
5810 break;
5811/* The half precision and quad precision are not fully supported yet. Enable
5812 the following code after the support is complete. Need to find the correct
5813 type node for __fp16 *. */
5814#if 0
5815 case HFmode:
5816 field_t = float_type_node;
5817 field_ptr_t = float_ptr_type_node;
5818 break;
5819#endif
5820 case V2SImode:
5821 case V4SImode:
5822 {
5823 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5824 field_t = build_vector_type_for_mode (innertype, ag_mode);
5825 field_ptr_t = build_pointer_type (field_t);
5826 }
5827 break;
5828 default:
5829 gcc_assert (0);
5830 }
5831
5832      /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area)  */
5833 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5834 addr = t;
5835 t = fold_convert (field_ptr_t, addr);
5836 t = build2 (MODIFY_EXPR, field_t,
5837 build1 (INDIRECT_REF, field_t, tmp_ha),
5838 build1 (INDIRECT_REF, field_t, t));
5839
5840 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5841 for (i = 1; i < nregs; ++i)
5842 {
5843 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5844 u = fold_convert (field_ptr_t, addr);
5845 u = build2 (MODIFY_EXPR, field_t,
5846 build2 (MEM_REF, field_t, tmp_ha,
5847 build_int_cst (field_ptr_t,
5848 (i *
5849 int_size_in_bytes (field_t)))),
5850 build1 (INDIRECT_REF, field_t, u));
5851 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5852 }
5853
5854 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5855 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5856 }
5857
5858 COND_EXPR_ELSE (cond2) = t;
5859 addr = fold_convert (build_pointer_type (type), cond1);
5860 addr = build_va_arg_indirect_ref (addr);
5861
5862 if (indirect_p)
5863 addr = build_va_arg_indirect_ref (addr);
5864
5865 return addr;
5866}
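/* Illustrative sketch (not part of the original source): for a homogeneous
   floating-point aggregate such as

       struct hfa { double x, y, z; };      three DFmode members

   the is_ha path above reads one UNITS_PER_VREG (16-byte) save slot per
   member starting at ap.__vr_top + ap.__vr_offs, copies a single double out
   of each slot into the local temporary "ha", and returns "ha" as the
   va_arg result.  */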
5867
5868/* Implement TARGET_SETUP_INCOMING_VARARGS. */
5869
5870static void
5871aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5872 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5873 int no_rtl)
5874{
5875 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5876 CUMULATIVE_ARGS local_cum;
5877 int gr_saved, vr_saved;
5878
5879 /* The caller has advanced CUM up to, but not beyond, the last named
5880 argument. Advance a local copy of CUM past the last "real" named
5881 argument, to find out how many registers are left over. */
5882 local_cum = *cum;
5883 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
5884
5885  /* Find out how many registers we need to save.  */
5886 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5887 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5888
5889 if (TARGET_GENERAL_REGS_ONLY)
5890 {
5891 if (local_cum.aapcs_nvrn > 0)
5892 sorry ("%qs and floating point or vector arguments",
5893 "-mgeneral-regs-only");
5894 vr_saved = 0;
5895 }
5896
5897 if (!no_rtl)
5898 {
5899 if (gr_saved > 0)
5900 {
5901 rtx ptr, mem;
5902
5903 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5904 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5905 - gr_saved * UNITS_PER_WORD);
5906 mem = gen_frame_mem (BLKmode, ptr);
5907 set_mem_alias_set (mem, get_varargs_alias_set ());
5908
5909 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5910 mem, gr_saved);
5911 }
5912 if (vr_saved > 0)
5913 {
5914 /* We can't use move_block_from_reg, because it will use
5915 the wrong mode, storing D regs only. */
5916 enum machine_mode mode = TImode;
5917 int off, i;
5918
5919 /* Set OFF to the offset from virtual_incoming_args_rtx of
5920 the first vector register. The VR save area lies below
5921 the GR one, and is aligned to 16 bytes. */
5922 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5923 STACK_BOUNDARY / BITS_PER_UNIT);
5924 off -= vr_saved * UNITS_PER_VREG;
5925
5926 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5927 {
5928 rtx ptr, mem;
5929
5930 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5931 mem = gen_frame_mem (mode, ptr);
5932 set_mem_alias_set (mem, get_varargs_alias_set ());
5933 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5934 off += UNITS_PER_VREG;
5935 }
5936 }
5937 }
5938
5939 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5940 any complication of having crtl->args.pretend_args_size changed. */
5941 cfun->machine->saved_varargs_size
5942 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5943 STACK_BOUNDARY / BITS_PER_UNIT)
5944 + vr_saved * UNITS_PER_VREG);
5945}
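/* Worked example, illustrative only, using the usual AAPCS64 counts
   (NUM_ARG_REGS == 8 general registers x0-x7, NUM_FP_ARG_REGS == 8 vector
   registers v0-v7).  For a variadic callee such as

       int f (int a, int b, int c, double d, ...);

   local_cum ends up with aapcs_ncrn == 3 and aapcs_nvrn == 1, so
   gr_saved == 5 and vr_saved == 7, and the anonymous-register save area is

       AARCH64_ROUND_UP (5 * 8, 16) + 7 * 16 == 48 + 112 == 160 bytes.  */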
5946
5947static void
5948aarch64_conditional_register_usage (void)
5949{
5950 int i;
5951 if (!TARGET_FLOAT)
5952 {
5953 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5954 {
5955 fixed_regs[i] = 1;
5956 call_used_regs[i] = 1;
5957 }
5958 }
5959}
5960
5961/* Walk down the type tree of TYPE counting consecutive base elements.
5962 If *MODEP is VOIDmode, then set it to the first valid floating point
5963 type. If a non-floating point type is found, or if a floating point
5964 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5965 otherwise return the count in the sub-tree. */
5966static int
5967aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5968{
5969 enum machine_mode mode;
5970 HOST_WIDE_INT size;
5971
5972 switch (TREE_CODE (type))
5973 {
5974 case REAL_TYPE:
5975 mode = TYPE_MODE (type);
5976 if (mode != DFmode && mode != SFmode && mode != TFmode)
5977 return -1;
5978
5979 if (*modep == VOIDmode)
5980 *modep = mode;
5981
5982 if (*modep == mode)
5983 return 1;
5984
5985 break;
5986
5987 case COMPLEX_TYPE:
5988 mode = TYPE_MODE (TREE_TYPE (type));
5989 if (mode != DFmode && mode != SFmode && mode != TFmode)
5990 return -1;
5991
5992 if (*modep == VOIDmode)
5993 *modep = mode;
5994
5995 if (*modep == mode)
5996 return 2;
5997
5998 break;
5999
6000 case VECTOR_TYPE:
6001 /* Use V2SImode and V4SImode as representatives of all 64-bit
6002 and 128-bit vector types. */
6003 size = int_size_in_bytes (type);
6004 switch (size)
6005 {
6006 case 8:
6007 mode = V2SImode;
6008 break;
6009 case 16:
6010 mode = V4SImode;
6011 break;
6012 default:
6013 return -1;
6014 }
6015
6016 if (*modep == VOIDmode)
6017 *modep = mode;
6018
6019 /* Vector modes are considered to be opaque: two vectors are
6020 equivalent for the purposes of being homogeneous aggregates
6021 if they are the same size. */
6022 if (*modep == mode)
6023 return 1;
6024
6025 break;
6026
6027 case ARRAY_TYPE:
6028 {
6029 int count;
6030 tree index = TYPE_DOMAIN (type);
6031
6032 /* Can't handle incomplete types. */
6033 if (!COMPLETE_TYPE_P (type))
6034 return -1;
6035
6036 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6037 if (count == -1
6038 || !index
6039 || !TYPE_MAX_VALUE (index)
6040	  || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6041	  || !TYPE_MIN_VALUE (index)
6042	  || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6043	  || count < 0)
6044	return -1;
6045
6046      count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6047		      - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6048
6049 /* There must be no padding. */
6050      if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6051	  || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6052	      != count * GET_MODE_BITSIZE (*modep)))
6053 return -1;
6054
6055 return count;
6056 }
6057
6058 case RECORD_TYPE:
6059 {
6060 int count = 0;
6061 int sub_count;
6062 tree field;
6063
6064 /* Can't handle incomplete types. */
6065 if (!COMPLETE_TYPE_P (type))
6066 return -1;
6067
6068 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6069 {
6070 if (TREE_CODE (field) != FIELD_DECL)
6071 continue;
6072
6073 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6074 if (sub_count < 0)
6075 return -1;
6076 count += sub_count;
6077 }
6078
6079 /* There must be no padding. */
6080      if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6081	  || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6082	      != count * GET_MODE_BITSIZE (*modep)))
6083 return -1;
6084
6085 return count;
6086 }
6087
6088 case UNION_TYPE:
6089 case QUAL_UNION_TYPE:
6090 {
6091 /* These aren't very interesting except in a degenerate case. */
6092 int count = 0;
6093 int sub_count;
6094 tree field;
6095
6096 /* Can't handle incomplete types. */
6097 if (!COMPLETE_TYPE_P (type))
6098 return -1;
6099
6100 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6101 {
6102 if (TREE_CODE (field) != FIELD_DECL)
6103 continue;
6104
6105 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6106 if (sub_count < 0)
6107 return -1;
6108 count = count > sub_count ? count : sub_count;
6109 }
6110
6111 /* There must be no padding. */
6112      if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6113	  || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6114	      != count * GET_MODE_BITSIZE (*modep)))
6115 return -1;
6116
6117 return count;
6118 }
6119
6120 default:
6121 break;
6122 }
6123
6124 return -1;
6125}
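/* Illustrative examples of the walk above, not part of the original file:

     struct { double d; double v[3]; }   -> returns 4, *modep == DFmode
     struct { float f; double d; }       -> returns -1 (mixed base modes)
     _Complex double                     -> returns 2, *modep == DFmode

   A result in the range 1..HA_MAX_NUM_FLDS makes the type a candidate
   homogeneous aggregate for the caller further below.  */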
6126
6127/* Return true if we use LRA instead of reload pass. */
6128static bool
6129aarch64_lra_p (void)
6130{
6131 return aarch64_lra_flag;
6132}
6133
6134/* Return TRUE if the type, as described by TYPE and MODE, is a composite
6135 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6136 array types. The C99 floating-point complex types are also considered
6137 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6138 types, which are GCC extensions and out of the scope of AAPCS64, are
6139 treated as composite types here as well.
6140
6141 Note that MODE itself is not sufficient in determining whether a type
6142 is such a composite type or not. This is because
6143 stor-layout.c:compute_record_mode may have already changed the MODE
6144 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6145 structure with only one field may have its MODE set to the mode of the
6146 field. Also an integer mode whose size matches the size of the
6147 RECORD_TYPE type may be used to substitute the original mode
6148 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6149 solely relied on. */
6150
6151static bool
6152aarch64_composite_type_p (const_tree type,
6153 enum machine_mode mode)
6154{
6155 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6156 return true;
6157
6158 if (mode == BLKmode
6159 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6160 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6161 return true;
6162
6163 return false;
6164}
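/* Illustrative examples, not part of the original file:
   "struct { int i; }" is composite even though compute_record_mode may give
   it SImode, and "_Complex float" (SCmode) is composite, while a plain
   "double" (DFmode, REAL_TYPE) is not.  */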
6165
6166/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6167 type as described in AAPCS64 \S 4.1.2.
6168
6169 See the comment above aarch64_composite_type_p for the notes on MODE. */
6170
6171static bool
6172aarch64_short_vector_p (const_tree type,
6173 enum machine_mode mode)
6174{
6175 HOST_WIDE_INT size = -1;
6176
6177 if (type && TREE_CODE (type) == VECTOR_TYPE)
6178 size = int_size_in_bytes (type);
6179 else if (!aarch64_composite_type_p (type, mode)
6180 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6181 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6182 size = GET_MODE_SIZE (mode);
6183
6184 return (size == 8 || size == 16) ? true : false;
6185}
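/* Illustrative examples, not part of the original file: an 8-byte vector
   such as int32x2_t (V2SImode) and a 16-byte vector such as float32x4_t
   (V4SFmode) are short vectors; a 32-byte
   __attribute__ ((vector_size (32))) type is not.  */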
6186
6187/* Return TRUE if an argument, whose type is described by TYPE and MODE,
6188 shall be passed or returned in simd/fp register(s) (providing these
6189 parameter passing registers are available).
6190
6191 Upon successful return, *COUNT returns the number of needed registers,
6192   *BASE_MODE returns the mode of the individual register and when IS_HA
6193 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6194 floating-point aggregate or a homogeneous short-vector aggregate. */
6195
6196static bool
6197aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6198 const_tree type,
6199 enum machine_mode *base_mode,
6200 int *count,
6201 bool *is_ha)
6202{
6203 enum machine_mode new_mode = VOIDmode;
6204 bool composite_p = aarch64_composite_type_p (type, mode);
6205
6206 if (is_ha != NULL) *is_ha = false;
6207
6208 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6209 || aarch64_short_vector_p (type, mode))
6210 {
6211 *count = 1;
6212 new_mode = mode;
6213 }
6214 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6215 {
6216 if (is_ha != NULL) *is_ha = true;
6217 *count = 2;
6218 new_mode = GET_MODE_INNER (mode);
6219 }
6220 else if (type && composite_p)
6221 {
6222 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6223
6224 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6225 {
6226 if (is_ha != NULL) *is_ha = true;
6227 *count = ag_count;
6228 }
6229 else
6230 return false;
6231 }
6232 else
6233 return false;
6234
6235 *base_mode = new_mode;
6236 return true;
6237}
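/* Illustrative use of the helper above, not part of the original file; the
   variable names here are hypothetical:

     struct quad { float a, b, c, d; };
     enum machine_mode base;
     int nregs;
     bool is_ha;
     if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
						   &base, &nregs, &is_ha))
       ...   here base == SFmode, nregs == 4 and is_ha == true,

   so under the AAPCS64 the four members would each go in their own
   floating-point register (s0-s3) when enough are available.  */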
6238
6239/* Implement TARGET_STRUCT_VALUE_RTX. */
6240
6241static rtx
6242aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6243 int incoming ATTRIBUTE_UNUSED)
6244{
6245 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6246}
6247
6248/* Implements target hook vector_mode_supported_p. */
6249static bool
6250aarch64_vector_mode_supported_p (enum machine_mode mode)
6251{
6252 if (TARGET_SIMD
6253 && (mode == V4SImode || mode == V8HImode
6254 || mode == V16QImode || mode == V2DImode
6255 || mode == V2SImode || mode == V4HImode
6256 || mode == V8QImode || mode == V2SFmode
6257 || mode == V4SFmode || mode == V2DFmode))
6258 return true;
6259
6260 return false;
6261}
6262
6263/* Return appropriate SIMD container
6264   for MODE within a vector of WIDTH bits.  */
6265static enum machine_mode
6266aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
6267{
6268  gcc_assert (width == 64 || width == 128);
6269  if (TARGET_SIMD)
6270 {
6271 if (width == 128)
6272 switch (mode)
6273 {
6274 case DFmode:
6275 return V2DFmode;
6276 case SFmode:
6277 return V4SFmode;
6278 case SImode:
6279 return V4SImode;
6280 case HImode:
6281 return V8HImode;
6282 case QImode:
6283 return V16QImode;
6284 case DImode:
6285 return V2DImode;
6286 default:
6287 break;
6288 }
6289 else
6290 switch (mode)
6291 {
6292 case SFmode:
6293 return V2SFmode;
6294 case SImode:
6295 return V2SImode;
6296 case HImode:
6297 return V4HImode;
6298 case QImode:
6299 return V8QImode;
6300 default:
6301 break;
6302 }
6303 }
6304 return word_mode;
6305}
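/* Illustrative mappings of the helper above, not part of the original file:
   (SImode, 128) -> V4SImode, (HImode, 64) -> V4HImode, and (DFmode, 64)
   falls through to word_mode, since there is no 64-bit DF container.  */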
6306
6307/* Return 128-bit container as the preferred SIMD mode for MODE. */
6308static enum machine_mode
6309aarch64_preferred_simd_mode (enum machine_mode mode)
6310{
6311 return aarch64_simd_container_mode (mode, 128);
6312}
6313
6314/* Return the bitmask of possible vector sizes for the vectorizer
6315 to iterate over. */
6316static unsigned int
6317aarch64_autovectorize_vector_sizes (void)
6318{
6319 return (16 | 8);
6320}
6321
6322/* A table to help perform AArch64-specific name mangling for AdvSIMD
6323 vector types in order to conform to the AAPCS64 (see "Procedure
6324 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6325 qualify for emission with the mangled names defined in that document,
6326 a vector type must not only be of the correct mode but also be
6327 composed of AdvSIMD vector element types (e.g.
6328   __builtin_aarch64_simd_qi); these types are registered by
6329 aarch64_init_simd_builtins (). In other words, vector types defined
6330 in other ways e.g. via vector_size attribute will get default
6331 mangled names. */
6332typedef struct
6333{
6334 enum machine_mode mode;
6335 const char *element_type_name;
6336 const char *mangled_name;
6337} aarch64_simd_mangle_map_entry;
6338
6339static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6340 /* 64-bit containerized types. */
6341 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6342 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6343 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6344 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6345 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6346 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6347 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6348 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6349 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6350 /* 128-bit containerized types. */
6351 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6352 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6353 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6354 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6355 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6356 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6357 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6358 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6359 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6360 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6361 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6362 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6363 { VOIDmode, NULL, NULL }
6364};
6365
6366/* Implement TARGET_MANGLE_TYPE. */
6367
6368static const char *
6369aarch64_mangle_type (const_tree type)
6370{
6371 /* The AArch64 ABI documents say that "__va_list" has to be
6372     mangled as if it is in the "std" namespace.  */
6373 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6374 return "St9__va_list";
6375
6376 /* Check the mode of the vector type, and the name of the vector
6377 element type, against the table. */
6378 if (TREE_CODE (type) == VECTOR_TYPE)
6379 {
6380 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6381
6382 while (pos->mode != VOIDmode)
6383 {
6384 tree elt_type = TREE_TYPE (type);
6385
6386 if (pos->mode == TYPE_MODE (type)
6387 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6388 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6389 pos->element_type_name))
6390 return pos->mangled_name;
6391
6392 pos++;
6393 }
6394 }
6395
6396 /* Use the default mangling. */
6397 return NULL;
6398}
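/* Illustrative example, not part of the original file: the arm_neon.h type
   int32x4_t has mode V4SImode and element type __builtin_aarch64_simd_si,
   so the table above maps it to "11__Int32x4_t", i.e. the Itanium-ABI
   <length><name> encoding of "__Int32x4_t".  */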
6399
6400/* Return the equivalent letter for size.  */
6401static char
6402sizetochar (int size)
6403{
6404 switch (size)
6405 {
6406 case 64: return 'd';
6407 case 32: return 's';
6408 case 16: return 'h';
6409 case 8 : return 'b';
6410 default: gcc_unreachable ();
6411 }
6412}
6413
6414/* Return true iff x is a uniform vector of floating-point
6415 constants, and the constant can be represented in
6416   quarter-precision form.  Note that as aarch64_float_const_representable_p
6417   rejects both +0.0 and -0.0, this function will also reject them.  */
6418static bool
6419aarch64_vect_float_const_representable_p (rtx x)
6420{
6421 int i = 0;
6422 REAL_VALUE_TYPE r0, ri;
6423 rtx x0, xi;
6424
6425 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6426 return false;
6427
6428 x0 = CONST_VECTOR_ELT (x, 0);
6429 if (!CONST_DOUBLE_P (x0))
6430 return false;
6431
6432 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6433
6434 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6435 {
6436 xi = CONST_VECTOR_ELT (x, i);
6437 if (!CONST_DOUBLE_P (xi))
6438 return false;
6439
6440 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6441 if (!REAL_VALUES_EQUAL (r0, ri))
6442 return false;
6443 }
6444
6445 return aarch64_float_const_representable_p (x0);
6446}
6447
6448/* Return true for valid and false for invalid.  */
6449bool
6450aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
6451			      struct simd_immediate_info *info)
6452{
6453#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6454 matches = 1; \
6455 for (i = 0; i < idx; i += (STRIDE)) \
6456 if (!(TEST)) \
6457 matches = 0; \
6458 if (matches) \
6459 { \
6460 immtype = (CLASS); \
6461 elsize = (ELSIZE); \
6462 eshift = (SHIFT); \
6463 emvn = (NEG); \
6464 break; \
6465 }
6466
6467 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6468 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6469 unsigned char bytes[16];
6470 int immtype = -1, matches;
6471 unsigned int invmask = inverse ? 0xff : 0;
6472 int eshift, emvn;
6473
6474  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6475    {
6476      if (! (aarch64_simd_imm_zero_p (op, mode)
6477	     || aarch64_vect_float_const_representable_p (op)))
6478	return false;
6479
6480      if (info)
6481	{
6482	  info->value = CONST_VECTOR_ELT (op, 0);
6483	  info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
6484	  info->mvn = false;
6485	  info->shift = 0;
6486	}
6487
6488      return true;
6489    }
6490
6491 /* Splat vector constant out into a byte vector. */
6492 for (i = 0; i < n_elts; i++)
6493 {
6494 rtx el = CONST_VECTOR_ELT (op, i);
6495 unsigned HOST_WIDE_INT elpart;
6496 unsigned int part, parts;
6497
6498 if (GET_CODE (el) == CONST_INT)
6499 {
6500 elpart = INTVAL (el);
6501 parts = 1;
6502 }
6503 else if (GET_CODE (el) == CONST_DOUBLE)
6504 {
6505 elpart = CONST_DOUBLE_LOW (el);
6506 parts = 2;
6507 }
6508 else
6509 gcc_unreachable ();
6510
6511 for (part = 0; part < parts; part++)
6512 {
6513 unsigned int byte;
6514 for (byte = 0; byte < innersize; byte++)
6515 {
6516 bytes[idx++] = (elpart & 0xff) ^ invmask;
6517 elpart >>= BITS_PER_UNIT;
6518 }
6519 if (GET_CODE (el) == CONST_DOUBLE)
6520 elpart = CONST_DOUBLE_HIGH (el);
6521 }
6522 }
6523
6524 /* Sanity check. */
6525 gcc_assert (idx == GET_MODE_SIZE (mode));
6526
6527 do
6528 {
6529 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6530 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6531
6532 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6533 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6534
6535 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6536 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6537
6538 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6539 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6540
6541 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6542
6543 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6544
6545 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6546 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6547
6548 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6549 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6550
6551 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6552 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6553
6554 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6555 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6556
6557 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6558
6559 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6560
6561 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6562	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6563
6564      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6565	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6566
6567      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6568	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6569
6570      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6571	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6572
6573 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6574
6575 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6576 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6577 }
6578 while (0);
6579
6580  if (immtype == -1)
6581    return false;
6582
6583  if (info)
6584    {
6585      info->element_width = elsize;
6586      info->mvn = emvn != 0;
6587      info->shift = eshift;
6588
6589 unsigned HOST_WIDE_INT imm = 0;
6590
6591      if (immtype >= 12 && immtype <= 15)
6592	info->msl = true;
6593
6594 /* Un-invert bytes of recognized vector, if necessary. */
6595 if (invmask != 0)
6596 for (i = 0; i < idx; i++)
6597 bytes[i] ^= invmask;
6598
6599 if (immtype == 17)
6600 {
6601 /* FIXME: Broken on 32-bit H_W_I hosts. */
6602 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6603
6604 for (i = 0; i < 8; i++)
6605 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6606 << (i * BITS_PER_UNIT);
6607
6608
6609	  info->value = GEN_INT (imm);
6610	}
6611      else
6612	{
6613	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6614	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6615
6616	  /* Construct 'abcdefgh' because the assembler cannot handle
6617	     generic constants.  */
6618	  if (info->mvn)
6619	    imm = ~imm;
6620	  imm = (imm >> info->shift) & 0xff;
6621	  info->value = GEN_INT (imm);
6622	}
6623 }
6624
6625  return true;
6626#undef CHECK
6627}
6628
6629static bool
6630aarch64_const_vec_all_same_int_p (rtx x,
6631 HOST_WIDE_INT minval,
6632 HOST_WIDE_INT maxval)
6633{
6634 HOST_WIDE_INT firstval;
6635 int count, i;
6636
6637 if (GET_CODE (x) != CONST_VECTOR
6638 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6639 return false;
6640
6641 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6642 if (firstval < minval || firstval > maxval)
6643 return false;
6644
6645 count = CONST_VECTOR_NUNITS (x);
6646 for (i = 1; i < count; i++)
6647 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6648 return false;
6649
6650 return true;
6651}
6652
6653/* Check if immediate shift constants are within range.  */
6654bool
6655aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6656{
6657 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6658 if (left)
6659 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6660 else
6661 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6662}
6663
6664/* Return true if X is a uniform vector where all elements
6665   are either the floating-point constant 0.0 or the
6666   integer constant 0.  */
6667bool
6668aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6669{
6670  return x == CONST0_RTX (mode);
6671}
6672
6673bool
6674aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6675{
6676 HOST_WIDE_INT imm = INTVAL (x);
6677 int i;
6678
6679 for (i = 0; i < 8; i++)
6680 {
6681 unsigned int byte = imm & 0xff;
6682 if (byte != 0xff && byte != 0)
6683 return false;
6684 imm >>= 8;
6685 }
6686
6687 return true;
6688}
6689
6690bool
6691aarch64_mov_operand_p (rtx x,
6692		       enum aarch64_symbol_context context,
6693		       enum machine_mode mode)
6694{
6695 if (GET_CODE (x) == HIGH
6696 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6697 return true;
6698
6699 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
6700 return true;
6701
6702 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
6703 return true;
6704
6705  return aarch64_classify_symbolic_expression (x, context)
6706    == SYMBOL_TINY_ABSOLUTE;
6707}
6708
6709/* Return a const_int vector of VAL. */
6710rtx
6711aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6712{
6713 int nunits = GET_MODE_NUNITS (mode);
6714 rtvec v = rtvec_alloc (nunits);
6715 int i;
6716
6717 for (i=0; i < nunits; i++)
6718 RTVEC_ELT (v, i) = GEN_INT (val);
6719
6720 return gen_rtx_CONST_VECTOR (mode, v);
6721}
6722
6723/* Check OP is a legal scalar immediate for the MOVI instruction. */
6724
6725bool
6726aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
6727{
6728 enum machine_mode vmode;
6729
6730 gcc_assert (!VECTOR_MODE_P (mode));
6731 vmode = aarch64_preferred_simd_mode (mode);
6732 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
6733  return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
6734}
6735
6736/* Construct and return a PARALLEL RTX vector. */
6737rtx
6738aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6739{
6740 int nunits = GET_MODE_NUNITS (mode);
6741 rtvec v = rtvec_alloc (nunits / 2);
6742 int base = high ? nunits / 2 : 0;
6743 rtx t1;
6744 int i;
6745
6746 for (i=0; i < nunits / 2; i++)
6747 RTVEC_ELT (v, i) = GEN_INT (base + i);
6748
6749 t1 = gen_rtx_PARALLEL (mode, v);
6750 return t1;
6751}
6752
6753/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6754 HIGH (exclusive). */
6755void
6756aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6757{
6758 HOST_WIDE_INT lane;
6759 gcc_assert (GET_CODE (operand) == CONST_INT);
6760 lane = INTVAL (operand);
6761
6762 if (lane < low || lane >= high)
6763 error ("lane out of range");
6764}
6765
6766void
6767aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6768{
6769 gcc_assert (GET_CODE (operand) == CONST_INT);
6770 HOST_WIDE_INT lane = INTVAL (operand);
6771
6772 if (lane < low || lane >= high)
6773 error ("constant out of range");
6774}
6775
6776/* Emit code to reinterpret one AdvSIMD type as another,
6777 without altering bits. */
6778void
6779aarch64_simd_reinterpret (rtx dest, rtx src)
6780{
6781 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6782}
6783
6784/* Emit code to place an AdvSIMD pair result in memory locations (with equal
6785 registers). */
6786void
6787aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6788 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6789 rtx op1)
6790{
6791 rtx mem = gen_rtx_MEM (mode, destaddr);
6792 rtx tmp1 = gen_reg_rtx (mode);
6793 rtx tmp2 = gen_reg_rtx (mode);
6794
6795 emit_insn (intfn (tmp1, op1, tmp2));
6796
6797 emit_move_insn (mem, tmp1);
6798 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6799 emit_move_insn (mem, tmp2);
6800}
6801
6802/* Return TRUE if OP is a valid vector addressing mode. */
6803bool
6804aarch64_simd_mem_operand_p (rtx op)
6805{
6806 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6807 || GET_CODE (XEXP (op, 0)) == REG);
6808}
6809
6810/* Set up OPERANDS for a register copy from SRC to DEST, taking care
6811 not to early-clobber SRC registers in the process.
6812
6813 We assume that the operands described by SRC and DEST represent a
6814 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6815 number of components into which the copy has been decomposed. */
6816void
6817aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6818 rtx *src, unsigned int count)
6819{
6820 unsigned int i;
6821
6822 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6823 || REGNO (operands[0]) < REGNO (operands[1]))
6824 {
6825 for (i = 0; i < count; i++)
6826 {
6827 operands[2 * i] = dest[i];
6828 operands[2 * i + 1] = src[i];
6829 }
6830 }
6831 else
6832 {
6833 for (i = 0; i < count; i++)
6834 {
6835 operands[2 * i] = dest[count - i - 1];
6836 operands[2 * i + 1] = src[count - i - 1];
6837 }
6838 }
6839}
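/* Illustrative example, not part of the original file.  Copying the register
   pair {q1,q2} into {q2,q3} overlaps and the destination starts at a higher
   register number, so the loop above emits the component moves in reverse
   order (q3 <- q2 first, then q2 <- q1) so that no source register is
   clobbered before it has been read.  */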
6840
6841/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6842 one of VSTRUCT modes: OI, CI or XI. */
6843int
6844aarch64_simd_attr_length_move (rtx insn)
6845{
6846 enum machine_mode mode;
6847
6848 extract_insn_cached (insn);
6849
6850 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6851 {
6852 mode = GET_MODE (recog_data.operand[0]);
6853 switch (mode)
6854 {
6855 case OImode:
6856 return 8;
6857 case CImode:
6858 return 12;
6859 case XImode:
6860 return 16;
6861 default:
6862 gcc_unreachable ();
6863 }
6864 }
6865 return 4;
6866}
6867
6868/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6869 alignment of a vector to 128 bits. */
6870static HOST_WIDE_INT
6871aarch64_simd_vector_alignment (const_tree type)
6872{
6873  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
6874 return MIN (align, 128);
6875}
6876
6877/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6878static bool
6879aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6880{
6881 if (is_packed)
6882 return false;
6883
6884 /* We guarantee alignment for vectors up to 128-bits. */
6885 if (tree_int_cst_compare (TYPE_SIZE (type),
6886 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6887 return false;
6888
6889 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6890 return true;
6891}
6892
6893/* If VALS is a vector constant that can be loaded into a register
6894 using DUP, generate instructions to do so and return an RTX to
6895 assign to the register. Otherwise return NULL_RTX. */
6896static rtx
6897aarch64_simd_dup_constant (rtx vals)
6898{
6899 enum machine_mode mode = GET_MODE (vals);
6900 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6901 int n_elts = GET_MODE_NUNITS (mode);
6902 bool all_same = true;
6903 rtx x;
6904 int i;
6905
6906 if (GET_CODE (vals) != CONST_VECTOR)
6907 return NULL_RTX;
6908
6909 for (i = 1; i < n_elts; ++i)
6910 {
6911 x = CONST_VECTOR_ELT (vals, i);
6912 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6913 all_same = false;
6914 }
6915
6916 if (!all_same)
6917 return NULL_RTX;
6918
6919 /* We can load this constant by using DUP and a constant in a
6920 single ARM register. This will be cheaper than a vector
6921 load. */
6922 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6923 return gen_rtx_VEC_DUPLICATE (mode, x);
6924}
6925
6926
6927/* Generate code to load VALS, which is a PARALLEL containing only
6928 constants (for vec_init) or CONST_VECTOR, efficiently into a
6929 register. Returns an RTX to copy into the register, or NULL_RTX
6930 for a PARALLEL that can not be converted into a CONST_VECTOR. */
6931static rtx
6932aarch64_simd_make_constant (rtx vals)
6933{
6934 enum machine_mode mode = GET_MODE (vals);
6935 rtx const_dup;
6936 rtx const_vec = NULL_RTX;
6937 int n_elts = GET_MODE_NUNITS (mode);
6938 int n_const = 0;
6939 int i;
6940
6941 if (GET_CODE (vals) == CONST_VECTOR)
6942 const_vec = vals;
6943 else if (GET_CODE (vals) == PARALLEL)
6944 {
6945 /* A CONST_VECTOR must contain only CONST_INTs and
6946 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6947 Only store valid constants in a CONST_VECTOR. */
6948 for (i = 0; i < n_elts; ++i)
6949 {
6950 rtx x = XVECEXP (vals, 0, i);
6951 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
6952 n_const++;
6953 }
6954 if (n_const == n_elts)
6955 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6956 }
6957 else
6958 gcc_unreachable ();
6959
6960 if (const_vec != NULL_RTX
6961      && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
6962 /* Load using MOVI/MVNI. */
6963 return const_vec;
6964 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
6965 /* Loaded using DUP. */
6966 return const_dup;
6967 else if (const_vec != NULL_RTX)
6968 /* Load from constant pool. We can not take advantage of single-cycle
6969 LD1 because we need a PC-relative addressing mode. */
6970 return const_vec;
6971 else
6972 /* A PARALLEL containing something not valid inside CONST_VECTOR.
6973 We can not construct an initializer. */
6974 return NULL_RTX;
6975}
6976
6977void
6978aarch64_expand_vector_init (rtx target, rtx vals)
6979{
6980 enum machine_mode mode = GET_MODE (target);
6981 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6982 int n_elts = GET_MODE_NUNITS (mode);
6983 int n_var = 0, one_var = -1;
6984 bool all_same = true;
6985 rtx x, mem;
6986 int i;
6987
6988 x = XVECEXP (vals, 0, 0);
6989 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6990 n_var = 1, one_var = 0;
6991
6992 for (i = 1; i < n_elts; ++i)
6993 {
6994 x = XVECEXP (vals, 0, i);
6995 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6996 ++n_var, one_var = i;
6997
6998 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6999 all_same = false;
7000 }
7001
7002 if (n_var == 0)
7003 {
7004 rtx constant = aarch64_simd_make_constant (vals);
7005 if (constant != NULL_RTX)
7006 {
7007 emit_move_insn (target, constant);
7008 return;
7009 }
7010 }
7011
7012 /* Splat a single non-constant element if we can. */
7013 if (all_same)
7014 {
7015 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
7016 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
7017 return;
7018 }
7019
7020 /* One field is non-constant. Load constant then overwrite varying
7021 field. This is more efficient than using the stack. */
7022 if (n_var == 1)
7023 {
7024 rtx copy = copy_rtx (vals);
7025 rtx index = GEN_INT (one_var);
7026 enum insn_code icode;
7027
7028 /* Load constant part of vector, substitute neighboring value for
7029 varying element. */
7030 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
7031 aarch64_expand_vector_init (target, copy);
7032
7033 /* Insert variable. */
7034 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
7035 icode = optab_handler (vec_set_optab, mode);
7036 gcc_assert (icode != CODE_FOR_nothing);
7037 emit_insn (GEN_FCN (icode) (target, x, index));
7038 return;
7039 }
7040
7041 /* Construct the vector in memory one field at a time
7042 and load the whole vector. */
7043 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7044 for (i = 0; i < n_elts; i++)
7045 emit_move_insn (adjust_address_nv (mem, inner_mode,
7046 i * GET_MODE_SIZE (inner_mode)),
7047 XVECEXP (vals, 0, i));
7048 emit_move_insn (target, mem);
7049
7050}
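/* Illustrative example of the n_var == 1 path above, not part of the
   original file.  For a V4SI initialiser { x, 1, 2, 3 } with x in a
   register, the variable lane is temporarily replaced by its neighbour to
   give { 1, 1, 2, 3 }, that constant vector is materialised (MOVI/MVNI, DUP
   or a literal-pool load), and x is then inserted into lane 0 through the
   vec_set pattern.  */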
7051
7052static unsigned HOST_WIDE_INT
7053aarch64_shift_truncation_mask (enum machine_mode mode)
7054{
7055 return
7056 (aarch64_vector_mode_supported_p (mode)
7057 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
7058}
7059
7060#ifndef TLS_SECTION_ASM_FLAG
7061#define TLS_SECTION_ASM_FLAG 'T'
7062#endif
7063
7064void
7065aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7066 tree decl ATTRIBUTE_UNUSED)
7067{
7068 char flagchars[10], *f = flagchars;
7069
7070 /* If we have already declared this section, we can use an
7071 abbreviated form to switch back to it -- unless this section is
7072 part of a COMDAT groups, in which case GAS requires the full
7073 declaration every time. */
7074 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7075 && (flags & SECTION_DECLARED))
7076 {
7077 fprintf (asm_out_file, "\t.section\t%s\n", name);
7078 return;
7079 }
7080
7081 if (!(flags & SECTION_DEBUG))
7082 *f++ = 'a';
7083 if (flags & SECTION_WRITE)
7084 *f++ = 'w';
7085 if (flags & SECTION_CODE)
7086 *f++ = 'x';
7087 if (flags & SECTION_SMALL)
7088 *f++ = 's';
7089 if (flags & SECTION_MERGE)
7090 *f++ = 'M';
7091 if (flags & SECTION_STRINGS)
7092 *f++ = 'S';
7093 if (flags & SECTION_TLS)
7094 *f++ = TLS_SECTION_ASM_FLAG;
7095 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7096 *f++ = 'G';
7097 *f = '\0';
7098
7099 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7100
7101 if (!(flags & SECTION_NOTYPE))
7102 {
7103 const char *type;
7104 const char *format;
7105
7106 if (flags & SECTION_BSS)
7107 type = "nobits";
7108 else
7109 type = "progbits";
7110
7111#ifdef TYPE_OPERAND_FMT
7112 format = "," TYPE_OPERAND_FMT;
7113#else
7114 format = ",@%s";
7115#endif
7116
7117 fprintf (asm_out_file, format, type);
7118
7119 if (flags & SECTION_ENTSIZE)
7120 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7121 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7122 {
7123 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7124 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7125 else
7126 fprintf (asm_out_file, ",%s,comdat",
7127 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7128 }
7129 }
7130
7131 putc ('\n', asm_out_file);
7132}
7133
7134/* Select a format to encode pointers in exception handling data. */
7135int
7136aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7137{
7138 int type;
7139 switch (aarch64_cmodel)
7140 {
7141 case AARCH64_CMODEL_TINY:
7142 case AARCH64_CMODEL_TINY_PIC:
7143 case AARCH64_CMODEL_SMALL:
7144 case AARCH64_CMODEL_SMALL_PIC:
7145 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
7146 for everything. */
7147 type = DW_EH_PE_sdata4;
7148 break;
7149 default:
7150 /* No assumptions here. 8-byte relocs required. */
7151 type = DW_EH_PE_sdata8;
7152 break;
7153 }
7154 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7155}
7156
7157/* Emit load exclusive. */
7158
7159static void
7160aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7161 rtx mem, rtx model_rtx)
7162{
7163 rtx (*gen) (rtx, rtx, rtx);
7164
7165 switch (mode)
7166 {
7167 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7168 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7169 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7170 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7171 default:
7172 gcc_unreachable ();
7173 }
7174
7175 emit_insn (gen (rval, mem, model_rtx));
7176}
7177
7178/* Emit store exclusive. */
7179
7180static void
7181aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7182 rtx rval, rtx mem, rtx model_rtx)
7183{
7184 rtx (*gen) (rtx, rtx, rtx, rtx);
7185
7186 switch (mode)
7187 {
7188 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7189 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7190 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7191 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7192 default:
7193 gcc_unreachable ();
7194 }
7195
7196 emit_insn (gen (bval, rval, mem, model_rtx));
7197}
7198
7199/* Mark the previous jump instruction as unlikely. */
7200
7201static void
7202aarch64_emit_unlikely_jump (rtx insn)
7203{
7204  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
7205
7206  insn = emit_jump_insn (insn);
7207  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
7208}
7209
7210/* Expand a compare and swap pattern. */
7211
7212void
7213aarch64_expand_compare_and_swap (rtx operands[])
7214{
7215 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7216 enum machine_mode mode, cmp_mode;
7217 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7218
7219 bval = operands[0];
7220 rval = operands[1];
7221 mem = operands[2];
7222 oldval = operands[3];
7223 newval = operands[4];
7224 is_weak = operands[5];
7225 mod_s = operands[6];
7226 mod_f = operands[7];
7227 mode = GET_MODE (mem);
7228 cmp_mode = mode;
7229
7230 /* Normally the succ memory model must be stronger than fail, but in the
7231 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7232 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7233
7234 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7235 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7236 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7237
7238 switch (mode)
7239 {
7240 case QImode:
7241 case HImode:
7242 /* For short modes, we're going to perform the comparison in SImode,
7243 so do the zero-extension now. */
7244 cmp_mode = SImode;
7245 rval = gen_reg_rtx (SImode);
7246 oldval = convert_modes (SImode, mode, oldval, true);
7247 /* Fall through. */
7248
7249 case SImode:
7250 case DImode:
7251 /* Force the value into a register if needed. */
7252 if (!aarch64_plus_operand (oldval, mode))
7253 oldval = force_reg (cmp_mode, oldval);
7254 break;
7255
7256 default:
7257 gcc_unreachable ();
7258 }
7259
7260 switch (mode)
7261 {
7262 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7263 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7264 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7265 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7266 default:
7267 gcc_unreachable ();
7268 }
7269
7270 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7271
7272 if (mode == QImode || mode == HImode)
7273 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7274
7275 x = gen_rtx_REG (CCmode, CC_REGNUM);
7276 x = gen_rtx_EQ (SImode, x, const0_rtx);
7277 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7278}
7279
7280/* Split a compare and swap pattern. */
7281
7282void
7283aarch64_split_compare_and_swap (rtx operands[])
7284{
7285 rtx rval, mem, oldval, newval, scratch;
7286 enum machine_mode mode;
7287 bool is_weak;
7288 rtx label1, label2, x, cond;
7289
7290 rval = operands[0];
7291 mem = operands[1];
7292 oldval = operands[2];
7293 newval = operands[3];
7294 is_weak = (operands[4] != const0_rtx);
7295 scratch = operands[7];
7296 mode = GET_MODE (mem);
7297
7298 label1 = NULL_RTX;
7299 if (!is_weak)
7300 {
7301 label1 = gen_label_rtx ();
7302 emit_label (label1);
7303 }
7304 label2 = gen_label_rtx ();
7305
7306 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7307
7308 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7309 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7310 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7311 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7312 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7313
7314 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7315
7316 if (!is_weak)
7317 {
7318 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7319 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7320 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7321 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7322 }
7323 else
7324 {
7325 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7326 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7327 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7328 }
7329
7330 emit_label (label2);
7331}
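/* Rough shape of the sequence emitted by the split above for a strong
   compare-and-swap, illustrative only; the exact acquire/release variants
   of the exclusive load and store depend on the memory model operand:

     .Lretry:
	 ld*xr	rval, [mem]
	 cmp	rval, oldval
	 b.ne	.Ldone
	 st*xr	scratch, newval, [mem]
	 cbnz	scratch, .Lretry
     .Ldone:
  */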
7332
7333/* Split an atomic operation. */
7334
7335void
7336aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7337 rtx value, rtx model_rtx, rtx cond)
7338{
7339 enum machine_mode mode = GET_MODE (mem);
7340 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7341 rtx label, x;
7342
7343 label = gen_label_rtx ();
7344 emit_label (label);
7345
7346 if (new_out)
7347 new_out = gen_lowpart (wmode, new_out);
7348 if (old_out)
7349 old_out = gen_lowpart (wmode, old_out);
7350 else
7351 old_out = new_out;
7352 value = simplify_gen_subreg (wmode, value, mode, 0);
7353
7354 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7355
7356 switch (code)
7357 {
7358 case SET:
7359 new_out = value;
7360 break;
7361
7362 case NOT:
7363 x = gen_rtx_AND (wmode, old_out, value);
7364 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7365 x = gen_rtx_NOT (wmode, new_out);
7366 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7367 break;
7368
7369 case MINUS:
7370 if (CONST_INT_P (value))
7371 {
7372 value = GEN_INT (-INTVAL (value));
7373 code = PLUS;
7374 }
7375 /* Fall through. */
7376
7377 default:
7378 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7379 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7380 break;
7381 }
7382
7383 aarch64_emit_store_exclusive (mode, cond, mem,
7384 gen_lowpart (mode, new_out), model_rtx);
7385
7386 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7387 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7388 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7389 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7390}
7391
7392static void
7393aarch64_print_extension (void)
7394{
7395 const struct aarch64_option_extension *opt = NULL;
7396
7397 for (opt = all_extensions; opt->name != NULL; opt++)
7398 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7399 asm_fprintf (asm_out_file, "+%s", opt->name);
7400
7401 asm_fprintf (asm_out_file, "\n");
7402}
7403
7404static void
7405aarch64_start_file (void)
7406{
7407 if (selected_arch)
7408    {
7409      asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7410      aarch64_print_extension ();
7411    }
7412  else if (selected_cpu)
7413    {
7414      asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
7415      aarch64_print_extension ();
7416    }
7417 default_file_start();
7418}
7419
7420/* Target hook for c_mode_for_suffix. */
7421static enum machine_mode
7422aarch64_c_mode_for_suffix (char suffix)
7423{
7424 if (suffix == 'q')
7425 return TFmode;
7426
7427 return VOIDmode;
7428}
7429
7430/* We can only represent floating point constants which will fit in
7431 "quarter-precision" values. These values are characterised by
7432 a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
7433 by:
7434
7435 (-1)^s * (n/16) * 2^r
7436
7437 Where:
7438 's' is the sign bit.
7439 'n' is an integer in the range 16 <= n <= 31.
7440 'r' is an integer in the range -3 <= r <= 4. */
7441
7442/* Return true iff X can be represented by a quarter-precision
7443 floating point immediate operand X. Note, we cannot represent 0.0. */
7444bool
7445aarch64_float_const_representable_p (rtx x)
7446{
7447 /* This represents our current view of how many bits
7448 make up the mantissa. */
7449 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7450  int exponent;
7451 unsigned HOST_WIDE_INT mantissa, mask;
7452 HOST_WIDE_INT m1, m2;
7453 REAL_VALUE_TYPE r, m;
7454
7455 if (!CONST_DOUBLE_P (x))
7456 return false;
7457
7458 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7459
7460 /* We cannot represent infinities, NaNs or +/-zero. We won't
7461 know if we have +zero until we analyse the mantissa, but we
7462 can reject the other invalid values. */
7463 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7464 || REAL_VALUE_MINUS_ZERO (r))
7465 return false;
7466
7467  /* Extract exponent.  */
7468 r = real_value_abs (&r);
7469 exponent = REAL_EXP (&r);
7470
7471 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7472 highest (sign) bit, with a fixed binary point at bit point_pos.
7473 m1 holds the low part of the mantissa, m2 the high part.
7474 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7475 bits for the mantissa, this can fail (low bits will be lost). */
7476 real_ldexp (&m, &r, point_pos - exponent);
7477 REAL_VALUE_TO_INT (&m1, &m2, m);
7478
7479 /* If the low part of the mantissa has bits set we cannot represent
7480 the value. */
7481 if (m1 != 0)
7482 return false;
7483 /* We have rejected the lower HOST_WIDE_INT, so update our
7484 understanding of how many bits lie in the mantissa and
7485 look only at the high HOST_WIDE_INT. */
7486 mantissa = m2;
7487 point_pos -= HOST_BITS_PER_WIDE_INT;
7488
7489 /* We can only represent values with a mantissa of the form 1.xxxx. */
7490 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7491 if ((mantissa & mask) != 0)
7492 return false;
7493
7494 /* Having filtered unrepresentable values, we may now remove all
7495 but the highest 5 bits. */
7496 mantissa >>= point_pos - 5;
7497
7498 /* We cannot represent the value 0.0, so reject it. This is handled
7499 elsewhere. */
7500 if (mantissa == 0)
7501 return false;
7502
7503 /* Then, as bit 4 is always set, we can mask it off, leaving
7504 the mantissa in the range [0, 15]. */
7505 mantissa &= ~(1 << 4);
7506 gcc_assert (mantissa <= 15);
7507
7508 /* GCC internally does not use IEEE754-like encoding (where normalized
7509 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
7510 Our mantissa values are shifted 4 places to the left relative to
7511 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7512 by 5 places to correct for GCC's representation. */
7513 exponent = 5 - exponent;
7514
7515 return (exponent >= 0 && exponent <= 7);
7516}
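/* Worked examples of the quarter-precision check above, illustrative only:

     0.5    == (16/16) * 2^-1  -> representable (n == 16, r == -1)
     3.0    == (24/16) * 2^1   -> representable (n == 24, r == 1)
     0.0625 == (16/16) * 2^-4  -> rejected, r == -4 is below the -3 limit
     0.1                       -> rejected, not expressible as (n/16) * 2^r.  */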
7517
7518char*
7519aarch64_output_simd_mov_immediate (rtx const_vector,
7520 enum machine_mode mode,
7521 unsigned width)
7522{
7523  bool is_valid;
7524  static char templ[40];
7525  const char *mnemonic;
7526  const char *shift_op;
7527  unsigned int lane_count = 0;
7528  char element_char;
7529
7530  struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
7531
7532  /* This will return true to show const_vector is legal for use as either
7533     an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
7534     also update INFO to show how the immediate should be generated.  */
7535  is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
7536  gcc_assert (is_valid);
7537
7538  element_char = sizetochar (info.element_width);
7539  lane_count = width / info.element_width;
7540
7541  mode = GET_MODE_INNER (mode);
7542  if (mode == SFmode || mode == DFmode)
7543    {
7544      gcc_assert (info.shift == 0 && ! info.mvn);
7545 if (aarch64_float_const_zero_rtx_p (info.value))
7546 info.value = GEN_INT (0);
7547 else
7548 {
7549#define buf_size 20
7550 REAL_VALUE_TYPE r;
7551 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
7552 char float_buf[buf_size] = {'\0'};
7553 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
7554#undef buf_size
7555
7556 if (lane_count == 1)
7557 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
7558 else
7559 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 7560 lane_count, element_char, float_buf);
7561 return templ;
7562 }
7563    }
7564
7565  mnemonic = info.mvn ? "mvni" : "movi";
7566  shift_op = info.msl ? "msl" : "lsl";
7567
7568  if (lane_count == 1)
7569    snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
7570	      mnemonic, UINTVAL (info.value));
7571  else if (info.shift)
7572    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
7573	      ", %s %d", mnemonic, lane_count, element_char,
7574	      UINTVAL (info.value), shift_op, info.shift);
7575  else
7576    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
7577	      mnemonic, lane_count, element_char, UINTVAL (info.value));
7578  return templ;
7579}
7580
7581char*
7582aarch64_output_scalar_simd_mov_immediate (rtx immediate,
7583 enum machine_mode mode)
7584{
7585 enum machine_mode vmode;
7586
7587 gcc_assert (!VECTOR_MODE_P (mode));
7588 vmode = aarch64_simd_container_mode (mode, 64);
7589 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
7590 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
7591}
7592
7593/* Split operands into moves from op[1] + op[2] into op[0]. */
7594
7595void
7596aarch64_split_combinev16qi (rtx operands[3])
7597{
7598 unsigned int dest = REGNO (operands[0]);
7599 unsigned int src1 = REGNO (operands[1]);
7600 unsigned int src2 = REGNO (operands[2]);
7601 enum machine_mode halfmode = GET_MODE (operands[1]);
7602 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7603 rtx destlo, desthi;
7604
7605 gcc_assert (halfmode == V16QImode);
7606
7607 if (src1 == dest && src2 == dest + halfregs)
7608 {
7609 /* No-op move. Can't split to nothing; emit something. */
7610 emit_note (NOTE_INSN_DELETED);
7611 return;
7612 }
7613
7614 /* Preserve register attributes for variable tracking. */
7615 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7616 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7617 GET_MODE_SIZE (halfmode));
7618
7619 /* Special case of reversed high/low parts. */
7620 if (reg_overlap_mentioned_p (operands[2], destlo)
7621 && reg_overlap_mentioned_p (operands[1], desthi))
7622 {
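      /* The three XORs below swap operands[1] and operands[2] in place
	 (the a ^= b; b ^= a; a ^= b idiom), which performs the required
	 exchange when the destination halves alias the sources in reverse
	 order.  */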
7623 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7624 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7625 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7626 }
7627 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7628 {
7629 /* Try to avoid unnecessary moves if part of the result
7630 is in the right place already. */
7631 if (src1 != dest)
7632 emit_move_insn (destlo, operands[1]);
7633 if (src2 != dest + halfregs)
7634 emit_move_insn (desthi, operands[2]);
7635 }
7636 else
7637 {
7638 if (src2 != dest + halfregs)
7639 emit_move_insn (desthi, operands[2]);
7640 if (src1 != dest)
7641 emit_move_insn (destlo, operands[1]);
7642 }
7643}
7644
7645/* vec_perm support. */
7646
7647#define MAX_VECT_LEN 16
7648
7649struct expand_vec_perm_d
7650{
7651 rtx target, op0, op1;
7652 unsigned char perm[MAX_VECT_LEN];
7653 enum machine_mode vmode;
7654 unsigned char nelt;
7655 bool one_vector_p;
7656 bool testing_p;
7657};
7658
7659/* Generate a variable permutation. */
7660
7661static void
7662aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7663{
7664 enum machine_mode vmode = GET_MODE (target);
7665 bool one_vector_p = rtx_equal_p (op0, op1);
7666
7667 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7668 gcc_checking_assert (GET_MODE (op0) == vmode);
7669 gcc_checking_assert (GET_MODE (op1) == vmode);
7670 gcc_checking_assert (GET_MODE (sel) == vmode);
7671 gcc_checking_assert (TARGET_SIMD);
7672
7673 if (one_vector_p)
7674 {
7675 if (vmode == V8QImode)
7676 {
7677 /* Expand the argument to a V16QI mode by duplicating it. */
7678 rtx pair = gen_reg_rtx (V16QImode);
7679 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7680 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7681 }
7682 else
7683 {
7684 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7685 }
7686 }
7687 else
7688 {
7689 rtx pair;
7690
7691 if (vmode == V8QImode)
7692 {
7693 pair = gen_reg_rtx (V16QImode);
7694 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7695 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7696 }
7697 else
7698 {
7699 pair = gen_reg_rtx (OImode);
7700 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7701 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7702 }
7703 }
7704}
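/* For example, with two distinct V8QImode inputs the 64-bit vectors are
   first combined into a single 128-bit register so that one TBL
   instruction can index all sixteen source bytes through the byte
   selector SEL.  */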
7705
7706void
7707aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7708{
7709 enum machine_mode vmode = GET_MODE (target);
7710 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7711 bool one_vector_p = rtx_equal_p (op0, op1);
7712 rtx rmask[MAX_VECT_LEN], mask;
7713
7714 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7715
7716 /* The TBL instruction does not use a modulo index, so we must take care
7717 of that ourselves. */
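  /* For example, with V8QImode and two distinct source vectors the mask is
     15, so each selector byte is reduced modulo the 16 available source
     bytes, as the vec_perm semantics require; TBL itself would instead
     write zero for an out-of-range index.  */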
7718 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7719 for (i = 0; i < nelt; ++i)
7720 rmask[i] = mask;
7721 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7722 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7723
7724 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7725}
7726
cc4d934f
JG
7727/* Recognize patterns suitable for the TRN instructions. */
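/* For V4SImode with two operands, for example, the selector {0, 4, 2, 6}
   corresponds to TRN1 and {1, 5, 3, 7} to TRN2.  */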
7728static bool
7729aarch64_evpc_trn (struct expand_vec_perm_d *d)
7730{
7731 unsigned int i, odd, mask, nelt = d->nelt;
7732 rtx out, in0, in1, x;
7733 rtx (*gen) (rtx, rtx, rtx);
7734 enum machine_mode vmode = d->vmode;
7735
7736 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7737 return false;
7738
7739 /* Note that these are little-endian tests.
7740 We correct for big-endian later. */
7741 if (d->perm[0] == 0)
7742 odd = 0;
7743 else if (d->perm[0] == 1)
7744 odd = 1;
7745 else
7746 return false;
7747 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7748
7749 for (i = 0; i < nelt; i += 2)
7750 {
7751 if (d->perm[i] != i + odd)
7752 return false;
7753 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7754 return false;
7755 }
7756
7757 /* Success! */
7758 if (d->testing_p)
7759 return true;
7760
7761 in0 = d->op0;
7762 in1 = d->op1;
7763 if (BYTES_BIG_ENDIAN)
7764 {
7765 x = in0, in0 = in1, in1 = x;
7766 odd = !odd;
7767 }
7768 out = d->target;
7769
7770 if (odd)
7771 {
7772 switch (vmode)
7773 {
7774 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7775 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7776 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7777 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7778 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7779 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7780 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7781 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7782 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7783 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7784 default:
7785 return false;
7786 }
7787 }
7788 else
7789 {
7790 switch (vmode)
7791 {
7792 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7793 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7794 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7795 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7796 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7797 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7798 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7799 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7800 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7801 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7802 default:
7803 return false;
7804 }
7805 }
7806
7807 emit_insn (gen (out, in0, in1));
7808 return true;
7809}
7810
7811/* Recognize patterns suitable for the UZP instructions. */
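/* For V4SImode with two operands, for example, the selector {0, 2, 4, 6}
   (the even-numbered elements) corresponds to UZP1 and {1, 3, 5, 7}
   (the odd-numbered elements) to UZP2.  */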
7812static bool
7813aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7814{
7815 unsigned int i, odd, mask, nelt = d->nelt;
7816 rtx out, in0, in1, x;
7817 rtx (*gen) (rtx, rtx, rtx);
7818 enum machine_mode vmode = d->vmode;
7819
7820 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7821 return false;
7822
7823 /* Note that these are little-endian tests.
7824 We correct for big-endian later. */
7825 if (d->perm[0] == 0)
7826 odd = 0;
7827 else if (d->perm[0] == 1)
7828 odd = 1;
7829 else
7830 return false;
7831 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7832
7833 for (i = 0; i < nelt; i++)
7834 {
7835 unsigned elt = (i * 2 + odd) & mask;
7836 if (d->perm[i] != elt)
7837 return false;
7838 }
7839
7840 /* Success! */
7841 if (d->testing_p)
7842 return true;
7843
7844 in0 = d->op0;
7845 in1 = d->op1;
7846 if (BYTES_BIG_ENDIAN)
7847 {
7848 x = in0, in0 = in1, in1 = x;
7849 odd = !odd;
7850 }
7851 out = d->target;
7852
7853 if (odd)
7854 {
7855 switch (vmode)
7856 {
7857 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7858 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7859 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7860 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7861 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7862 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7863 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7864 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7865 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7866 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7867 default:
7868 return false;
7869 }
7870 }
7871 else
7872 {
7873 switch (vmode)
7874 {
7875 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7876 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7877 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7878 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7879 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7880 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7881 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7882 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7883 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7884 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7885 default:
7886 return false;
7887 }
7888 }
7889
7890 emit_insn (gen (out, in0, in1));
7891 return true;
7892}
7893
7894/* Recognize patterns suitable for the ZIP instructions. */
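/* For V4SImode with two operands, for example, the selector {0, 4, 1, 5}
   (interleaving the low halves) corresponds to ZIP1 and {2, 6, 3, 7}
   (interleaving the high halves) to ZIP2.  */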
7895static bool
7896aarch64_evpc_zip (struct expand_vec_perm_d *d)
7897{
7898 unsigned int i, high, mask, nelt = d->nelt;
7899 rtx out, in0, in1, x;
7900 rtx (*gen) (rtx, rtx, rtx);
7901 enum machine_mode vmode = d->vmode;
7902
7903 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7904 return false;
7905
7906 /* Note that these are little-endian tests.
7907 We correct for big-endian later. */
7908 high = nelt / 2;
7909 if (d->perm[0] == high)
7910 /* Do Nothing. */
7911 ;
7912 else if (d->perm[0] == 0)
7913 high = 0;
7914 else
7915 return false;
7916 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7917
7918 for (i = 0; i < nelt / 2; i++)
7919 {
7920 unsigned elt = (i + high) & mask;
7921 if (d->perm[i * 2] != elt)
7922 return false;
7923 elt = (elt + nelt) & mask;
7924 if (d->perm[i * 2 + 1] != elt)
7925 return false;
7926 }
7927
7928 /* Success! */
7929 if (d->testing_p)
7930 return true;
7931
7932 in0 = d->op0;
7933 in1 = d->op1;
7934 if (BYTES_BIG_ENDIAN)
7935 {
7936 x = in0, in0 = in1, in1 = x;
7937 high = !high;
7938 }
7939 out = d->target;
7940
7941 if (high)
7942 {
7943 switch (vmode)
7944 {
7945 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7946 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7947 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7948 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
7949 case V4SImode: gen = gen_aarch64_zip2v4si; break;
7950 case V2SImode: gen = gen_aarch64_zip2v2si; break;
7951 case V2DImode: gen = gen_aarch64_zip2v2di; break;
7952 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
7953 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
7954 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
7955 default:
7956 return false;
7957 }
7958 }
7959 else
7960 {
7961 switch (vmode)
7962 {
7963 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
7964 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
7965 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
7966 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
7967 case V4SImode: gen = gen_aarch64_zip1v4si; break;
7968 case V2SImode: gen = gen_aarch64_zip1v2si; break;
7969 case V2DImode: gen = gen_aarch64_zip1v2di; break;
7970 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
7971 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
7972 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
7973 default:
7974 return false;
7975 }
7976 }
7977
7978 emit_insn (gen (out, in0, in1));
7979 return true;
7980}
7981
91bd4114
JG
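/* Recognize patterns suitable for the DUP (element) instruction: every
   selector index is identical, e.g. {3, 3, 3, 3} for V4SImode, which
   broadcasts lane 3 of the first operand.  */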
7982static bool
7983aarch64_evpc_dup (struct expand_vec_perm_d *d)
7984{
7985 rtx (*gen) (rtx, rtx, rtx);
7986 rtx out = d->target;
7987 rtx in0;
7988 enum machine_mode vmode = d->vmode;
7989 unsigned int i, elt, nelt = d->nelt;
7990 rtx lane;
7991
7992 /* TODO: This may not be big-endian safe. */
7993 if (BYTES_BIG_ENDIAN)
7994 return false;
7995
7996 elt = d->perm[0];
7997 for (i = 1; i < nelt; i++)
7998 {
7999 if (elt != d->perm[i])
8000 return false;
8001 }
8002
8003 /* The generic preparation in aarch64_expand_vec_perm_const_1
8004 swaps the operand order and the permute indices if it finds
8005 d->perm[0] to be in the second operand. Thus, we can always
8006 use d->op0 and need not do any extra arithmetic to get the
8007 correct lane number. */
8008 in0 = d->op0;
8009 lane = GEN_INT (elt);
8010
8011 switch (vmode)
8012 {
8013 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
8014 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
8015 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
8016 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
8017 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
8018 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
8019 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
8020 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
8021 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
8022 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
8023 default:
8024 return false;
8025 }
8026
8027 emit_insn (gen (out, in0, lane));
8028 return true;
8029}
8030
88b08073
JG
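/* Fall back to a general permutation using the TBL instruction: the
   constant selector is materialised in a register and handed to
   aarch64_expand_vec_perm_1.  */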
8031static bool
8032aarch64_evpc_tbl (struct expand_vec_perm_d *d)
8033{
8034 rtx rperm[MAX_VECT_LEN], sel;
8035 enum machine_mode vmode = d->vmode;
8036 unsigned int i, nelt = d->nelt;
8037
8038 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
8039 numbering of elements for big-endian, we must reverse the order. */
8040 if (BYTES_BIG_ENDIAN)
8041 return false;
8042
8043 if (d->testing_p)
8044 return true;
8045
 8046 /* Generic code will try a constant permutation twice: once with the
 8047 original mode and again with the elements lowered to QImode.  So wait
 8048 for that retry rather than expanding the selector ourselves here. */
8049 if (vmode != V8QImode && vmode != V16QImode)
8050 return false;
8051
8052 for (i = 0; i < nelt; ++i)
8053 rperm[i] = GEN_INT (d->perm[i]);
8054 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
8055 sel = force_reg (vmode, sel);
8056
8057 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
8058 return true;
8059}
8060
8061static bool
8062aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
8063{
8064 /* The pattern matching functions above are written to look for a small
8065 number to begin the sequence (0, 1, N/2). If we begin with an index
8066 from the second operand, we can swap the operands. */
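  /* For example, the V4SImode selector {4, 0, 6, 2} is rewritten below as
     {0, 4, 2, 6} with the two operands exchanged, after which it matches
     TRN1.  */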
8067 if (d->perm[0] >= d->nelt)
8068 {
8069 unsigned i, nelt = d->nelt;
8070 rtx x;
8071
8072 for (i = 0; i < nelt; ++i)
8073 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
8074
8075 x = d->op0;
8076 d->op0 = d->op1;
8077 d->op1 = x;
8078 }
8079
8080 if (TARGET_SIMD)
cc4d934f
JG
8081 {
8082 if (aarch64_evpc_zip (d))
8083 return true;
8084 else if (aarch64_evpc_uzp (d))
8085 return true;
8086 else if (aarch64_evpc_trn (d))
8087 return true;
91bd4114
JG
8088 else if (aarch64_evpc_dup (d))
8089 return true;
cc4d934f
JG
8090 return aarch64_evpc_tbl (d);
8091 }
88b08073
JG
8092 return false;
8093}
8094
8095/* Expand a vec_perm_const pattern. */
8096
8097bool
8098aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
8099{
8100 struct expand_vec_perm_d d;
8101 int i, nelt, which;
8102
8103 d.target = target;
8104 d.op0 = op0;
8105 d.op1 = op1;
8106
8107 d.vmode = GET_MODE (target);
8108 gcc_assert (VECTOR_MODE_P (d.vmode));
8109 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8110 d.testing_p = false;
8111
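  /* WHICH records whether the selector uses elements of the first operand
     (bit 0), the second operand (bit 1), or both.  */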
8112 for (i = which = 0; i < nelt; ++i)
8113 {
8114 rtx e = XVECEXP (sel, 0, i);
8115 int ei = INTVAL (e) & (2 * nelt - 1);
8116 which |= (ei < nelt ? 1 : 2);
8117 d.perm[i] = ei;
8118 }
8119
8120 switch (which)
8121 {
8122 default:
8123 gcc_unreachable ();
8124
8125 case 3:
8126 d.one_vector_p = false;
8127 if (!rtx_equal_p (op0, op1))
8128 break;
8129
 8130 /* The elements of PERM refer to both operands, but the two operands
 8131 are in fact identical.  Allow easier matching of the permutation by
 8132 folding it onto the single input vector. */
8134 /* Fall Through. */
8135 case 2:
8136 for (i = 0; i < nelt; ++i)
8137 d.perm[i] &= nelt - 1;
8138 d.op0 = op1;
8139 d.one_vector_p = true;
8140 break;
8141
8142 case 1:
8143 d.op1 = op0;
8144 d.one_vector_p = true;
8145 break;
8146 }
8147
8148 return aarch64_expand_vec_perm_const_1 (&d);
8149}
8150
8151static bool
8152aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8153 const unsigned char *sel)
8154{
8155 struct expand_vec_perm_d d;
8156 unsigned int i, nelt, which;
8157 bool ret;
8158
8159 d.vmode = vmode;
8160 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8161 d.testing_p = true;
8162 memcpy (d.perm, sel, nelt);
8163
8164 /* Calculate whether all elements are in one vector. */
8165 for (i = which = 0; i < nelt; ++i)
8166 {
8167 unsigned char e = d.perm[i];
8168 gcc_assert (e < 2 * nelt);
8169 which |= (e < nelt ? 1 : 2);
8170 }
8171
8172 /* If all elements are from the second vector, reindex as if from the
8173 first vector. */
8174 if (which == 2)
8175 for (i = 0; i < nelt; ++i)
8176 d.perm[i] -= nelt;
8177
8178 /* Check whether the mask can be applied to a single vector. */
8179 d.one_vector_p = (which != 3);
8180
8181 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8182 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8183 if (!d.one_vector_p)
8184 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8185
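  /* Do a trial expansion into a throw-away sequence: we only care whether
     the expansion succeeds, not about any instructions it emits.  */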
8186 start_sequence ();
8187 ret = aarch64_expand_vec_perm_const_1 (&d);
8188 end_sequence ();
8189
8190 return ret;
8191}
8192
43e9d192
IB
8193#undef TARGET_ADDRESS_COST
8194#define TARGET_ADDRESS_COST aarch64_address_cost
8195
 8196/* This hook determines whether unnamed bitfields affect the alignment
 8197 of the containing structure.  It returns true if the structure should
 8198 inherit the alignment requirements of an unnamed bitfield's
 8199 type. */
8200#undef TARGET_ALIGN_ANON_BITFIELD
8201#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
8202
8203#undef TARGET_ASM_ALIGNED_DI_OP
8204#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8205
8206#undef TARGET_ASM_ALIGNED_HI_OP
8207#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8208
8209#undef TARGET_ASM_ALIGNED_SI_OP
8210#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8211
8212#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8213#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8214 hook_bool_const_tree_hwi_hwi_const_tree_true
8215
8216#undef TARGET_ASM_FILE_START
8217#define TARGET_ASM_FILE_START aarch64_start_file
8218
8219#undef TARGET_ASM_OUTPUT_MI_THUNK
8220#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8221
8222#undef TARGET_ASM_SELECT_RTX_SECTION
8223#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8224
8225#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8226#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8227
8228#undef TARGET_BUILD_BUILTIN_VA_LIST
8229#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8230
8231#undef TARGET_CALLEE_COPIES
8232#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8233
8234#undef TARGET_CAN_ELIMINATE
8235#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8236
8237#undef TARGET_CANNOT_FORCE_CONST_MEM
8238#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
8239
8240#undef TARGET_CONDITIONAL_REGISTER_USAGE
8241#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
8242
8243/* Only the least significant bit is used for initialization guard
8244 variables. */
8245#undef TARGET_CXX_GUARD_MASK_BIT
8246#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
8247
8248#undef TARGET_C_MODE_FOR_SUFFIX
8249#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
8250
8251#ifdef TARGET_BIG_ENDIAN_DEFAULT
8252#undef TARGET_DEFAULT_TARGET_FLAGS
8253#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
8254#endif
8255
8256#undef TARGET_CLASS_MAX_NREGS
8257#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
8258
119103ca
JG
8259#undef TARGET_BUILTIN_DECL
8260#define TARGET_BUILTIN_DECL aarch64_builtin_decl
8261
43e9d192
IB
8262#undef TARGET_EXPAND_BUILTIN
8263#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
8264
8265#undef TARGET_EXPAND_BUILTIN_VA_START
8266#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
8267
9697e620
JG
8268#undef TARGET_FOLD_BUILTIN
8269#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
8270
43e9d192
IB
8271#undef TARGET_FUNCTION_ARG
8272#define TARGET_FUNCTION_ARG aarch64_function_arg
8273
8274#undef TARGET_FUNCTION_ARG_ADVANCE
8275#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
8276
8277#undef TARGET_FUNCTION_ARG_BOUNDARY
8278#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
8279
8280#undef TARGET_FUNCTION_OK_FOR_SIBCALL
8281#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
8282
8283#undef TARGET_FUNCTION_VALUE
8284#define TARGET_FUNCTION_VALUE aarch64_function_value
8285
8286#undef TARGET_FUNCTION_VALUE_REGNO_P
8287#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
8288
8289#undef TARGET_FRAME_POINTER_REQUIRED
8290#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
8291
0ac198d3
JG
8292#undef TARGET_GIMPLE_FOLD_BUILTIN
8293#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
8294
43e9d192
IB
8295#undef TARGET_GIMPLIFY_VA_ARG_EXPR
8296#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
8297
8298#undef TARGET_INIT_BUILTINS
8299#define TARGET_INIT_BUILTINS aarch64_init_builtins
8300
8301#undef TARGET_LEGITIMATE_ADDRESS_P
8302#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
8303
8304#undef TARGET_LEGITIMATE_CONSTANT_P
8305#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
8306
8307#undef TARGET_LIBGCC_CMP_RETURN_MODE
8308#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
8309
38e8f663
YR
8310#undef TARGET_LRA_P
8311#define TARGET_LRA_P aarch64_lra_p
8312
ac2b960f
YZ
8313#undef TARGET_MANGLE_TYPE
8314#define TARGET_MANGLE_TYPE aarch64_mangle_type
8315
43e9d192
IB
8316#undef TARGET_MEMORY_MOVE_COST
8317#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
8318
8319#undef TARGET_MUST_PASS_IN_STACK
8320#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8321
8322/* This target hook should return true if accesses to volatile bitfields
8323 should use the narrowest mode possible. It should return false if these
8324 accesses should use the bitfield container type. */
8325#undef TARGET_NARROW_VOLATILE_BITFIELD
8326#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
8327
8328#undef TARGET_OPTION_OVERRIDE
8329#define TARGET_OPTION_OVERRIDE aarch64_override_options
8330
8331#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
8332#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
8333 aarch64_override_options_after_change
8334
8335#undef TARGET_PASS_BY_REFERENCE
8336#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
8337
8338#undef TARGET_PREFERRED_RELOAD_CLASS
8339#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
8340
8341#undef TARGET_SECONDARY_RELOAD
8342#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
8343
8344#undef TARGET_SHIFT_TRUNCATION_MASK
8345#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
8346
8347#undef TARGET_SETUP_INCOMING_VARARGS
8348#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
8349
8350#undef TARGET_STRUCT_VALUE_RTX
8351#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
8352
8353#undef TARGET_REGISTER_MOVE_COST
8354#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
8355
8356#undef TARGET_RETURN_IN_MEMORY
8357#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
8358
8359#undef TARGET_RETURN_IN_MSB
8360#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
8361
8362#undef TARGET_RTX_COSTS
8363#define TARGET_RTX_COSTS aarch64_rtx_costs
8364
8365#undef TARGET_TRAMPOLINE_INIT
8366#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
8367
8368#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
8369#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
8370
8371#undef TARGET_VECTOR_MODE_SUPPORTED_P
8372#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
8373
8374#undef TARGET_ARRAY_MODE_SUPPORTED_P
8375#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
8376
8990e73a
TB
8377#undef TARGET_VECTORIZE_ADD_STMT_COST
8378#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
8379
8380#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
8381#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
8382 aarch64_builtin_vectorization_cost
8383
43e9d192
IB
8384#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
8385#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
8386
42fc9a7f
JG
8387#undef TARGET_VECTORIZE_BUILTINS
8388#define TARGET_VECTORIZE_BUILTINS
8389
8390#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
8391#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
8392 aarch64_builtin_vectorized_function
8393
3b357264
JG
8394#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
8395#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
8396 aarch64_autovectorize_vector_sizes
8397
43e9d192
IB
8398/* Section anchor support. */
8399
8400#undef TARGET_MIN_ANCHOR_OFFSET
8401#define TARGET_MIN_ANCHOR_OFFSET -256
8402
8403/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
8404 byte offset; we can do much more for larger data types, but have no way
8405 to determine the size of the access. We assume accesses are aligned. */
8406#undef TARGET_MAX_ANCHOR_OFFSET
8407#define TARGET_MAX_ANCHOR_OFFSET 4095
8408
db0253a4
TB
8409#undef TARGET_VECTOR_ALIGNMENT
8410#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
8411
8412#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
8413#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
8414 aarch64_simd_vector_alignment_reachable
8415
88b08073
JG
8416/* vec_perm support. */
8417
8418#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
8419#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
8420 aarch64_vectorize_vec_perm_const_ok
8421
70f09188 8422
706b2314 8423#undef TARGET_FIXED_CONDITION_CODE_REGS
70f09188
AP
8424#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
8425
43e9d192
IB
8426struct gcc_target targetm = TARGET_INITIALIZER;
8427
8428#include "gt-aarch64.h"