 1/* Machine description for AArch64 architecture.
 2 Copyright (C) 2009-2013 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
25#include "insn-codes.h"
26#include "rtl.h"
27#include "insn-attr.h"
28#include "tree.h"
29#include "regs.h"
30#include "df.h"
31#include "hard-reg-set.h"
32#include "output.h"
33#include "expr.h"
34#include "reload.h"
35#include "toplev.h"
36#include "target.h"
37#include "target-def.h"
38#include "targhooks.h"
39#include "ggc.h"
40#include "function.h"
41#include "tm_p.h"
42#include "recog.h"
43#include "langhooks.h"
44#include "diagnostic-core.h"
45#include "gimple.h"
46#include "optabs.h"
47#include "dwarf2.h"
48
49/* Classifies an address.
50
51 ADDRESS_REG_IMM
52 A simple base register plus immediate offset.
53
54 ADDRESS_REG_WB
55 A base register indexed by immediate offset with writeback.
56
57 ADDRESS_REG_REG
58 A base register indexed by (optionally scaled) register.
59
60 ADDRESS_REG_UXTW
61 A base register indexed by (optionally scaled) zero-extended register.
62
63 ADDRESS_REG_SXTW
64 A base register indexed by (optionally scaled) sign-extended register.
65
66 ADDRESS_LO_SUM
67 A LO_SUM rtx with a base register and "LO12" symbol relocation.
68
69 ADDRESS_SYMBOLIC:
70 A constant symbolic address, in pc-relative literal pool. */
71
72enum aarch64_address_type {
73 ADDRESS_REG_IMM,
74 ADDRESS_REG_WB,
75 ADDRESS_REG_REG,
76 ADDRESS_REG_UXTW,
77 ADDRESS_REG_SXTW,
78 ADDRESS_LO_SUM,
79 ADDRESS_SYMBOLIC
80};
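
/* For illustration only: typical assembly forms these classes cover
   (the operand choices below are hypothetical examples, not requirements):

     ADDRESS_REG_IMM    [x0, #16]
     ADDRESS_REG_WB     [x0, #16]!  or  [x0], #16
     ADDRESS_REG_REG    [x0, x1, lsl #3]
     ADDRESS_REG_UXTW   [x0, w1, uxtw #3]
     ADDRESS_REG_SXTW   [x0, w1, sxtw #3]
     ADDRESS_LO_SUM     [x0, #:lo12:sym]
     ADDRESS_SYMBOLIC   a pc-relative literal load such as  ldr x0, .LC0  */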
81
82struct aarch64_address_info {
83 enum aarch64_address_type type;
84 rtx base;
85 rtx offset;
86 int shift;
87 enum aarch64_symbol_type symbol_type;
88};
89
90/* The current code model. */
91enum aarch64_code_model aarch64_cmodel;
92
93#ifdef HAVE_AS_TLS
94#undef TARGET_HAVE_TLS
95#define TARGET_HAVE_TLS 1
96#endif
97
98static bool aarch64_composite_type_p (const_tree, enum machine_mode);
99static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
100 const_tree,
101 enum machine_mode *, int *,
102 bool *);
103static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
104static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
105static void aarch64_override_options_after_change (void);
106static int aarch64_simd_valid_immediate (rtx, enum machine_mode, int, rtx *,
107 int *, unsigned char *, int *, int *);
108static bool aarch64_vector_mode_supported_p (enum machine_mode);
109static unsigned bit_count (unsigned HOST_WIDE_INT);
110static bool aarch64_const_vec_all_same_int_p (rtx,
111 HOST_WIDE_INT, HOST_WIDE_INT);
112
113static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
114 const unsigned char *sel);
115
116/* The processor for which instructions should be scheduled. */
117enum aarch64_processor aarch64_tune = generic;
118
119/* The current tuning set. */
120const struct tune_params *aarch64_tune_params;
121
122/* Mask to specify which instructions we are allowed to generate. */
123unsigned long aarch64_isa_flags = 0;
124
125/* Mask to specify which instruction scheduling options should be used. */
126unsigned long aarch64_tune_flags = 0;
127
128/* Tuning parameters. */
129
130#if HAVE_DESIGNATED_INITIALIZERS
131#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
132#else
133#define NAMED_PARAM(NAME, VAL) (VAL)
134#endif
135
136#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
137__extension__
138#endif
139static const struct cpu_rtx_cost_table generic_rtx_cost_table =
140{
141 NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
142 NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
143 NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
144 NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
145 NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
146 NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
147 NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
148 NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
149 NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
150 NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
151 NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
152 NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
153};
154
155#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
156__extension__
157#endif
158static const struct cpu_addrcost_table generic_addrcost_table =
159{
160 NAMED_PARAM (pre_modify, 0),
161 NAMED_PARAM (post_modify, 0),
162 NAMED_PARAM (register_offset, 0),
163 NAMED_PARAM (register_extend, 0),
164 NAMED_PARAM (imm_offset, 0)
165};
166
167#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
168__extension__
169#endif
170static const struct cpu_regmove_cost generic_regmove_cost =
171{
172 NAMED_PARAM (GP2GP, 1),
173 NAMED_PARAM (GP2FP, 2),
174 NAMED_PARAM (FP2GP, 2),
175 /* We currently do not provide direct support for TFmode Q->Q move.
176 Therefore we need to raise the cost above 2 in order to have
177 reload handle the situation. */
178 NAMED_PARAM (FP2FP, 4)
179};
180
181#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
182__extension__
183#endif
184static const struct tune_params generic_tunings =
185{
186 &generic_rtx_cost_table,
187 &generic_addrcost_table,
188 &generic_regmove_cost,
189 NAMED_PARAM (memmov_cost, 4)
190};
191
192/* A processor implementing AArch64. */
193struct processor
194{
195 const char *const name;
196 enum aarch64_processor core;
197 const char *arch;
198 const unsigned long flags;
199 const struct tune_params *const tune;
200};
201
202/* Processor cores implementing AArch64. */
203static const struct processor all_cores[] =
204{
205#define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
206 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
207#include "aarch64-cores.def"
208#undef AARCH64_CORE
209 {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
210 {NULL, aarch64_none, NULL, 0, NULL}
211};
212
213/* Architectures implementing AArch64. */
214static const struct processor all_architectures[] =
215{
216#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
217 {NAME, CORE, #ARCH, FLAGS, NULL},
218#include "aarch64-arches.def"
219#undef AARCH64_ARCH
220 {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
221 {NULL, aarch64_none, NULL, 0, NULL}
222};
223
 224/* Target specification. These are populated as command-line arguments
225 are processed, or NULL if not specified. */
226static const struct processor *selected_arch;
227static const struct processor *selected_cpu;
228static const struct processor *selected_tune;
229
230#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
231
232/* An ISA extension in the co-processor and main instruction set space. */
233struct aarch64_option_extension
234{
235 const char *const name;
236 const unsigned long flags_on;
237 const unsigned long flags_off;
238};
239
240/* ISA extensions in AArch64. */
241static const struct aarch64_option_extension all_extensions[] =
242{
243#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
244 {NAME, FLAGS_ON, FLAGS_OFF},
245#include "aarch64-option-extensions.def"
246#undef AARCH64_OPT_EXTENSION
247 {NULL, 0, 0}
248};
249
250/* Used to track the size of an address when generating a pre/post
251 increment address. */
252static enum machine_mode aarch64_memory_reference_mode;
253
254/* Used to force GTY into this file. */
255static GTY(()) int gty_dummy;
256
257/* A table of valid AArch64 "bitmask immediate" values for
258 logical instructions. */
259
260#define AARCH64_NUM_BITMASKS 5334
261static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
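
/* Illustrative examples of such "bitmask immediates": a value is encodable
   when it is a (rotated) contiguous run of ones replicated into every 2, 4,
   8, 16, 32 or 64-bit element of the register; e.g. 0x00ff00ff00ff00ff and
   0x0000ffff0000ffff are valid, while 0x1234 is not. */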
262
263/* Did we set flag_omit_frame_pointer just so
264 aarch64_frame_pointer_required would be called? */
265static bool faked_omit_frame_pointer;
266
267typedef enum aarch64_cond_code
268{
269 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
270 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
271 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
272}
273aarch64_cc;
274
275#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
276
277/* The condition codes of the processor, and the inverse function. */
278static const char * const aarch64_condition_codes[] =
279{
280 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
281 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
282};
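
/* For example, AARCH64_INVERSE_CONDITION_CODE simply flips the low bit of
   the encoding: AARCH64_EQ (0) <-> AARCH64_NE (1), AARCH64_GE (10) <->
   AARCH64_LT (11), and so on, matching the table above. */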
283
284/* Provide a mapping from gcc register numbers to dwarf register numbers. */
285unsigned
286aarch64_dbx_register_number (unsigned regno)
287{
288 if (GP_REGNUM_P (regno))
289 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
290 else if (regno == SP_REGNUM)
291 return AARCH64_DWARF_SP;
292 else if (FP_REGNUM_P (regno))
293 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
294
295 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
296 equivalent DWARF register. */
297 return DWARF_FRAME_REGISTERS;
298}
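
/* For example (assuming the usual AArch64 DWARF numbering from aarch64.h),
   x0-x30 map to DWARF registers 0-30, sp to 31 and v0-v31 to 64-95. */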
299
300/* Return TRUE if MODE is any of the large INT modes. */
301static bool
302aarch64_vect_struct_mode_p (enum machine_mode mode)
303{
304 return mode == OImode || mode == CImode || mode == XImode;
305}
306
307/* Return TRUE if MODE is any of the vector modes. */
308static bool
309aarch64_vector_mode_p (enum machine_mode mode)
310{
311 return aarch64_vector_mode_supported_p (mode)
312 || aarch64_vect_struct_mode_p (mode);
313}
314
315/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
316static bool
317aarch64_array_mode_supported_p (enum machine_mode mode,
318 unsigned HOST_WIDE_INT nelems)
319{
320 if (TARGET_SIMD
321 && AARCH64_VALID_SIMD_QREG_MODE (mode)
322 && (nelems >= 2 && nelems <= 4))
323 return true;
324
325 return false;
326}
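
/* For example (illustrative), an array of three V4SImode vectors -- the
   shape used by the LD3/ST3 structure load/store intrinsics -- is accepted
   here and can be carried in the 48-byte CImode. */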
327
328/* Implement HARD_REGNO_NREGS. */
329
330int
331aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
332{
333 switch (aarch64_regno_regclass (regno))
334 {
335 case FP_REGS:
336 case FP_LO_REGS:
337 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
338 default:
339 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
340 }
341 gcc_unreachable ();
342}
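
/* For example (assuming 8-byte UNITS_PER_WORD and 16-byte UNITS_PER_VREG),
   a TImode value needs two general registers but only one FP/SIMD register,
   while a 32-byte OImode value needs two FP/SIMD registers. */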
343
344/* Implement HARD_REGNO_MODE_OK. */
345
346int
347aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
348{
349 if (GET_MODE_CLASS (mode) == MODE_CC)
350 return regno == CC_REGNUM;
351
352 if (regno == SP_REGNUM || regno == FRAME_POINTER_REGNUM
353 || regno == ARG_POINTER_REGNUM)
354 return mode == Pmode;
355
356 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
357 return 1;
358
359 if (FP_REGNUM_P (regno))
360 {
361 if (aarch64_vect_struct_mode_p (mode))
362 return
363 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
364 else
365 return 1;
366 }
367
368 return 0;
369}
370
371/* Return true if calls to DECL should be treated as
 372 long-calls (i.e. called via a register). */
373static bool
374aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
375{
376 return false;
377}
378
379/* Return true if calls to symbol-ref SYM should be treated as
 380 long-calls (i.e. called via a register). */
381bool
382aarch64_is_long_call_p (rtx sym)
383{
384 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
385}
386
387/* Return true if the offsets to a zero/sign-extract operation
388 represent an expression that matches an extend operation. The
 390 operands represent the parameters from
390
391 (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
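/* For example (illustrative), with MODE == DImode, MULT_IMM == 4 and
   EXTRACT_IMM == 34, the extract above describes the low 32 bits of the
   register shifted left by 2 -- the operand an assembler would write as
   "w1, uxtw #2" in an extended-register add. */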
392bool
393aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
394 rtx extract_imm)
395{
396 HOST_WIDE_INT mult_val, extract_val;
397
398 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
399 return false;
400
401 mult_val = INTVAL (mult_imm);
402 extract_val = INTVAL (extract_imm);
403
404 if (extract_val > 8
405 && extract_val < GET_MODE_BITSIZE (mode)
406 && exact_log2 (extract_val & ~7) > 0
407 && (extract_val & 7) <= 4
408 && mult_val == (1 << (extract_val & 7)))
409 return true;
410
411 return false;
412}
413
414/* Emit an insn that's a simple single-set. Both the operands must be
415 known to be valid. */
416inline static rtx
417emit_set_insn (rtx x, rtx y)
418{
419 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
420}
421
422/* X and Y are two things to compare using CODE. Emit the compare insn and
423 return the rtx for register 0 in the proper mode. */
424rtx
425aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
426{
427 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
428 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
429
430 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
431 return cc_reg;
432}
433
434/* Build the SYMBOL_REF for __tls_get_addr. */
435
436static GTY(()) rtx tls_get_addr_libfunc;
437
438rtx
439aarch64_tls_get_addr (void)
440{
441 if (!tls_get_addr_libfunc)
442 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
443 return tls_get_addr_libfunc;
444}
445
446/* Return the TLS model to use for ADDR. */
447
448static enum tls_model
449tls_symbolic_operand_type (rtx addr)
450{
451 enum tls_model tls_kind = TLS_MODEL_NONE;
452 rtx sym, addend;
453
454 if (GET_CODE (addr) == CONST)
455 {
456 split_const (addr, &sym, &addend);
457 if (GET_CODE (sym) == SYMBOL_REF)
458 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
459 }
460 else if (GET_CODE (addr) == SYMBOL_REF)
461 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
462
463 return tls_kind;
464}
465
 466/* We'll allow lo_sum's in our legitimate addresses so that combine
 467 can take care of combining addresses where necessary, but for
 468 generation purposes, we'll generate the address as:
 469
470 RTL Absolute
471 tmp = hi (symbol_ref); adrp x1, foo
 472 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo12:foo
473 nop
474
475 PIC TLS
476 adrp x1, :got:foo adrp tmp, :tlsgd:foo
 477 ldr x1, [x1, #:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
478 bl __tls_get_addr
479 nop
480
481 Load TLS symbol, depending on TLS mechanism and TLS access model.
482
483 Global Dynamic - Traditional TLS:
484 adrp tmp, :tlsgd:imm
485 add dest, tmp, #:tlsgd_lo12:imm
486 bl __tls_get_addr
487
488 Global Dynamic - TLS Descriptors:
489 adrp dest, :tlsdesc:imm
490 ldr tmp, [dest, #:tlsdesc_lo12:imm]
491 add dest, dest, #:tlsdesc_lo12:imm
492 blr tmp
493 mrs tp, tpidr_el0
494 add dest, dest, tp
495
496 Initial Exec:
497 mrs tp, tpidr_el0
498 adrp tmp, :gottprel:imm
499 ldr dest, [tmp, #:gottprel_lo12:imm]
500 add dest, dest, tp
501
502 Local Exec:
503 mrs tp, tpidr_el0
504 add t0, tp, #:tprel_hi12:imm
 505 add t0, t0, #:tprel_lo12_nc:imm
506*/
507
508static void
509aarch64_load_symref_appropriately (rtx dest, rtx imm,
510 enum aarch64_symbol_type type)
511{
512 switch (type)
513 {
514 case SYMBOL_SMALL_ABSOLUTE:
515 {
516 rtx tmp_reg = dest;
517 if (can_create_pseudo_p ())
518 {
519 tmp_reg = gen_reg_rtx (Pmode);
520 }
521
522 emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
523 emit_insn (gen_add_losym (dest, tmp_reg, imm));
524 return;
525 }
526
527 case SYMBOL_SMALL_GOT:
528 {
529 rtx tmp_reg = dest;
530 if (can_create_pseudo_p ())
531 {
532 tmp_reg = gen_reg_rtx (Pmode);
533 }
534 emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
535 emit_insn (gen_ldr_got_small (dest, tmp_reg, imm));
536 return;
537 }
538
539 case SYMBOL_SMALL_TLSGD:
540 {
541 rtx insns;
542 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
543
544 start_sequence ();
545 emit_call_insn (gen_tlsgd_small (result, imm));
546 insns = get_insns ();
547 end_sequence ();
548
549 RTL_CONST_CALL_P (insns) = 1;
550 emit_libcall_block (insns, dest, result, imm);
551 return;
552 }
553
554 case SYMBOL_SMALL_TLSDESC:
555 {
556 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
557 rtx tp;
558
559 emit_insn (gen_tlsdesc_small (imm));
560 tp = aarch64_load_tp (NULL);
561 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
562 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
563 return;
564 }
565
566 case SYMBOL_SMALL_GOTTPREL:
567 {
568 rtx tmp_reg = gen_reg_rtx (Pmode);
569 rtx tp = aarch64_load_tp (NULL);
570 emit_insn (gen_tlsie_small (tmp_reg, imm));
571 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
572 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
573 return;
574 }
575
576 case SYMBOL_SMALL_TPREL:
577 {
578 rtx tp = aarch64_load_tp (NULL);
579 emit_insn (gen_tlsle_small (dest, tp, imm));
580 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
581 return;
582 }
583
584 default:
585 gcc_unreachable ();
586 }
587}
588
589/* Emit a move from SRC to DEST. Assume that the move expanders can
590 handle all moves if !can_create_pseudo_p (). The distinction is
591 important because, unlike emit_move_insn, the move expanders know
592 how to force Pmode objects into the constant pool even when the
593 constant pool address is not itself legitimate. */
594static rtx
595aarch64_emit_move (rtx dest, rtx src)
596{
597 return (can_create_pseudo_p ()
598 ? emit_move_insn (dest, src)
599 : emit_move_insn_1 (dest, src));
600}
601
602void
603aarch64_split_128bit_move (rtx dst, rtx src)
604{
605 rtx low_dst;
606
607 gcc_assert (GET_MODE (dst) == TImode);
608
609 if (REG_P (dst) && REG_P (src))
610 {
611 int src_regno = REGNO (src);
612 int dst_regno = REGNO (dst);
613
614 gcc_assert (GET_MODE (src) == TImode);
615
616 /* Handle r -> w, w -> r. */
617 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
618 {
619 emit_insn (gen_aarch64_movtilow_di (dst,
620 gen_lowpart (word_mode, src)));
621 emit_insn (gen_aarch64_movtihigh_di (dst,
622 gen_highpart (word_mode, src)));
623 return;
624 }
625 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
626 {
627 emit_insn (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst),
628 src));
629 emit_insn (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst),
630 src));
631 return;
632 }
633 /* Fall through to r -> r cases. */
634 }
635
636 low_dst = gen_lowpart (word_mode, dst);
637 if (REG_P (low_dst)
638 && reg_overlap_mentioned_p (low_dst, src))
639 {
640 aarch64_emit_move (gen_highpart (word_mode, dst),
641 gen_highpart_mode (word_mode, TImode, src));
642 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
643 }
644 else
645 {
646 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
647 aarch64_emit_move (gen_highpart (word_mode, dst),
648 gen_highpart_mode (word_mode, TImode, src));
649 }
650}
651
652bool
653aarch64_split_128bit_move_p (rtx dst, rtx src)
654{
655 return (! REG_P (src)
656 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
657}
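
/* For example (following the check above), a TImode copy between two
   general-register pairs, or between a general pair and a Q register, is
   split into two DImode moves, whereas a Q-register to Q-register copy is
   left alone (this predicate returns false for it). */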
658
659static rtx
660aarch64_force_temporary (rtx x, rtx value)
661{
662 if (can_create_pseudo_p ())
663 return force_reg (Pmode, value);
664 else
665 {
666 x = aarch64_emit_move (x, value);
667 return x;
668 }
669}
670
671
672static rtx
673aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
674{
675 if (!aarch64_plus_immediate (GEN_INT (offset), DImode))
676 {
677 rtx high;
678 /* Load the full offset into a register. This
679 might be improvable in the future. */
680 high = GEN_INT (offset);
681 offset = 0;
682 high = aarch64_force_temporary (temp, high);
683 reg = aarch64_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
684 }
685 return plus_constant (mode, reg, offset);
686}
687
688void
689aarch64_expand_mov_immediate (rtx dest, rtx imm)
690{
691 enum machine_mode mode = GET_MODE (dest);
692 unsigned HOST_WIDE_INT mask;
693 int i;
694 bool first;
695 unsigned HOST_WIDE_INT val;
696 bool subtargets;
697 rtx subtarget;
698 int one_match, zero_match;
699
700 gcc_assert (mode == SImode || mode == DImode);
701
702 /* Check on what type of symbol it is. */
703 if (GET_CODE (imm) == SYMBOL_REF
704 || GET_CODE (imm) == LABEL_REF
705 || GET_CODE (imm) == CONST)
706 {
707 rtx mem, base, offset;
708 enum aarch64_symbol_type sty;
709
710 /* If we have (const (plus symbol offset)), separate out the offset
711 before we start classifying the symbol. */
712 split_const (imm, &base, &offset);
713
714 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
715 switch (sty)
716 {
717 case SYMBOL_FORCE_TO_MEM:
718 if (offset != const0_rtx
719 && targetm.cannot_force_const_mem (mode, imm))
720 {
 721 gcc_assert (can_create_pseudo_p ());
722 base = aarch64_force_temporary (dest, base);
723 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
724 aarch64_emit_move (dest, base);
725 return;
726 }
727 mem = force_const_mem (mode, imm);
728 gcc_assert (mem);
729 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
730 return;
731
732 case SYMBOL_SMALL_TLSGD:
733 case SYMBOL_SMALL_TLSDESC:
734 case SYMBOL_SMALL_GOTTPREL:
735 case SYMBOL_SMALL_GOT:
736 if (offset != const0_rtx)
737 {
 738 gcc_assert (can_create_pseudo_p ());
739 base = aarch64_force_temporary (dest, base);
740 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
741 aarch64_emit_move (dest, base);
742 return;
743 }
744 /* FALLTHRU */
745
746 case SYMBOL_SMALL_TPREL:
747 case SYMBOL_SMALL_ABSOLUTE:
748 aarch64_load_symref_appropriately (dest, imm, sty);
749 return;
750
751 default:
752 gcc_unreachable ();
753 }
754 }
755
756 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
757 {
758 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
759 return;
760 }
761
762 if (!CONST_INT_P (imm))
763 {
764 if (GET_CODE (imm) == HIGH)
765 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
766 else
767 {
768 rtx mem = force_const_mem (mode, imm);
769 gcc_assert (mem);
770 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
771 }
772
773 return;
774 }
775
776 if (mode == SImode)
777 {
778 /* We know we can't do this in 1 insn, and we must be able to do it
779 in two; so don't mess around looking for sequences that don't buy
780 us anything. */
781 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
782 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
783 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
784 return;
785 }
786
787 /* Remaining cases are all for DImode. */
788
789 val = INTVAL (imm);
790 subtargets = optimize && can_create_pseudo_p ();
791
792 one_match = 0;
793 zero_match = 0;
794 mask = 0xffff;
795
796 for (i = 0; i < 64; i += 16, mask <<= 16)
797 {
798 if ((val & mask) == 0)
799 zero_match++;
800 else if ((val & mask) == mask)
801 one_match++;
802 }
803
804 if (one_match == 2)
805 {
806 mask = 0xffff;
807 for (i = 0; i < 64; i += 16, mask <<= 16)
808 {
809 if ((val & mask) != mask)
810 {
811 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
812 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
813 GEN_INT ((val >> i) & 0xffff)));
814 return;
815 }
816 }
817 gcc_unreachable ();
818 }
819
820 if (zero_match == 2)
821 goto simple_sequence;
822
823 mask = 0x0ffff0000UL;
824 for (i = 16; i < 64; i += 16, mask <<= 16)
825 {
826 HOST_WIDE_INT comp = mask & ~(mask - 1);
827
828 if (aarch64_uimm12_shift (val - (val & mask)))
829 {
830 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
831
832 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
833 emit_insn (gen_adddi3 (dest, subtarget,
834 GEN_INT (val - (val & mask))));
835 return;
836 }
837 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
838 {
839 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
840
841 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
842 GEN_INT ((val + comp) & mask)));
843 emit_insn (gen_adddi3 (dest, subtarget,
844 GEN_INT (val - ((val + comp) & mask))));
845 return;
846 }
847 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
848 {
849 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
850
851 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
852 GEN_INT ((val - comp) | ~mask)));
853 emit_insn (gen_adddi3 (dest, subtarget,
854 GEN_INT (val - ((val - comp) | ~mask))));
855 return;
856 }
857 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
858 {
859 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
860
861 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
862 GEN_INT (val | ~mask)));
863 emit_insn (gen_adddi3 (dest, subtarget,
864 GEN_INT (val - (val | ~mask))));
865 return;
866 }
867 }
868
869 /* See if we can do it by arithmetically combining two
870 immediates. */
871 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
872 {
873 int j;
874 mask = 0xffff;
875
876 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
877 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
878 {
879 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
880 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
881 GEN_INT (aarch64_bitmasks[i])));
882 emit_insn (gen_adddi3 (dest, subtarget,
883 GEN_INT (val - aarch64_bitmasks[i])));
884 return;
885 }
886
887 for (j = 0; j < 64; j += 16, mask <<= 16)
888 {
889 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
890 {
891 emit_insn (gen_rtx_SET (VOIDmode, dest,
892 GEN_INT (aarch64_bitmasks[i])));
893 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
894 GEN_INT ((val >> j) & 0xffff)));
895 return;
896 }
897 }
898 }
899
900 /* See if we can do it by logically combining two immediates. */
901 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
902 {
903 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
904 {
905 int j;
906
907 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
908 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
909 {
910 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
911 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
912 GEN_INT (aarch64_bitmasks[i])));
913 emit_insn (gen_iordi3 (dest, subtarget,
914 GEN_INT (aarch64_bitmasks[j])));
915 return;
916 }
917 }
918 else if ((val & aarch64_bitmasks[i]) == val)
919 {
920 int j;
921
922 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
923 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
924 {
925
926 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
927 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
928 GEN_INT (aarch64_bitmasks[j])));
929 emit_insn (gen_anddi3 (dest, subtarget,
930 GEN_INT (aarch64_bitmasks[i])));
931 return;
932 }
933 }
934 }
935
936 simple_sequence:
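  /* For example (illustrative), for VAL == 0x0000123400005678 this final
     loop emits
	mov  x0, #0x5678
	movk x0, #0x1234, lsl #32
     i.e. a MOV for the first non-zero 16-bit chunk and a MOVK for each
     remaining non-zero chunk. */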
937 first = true;
938 mask = 0xffff;
939 for (i = 0; i < 64; i += 16, mask <<= 16)
940 {
941 if ((val & mask) != 0)
942 {
943 if (first)
944 {
945 emit_insn (gen_rtx_SET (VOIDmode, dest,
946 GEN_INT (val & mask)));
947 first = false;
948 }
949 else
950 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
951 GEN_INT ((val >> i) & 0xffff)));
952 }
953 }
954}
955
956static bool
957aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
958{
959 /* Indirect calls are not currently supported. */
960 if (decl == NULL)
961 return false;
962
963 /* Cannot tail-call to long-calls, since these are outside of the
964 range of a branch instruction (we could handle this if we added
 965 support for indirect tail-calls). */
966 if (aarch64_decl_is_long_call_p (decl))
967 return false;
968
969 return true;
970}
971
972/* Implement TARGET_PASS_BY_REFERENCE. */
973
974static bool
975aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
976 enum machine_mode mode,
977 const_tree type,
978 bool named ATTRIBUTE_UNUSED)
979{
980 HOST_WIDE_INT size;
981 enum machine_mode dummymode;
982 int nregs;
983
984 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
985 size = (mode == BLKmode && type)
986 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
987
988 if (type)
989 {
990 /* Arrays always passed by reference. */
991 if (TREE_CODE (type) == ARRAY_TYPE)
992 return true;
993 /* Other aggregates based on their size. */
994 if (AGGREGATE_TYPE_P (type))
995 size = int_size_in_bytes (type);
996 }
997
 998 /* Variable sized arguments are always passed by reference. */
999 if (size < 0)
1000 return true;
1001
1002 /* Can this be a candidate to be passed in fp/simd register(s)? */
1003 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1004 &dummymode, &nregs,
1005 NULL))
1006 return false;
1007
1008 /* Arguments which are variable sized or larger than 2 registers are
 1009 passed by reference unless they are a homogeneous floating-point
1010 aggregate. */
1011 return size > 2 * UNITS_PER_WORD;
1012}
1013
1014/* Return TRUE if VALTYPE is padded to its least significant bits. */
1015static bool
1016aarch64_return_in_msb (const_tree valtype)
1017{
1018 enum machine_mode dummy_mode;
1019 int dummy_int;
1020
1021 /* Never happens in little-endian mode. */
1022 if (!BYTES_BIG_ENDIAN)
1023 return false;
1024
1025 /* Only composite types smaller than or equal to 16 bytes can
1026 be potentially returned in registers. */
1027 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1028 || int_size_in_bytes (valtype) <= 0
1029 || int_size_in_bytes (valtype) > 16)
1030 return false;
1031
1032 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1033 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1034 is always passed/returned in the least significant bits of fp/simd
1035 register(s). */
1036 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1037 &dummy_mode, &dummy_int, NULL))
1038 return false;
1039
1040 return true;
1041}
1042
1043/* Implement TARGET_FUNCTION_VALUE.
1044 Define how to find the value returned by a function. */
1045
1046static rtx
1047aarch64_function_value (const_tree type, const_tree func,
1048 bool outgoing ATTRIBUTE_UNUSED)
1049{
1050 enum machine_mode mode;
1051 int unsignedp;
1052 int count;
1053 enum machine_mode ag_mode;
1054
1055 mode = TYPE_MODE (type);
1056 if (INTEGRAL_TYPE_P (type))
1057 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1058
1059 if (aarch64_return_in_msb (type))
1060 {
1061 HOST_WIDE_INT size = int_size_in_bytes (type);
1062
1063 if (size % UNITS_PER_WORD != 0)
1064 {
1065 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1066 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1067 }
1068 }
1069
1070 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1071 &ag_mode, &count, NULL))
1072 {
1073 if (!aarch64_composite_type_p (type, mode))
1074 {
1075 gcc_assert (count == 1 && mode == ag_mode);
1076 return gen_rtx_REG (mode, V0_REGNUM);
1077 }
1078 else
1079 {
1080 int i;
1081 rtx par;
1082
1083 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1084 for (i = 0; i < count; i++)
1085 {
1086 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1087 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1088 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1089 XVECEXP (par, 0, i) = tmp;
1090 }
1091 return par;
1092 }
1093 }
1094 else
1095 return gen_rtx_REG (mode, R0_REGNUM);
1096}
1097
1098/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1099 Return true if REGNO is the number of a hard register in which the values
1100 of called function may come back. */
1101
1102static bool
1103aarch64_function_value_regno_p (const unsigned int regno)
1104{
1105 /* Maximum of 16 bytes can be returned in the general registers. Examples
1106 of 16-byte return values are: 128-bit integers and 16-byte small
1107 structures (excluding homogeneous floating-point aggregates). */
1108 if (regno == R0_REGNUM || regno == R1_REGNUM)
1109 return true;
1110
1111 /* Up to four fp/simd registers can return a function value, e.g. a
1112 homogeneous floating-point aggregate having four members. */
1113 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1114 return !TARGET_GENERAL_REGS_ONLY;
1115
1116 return false;
1117}
1118
1119/* Implement TARGET_RETURN_IN_MEMORY.
1120
1121 If the type T of the result of a function is such that
1122 void func (T arg)
1123 would require that arg be passed as a value in a register (or set of
1124 registers) according to the parameter passing rules, then the result
1125 is returned in the same registers as would be used for such an
1126 argument. */
1127
1128static bool
1129aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1130{
1131 HOST_WIDE_INT size;
1132 enum machine_mode ag_mode;
1133 int count;
1134
1135 if (!AGGREGATE_TYPE_P (type)
1136 && TREE_CODE (type) != COMPLEX_TYPE
1137 && TREE_CODE (type) != VECTOR_TYPE)
1138 /* Simple scalar types always returned in registers. */
1139 return false;
1140
1141 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1142 type,
1143 &ag_mode,
1144 &count,
1145 NULL))
1146 return false;
1147
1148 /* Types larger than 2 registers returned in memory. */
1149 size = int_size_in_bytes (type);
1150 return (size < 0 || size > 2 * UNITS_PER_WORD);
1151}
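
/* For example (standard AAPCS64 behaviour, shown for illustration): a
   struct of four doubles is an HFA and is returned in v0-v3; a 16-byte
   struct of integers is returned in x0/x1; a 24-byte struct is returned
   in memory via the x8 indirect-result pointer. */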
1152
1153static bool
1154aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1155 const_tree type, int *nregs)
1156{
1157 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1158 return aarch64_vfp_is_call_or_return_candidate (mode,
1159 type,
1160 &pcum->aapcs_vfp_rmode,
1161 nregs,
1162 NULL);
1163}
1164
1165/* Given MODE and TYPE of a function argument, return the alignment in
1166 bits. The idea is to suppress any stronger alignment requested by
1167 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1168 This is a helper function for local use only. */
1169
1170static unsigned int
1171aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1172{
1173 unsigned int alignment;
1174
1175 if (type)
1176 {
1177 if (!integer_zerop (TYPE_SIZE (type)))
1178 {
1179 if (TYPE_MODE (type) == mode)
1180 alignment = TYPE_ALIGN (type);
1181 else
1182 alignment = GET_MODE_ALIGNMENT (mode);
1183 }
1184 else
1185 alignment = 0;
1186 }
1187 else
1188 alignment = GET_MODE_ALIGNMENT (mode);
1189
1190 return alignment;
1191}
1192
1193/* Layout a function argument according to the AAPCS64 rules. The rule
1194 numbers refer to the rule numbers in the AAPCS64. */
1195
1196static void
1197aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1198 const_tree type,
1199 bool named ATTRIBUTE_UNUSED)
1200{
1201 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1202 int ncrn, nvrn, nregs;
1203 bool allocate_ncrn, allocate_nvrn;
1204
1205 /* We need to do this once per argument. */
1206 if (pcum->aapcs_arg_processed)
1207 return;
1208
1209 pcum->aapcs_arg_processed = true;
1210
1211 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1212 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1213 mode,
1214 type,
1215 &nregs);
1216
1217 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1218 The following code thus handles passing by SIMD/FP registers first. */
1219
1220 nvrn = pcum->aapcs_nvrn;
1221
 1222 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
 1223 and homogeneous short-vector aggregates (HVA). */
1224 if (allocate_nvrn)
1225 {
1226 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1227 {
1228 pcum->aapcs_nextnvrn = nvrn + nregs;
1229 if (!aarch64_composite_type_p (type, mode))
1230 {
1231 gcc_assert (nregs == 1);
1232 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1233 }
1234 else
1235 {
1236 rtx par;
1237 int i;
1238 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1239 for (i = 0; i < nregs; i++)
1240 {
1241 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1242 V0_REGNUM + nvrn + i);
1243 tmp = gen_rtx_EXPR_LIST
1244 (VOIDmode, tmp,
1245 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1246 XVECEXP (par, 0, i) = tmp;
1247 }
1248 pcum->aapcs_reg = par;
1249 }
1250 return;
1251 }
1252 else
1253 {
1254 /* C.3 NSRN is set to 8. */
1255 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1256 goto on_stack;
1257 }
1258 }
1259
1260 ncrn = pcum->aapcs_ncrn;
1261 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1262 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1263
1264
 1265 /* C6 - C9, though the sign and zero extension semantics are
 1266 handled elsewhere. This is the case where the argument fits
 1267 entirely in general registers. */
1268 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1269 {
1270 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1271
1272 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1273
1274 /* C.8 if the argument has an alignment of 16 then the NGRN is
1275 rounded up to the next even number. */
1276 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1277 {
1278 ++ncrn;
1279 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1280 }
1281 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1282 A reg is still generated for it, but the caller should be smart
1283 enough not to use it. */
1284 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1285 {
1286 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1287 }
1288 else
1289 {
1290 rtx par;
1291 int i;
1292
1293 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1294 for (i = 0; i < nregs; i++)
1295 {
1296 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1297 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1298 GEN_INT (i * UNITS_PER_WORD));
1299 XVECEXP (par, 0, i) = tmp;
1300 }
1301 pcum->aapcs_reg = par;
1302 }
1303
1304 pcum->aapcs_nextncrn = ncrn + nregs;
1305 return;
1306 }
1307
1308 /* C.11 */
1309 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1310
1311 /* The argument is passed on stack; record the needed number of words for
1312 this argument (we can re-use NREGS) and align the total size if
1313 necessary. */
1314on_stack:
1315 pcum->aapcs_stack_words = nregs;
1316 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1317 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1318 16 / UNITS_PER_WORD) + 1;
1319 return;
1320}
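
/* For example (illustrative), for a call such as f (int, double, __int128):
   the int is assigned w0, the double goes to v0, and the __int128, being
   16-byte aligned and two registers wide, has its NGRN rounded up to an
   even number (rule C.8 above) and is passed in x2/x3, leaving x1 unused. */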
1321
1322/* Implement TARGET_FUNCTION_ARG. */
1323
1324static rtx
1325aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1326 const_tree type, bool named)
1327{
1328 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1329 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1330
1331 if (mode == VOIDmode)
1332 return NULL_RTX;
1333
1334 aarch64_layout_arg (pcum_v, mode, type, named);
1335 return pcum->aapcs_reg;
1336}
1337
1338void
1339aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1340 const_tree fntype ATTRIBUTE_UNUSED,
1341 rtx libname ATTRIBUTE_UNUSED,
1342 const_tree fndecl ATTRIBUTE_UNUSED,
1343 unsigned n_named ATTRIBUTE_UNUSED)
1344{
1345 pcum->aapcs_ncrn = 0;
1346 pcum->aapcs_nvrn = 0;
1347 pcum->aapcs_nextncrn = 0;
1348 pcum->aapcs_nextnvrn = 0;
1349 pcum->pcs_variant = ARM_PCS_AAPCS64;
1350 pcum->aapcs_reg = NULL_RTX;
1351 pcum->aapcs_arg_processed = false;
1352 pcum->aapcs_stack_words = 0;
1353 pcum->aapcs_stack_size = 0;
1354
1355 return;
1356}
1357
1358static void
1359aarch64_function_arg_advance (cumulative_args_t pcum_v,
1360 enum machine_mode mode,
1361 const_tree type,
1362 bool named)
1363{
1364 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1365 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1366 {
1367 aarch64_layout_arg (pcum_v, mode, type, named);
1368 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1369 != (pcum->aapcs_stack_words != 0));
1370 pcum->aapcs_arg_processed = false;
1371 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1372 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1373 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1374 pcum->aapcs_stack_words = 0;
1375 pcum->aapcs_reg = NULL_RTX;
1376 }
1377}
1378
1379bool
1380aarch64_function_arg_regno_p (unsigned regno)
1381{
1382 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1383 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1384}
1385
1386/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1387 PARM_BOUNDARY bits of alignment, but will be given anything up
1388 to STACK_BOUNDARY bits if the type requires it. This makes sure
1389 that both before and after the layout of each argument, the Next
1390 Stacked Argument Address (NSAA) will have a minimum alignment of
1391 8 bytes. */
1392
1393static unsigned int
1394aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1395{
1396 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1397
1398 if (alignment < PARM_BOUNDARY)
1399 alignment = PARM_BOUNDARY;
1400 if (alignment > STACK_BOUNDARY)
1401 alignment = STACK_BOUNDARY;
1402 return alignment;
1403}
1404
1405/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1406
1407 Return true if an argument passed on the stack should be padded upwards,
1408 i.e. if the least-significant byte of the stack slot has useful data.
1409
1410 Small aggregate types are placed in the lowest memory address.
1411
1412 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1413
1414bool
1415aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1416{
1417 /* On little-endian targets, the least significant byte of every stack
1418 argument is passed at the lowest byte address of the stack slot. */
1419 if (!BYTES_BIG_ENDIAN)
1420 return true;
1421
1422 /* Otherwise, integral types and floating point types are padded downward:
1423 the least significant byte of a stack argument is passed at the highest
1424 byte address of the stack slot. */
1425 if (type
1426 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type))
1427 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1428 return false;
1429
1430 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1431 return true;
1432}
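
/* For example (illustrative), on a big-endian target a char argument passed
   on the stack is padded downward, so its byte sits at the highest address
   of the 8-byte slot, whereas a 3-byte struct is padded upward and starts
   at the lowest address of its slot. */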
1433
1434/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1435
1436 It specifies padding for the last (may also be the only)
 1437 element of a block move between registers and memory. Assuming
 1438 the block is in memory, padding upward means that the last element
 1439 is padded after its most significant byte, while in downward
 1440 padding, the last element is padded at its least significant byte
 1441 side.
1442
1443 Small aggregates and small complex types are always padded
1444 upwards.
1445
1446 We don't need to worry about homogeneous floating-point or
1447 short-vector aggregates; their move is not affected by the
1448 padding direction determined here. Regardless of endianness,
1449 each element of such an aggregate is put in the least
1450 significant bits of a fp/simd register.
1451
1452 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1453 register has useful data, and return the opposite if the most
1454 significant byte does. */
1455
1456bool
1457aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1458 bool first ATTRIBUTE_UNUSED)
1459{
1460
1461 /* Small composite types are always padded upward. */
1462 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1463 {
1464 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1465 : GET_MODE_SIZE (mode));
1466 if (size < 2 * UNITS_PER_WORD)
1467 return true;
1468 }
1469
1470 /* Otherwise, use the default padding. */
1471 return !BYTES_BIG_ENDIAN;
1472}
1473
1474static enum machine_mode
1475aarch64_libgcc_cmp_return_mode (void)
1476{
1477 return SImode;
1478}
1479
1480static bool
1481aarch64_frame_pointer_required (void)
1482{
1483 /* If the function contains dynamic stack allocations, we need to
1484 use the frame pointer to access the static parts of the frame. */
1485 if (cfun->calls_alloca)
1486 return true;
1487
1488 /* We may have turned flag_omit_frame_pointer on in order to have this
1489 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1490 and we'll check it here.
1491 If we really did set flag_omit_frame_pointer normally, then we return false
1492 (no frame pointer required) in all cases. */
1493
1494 if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1495 return false;
1496 else if (flag_omit_leaf_frame_pointer)
1497 return !crtl->is_leaf;
1498 return true;
1499}
1500
1501/* Mark the registers that need to be saved by the callee and calculate
1502 the size of the callee-saved registers area and frame record (both FP
1503 and LR may be omitted). */
1504static void
1505aarch64_layout_frame (void)
1506{
1507 HOST_WIDE_INT offset = 0;
1508 int regno;
1509
1510 if (reload_completed && cfun->machine->frame.laid_out)
1511 return;
1512
1513 cfun->machine->frame.fp_lr_offset = 0;
1514
1515 /* First mark all the registers that really need to be saved... */
1516 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1517 cfun->machine->frame.reg_offset[regno] = -1;
1518
1519 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1520 cfun->machine->frame.reg_offset[regno] = -1;
1521
1522 /* ... that includes the eh data registers (if needed)... */
1523 if (crtl->calls_eh_return)
1524 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1525 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1526
1527 /* ... and any callee saved register that dataflow says is live. */
1528 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1529 if (df_regs_ever_live_p (regno)
1530 && !call_used_regs[regno])
1531 cfun->machine->frame.reg_offset[regno] = 0;
1532
1533 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1534 if (df_regs_ever_live_p (regno)
1535 && !call_used_regs[regno])
1536 cfun->machine->frame.reg_offset[regno] = 0;
1537
1538 if (frame_pointer_needed)
1539 {
1540 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1541 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1542 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1543 }
1544
1545 /* Now assign stack slots for them. */
1546 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1547 if (cfun->machine->frame.reg_offset[regno] != -1)
1548 {
1549 cfun->machine->frame.reg_offset[regno] = offset;
1550 offset += UNITS_PER_WORD;
1551 }
1552
1553 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1554 if (cfun->machine->frame.reg_offset[regno] != -1)
1555 {
1556 cfun->machine->frame.reg_offset[regno] = offset;
1557 offset += UNITS_PER_WORD;
1558 }
1559
1560 if (frame_pointer_needed)
1561 {
1562 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1563 offset += UNITS_PER_WORD;
1564 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1565 }
1566
1567 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1568 {
1569 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1570 offset += UNITS_PER_WORD;
1571 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1572 }
1573
1574 cfun->machine->frame.padding0 =
1575 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1576 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1577
1578 cfun->machine->frame.saved_regs_size = offset;
1579 cfun->machine->frame.laid_out = true;
1580}
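
/* A worked example of the layout above (illustrative): a function that
   needs x19, x20 and d8 saved and requires the frame pointer gets
   reg_offset[x19] = 0, reg_offset[x20] = 8, reg_offset[d8] = 16,
   reg_offset[x29] = 24 and reg_offset[x30] = 32; the 40 bytes are then
   rounded up to a 48-byte saved_regs_size with padding0 = 8. */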
1581
1582/* Make the last instruction frame-related and note that it performs
1583 the operation described by FRAME_PATTERN. */
1584
1585static void
1586aarch64_set_frame_expr (rtx frame_pattern)
1587{
1588 rtx insn;
1589
1590 insn = get_last_insn ();
1591 RTX_FRAME_RELATED_P (insn) = 1;
1592 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1593 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1594 frame_pattern,
1595 REG_NOTES (insn));
1596}
1597
1598static bool
1599aarch64_register_saved_on_entry (int regno)
1600{
1601 return cfun->machine->frame.reg_offset[regno] != -1;
1602}
1603
1604
1605static void
1606aarch64_save_or_restore_fprs (int start_offset, int increment,
1607 bool restore, rtx base_rtx)
1608
1609{
1610 unsigned regno;
1611 unsigned regno2;
1612 rtx insn;
1613 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1614
1615
1616 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1617 {
1618 if (aarch64_register_saved_on_entry (regno))
1619 {
1620 rtx mem;
1621 mem = gen_mem_ref (DFmode,
1622 plus_constant (Pmode,
1623 base_rtx,
1624 start_offset));
1625
1626 for (regno2 = regno + 1;
1627 regno2 <= V31_REGNUM
1628 && !aarch64_register_saved_on_entry (regno2);
1629 regno2++)
1630 {
1631 /* Empty loop. */
1632 }
1633 if (regno2 <= V31_REGNUM &&
1634 aarch64_register_saved_on_entry (regno2))
1635 {
1636 rtx mem2;
1637 /* Next highest register to be saved. */
1638 mem2 = gen_mem_ref (DFmode,
1639 plus_constant
1640 (Pmode,
1641 base_rtx,
1642 start_offset + increment));
1643 if (restore == false)
1644 {
1645 insn = emit_insn
1646 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1647 mem2, gen_rtx_REG (DFmode, regno2)));
1648
1649 }
1650 else
1651 {
1652 insn = emit_insn
1653 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1654 gen_rtx_REG (DFmode, regno2), mem2));
1655
1656 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno));
1657 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno2));
1658 }
1659
1660 /* The first part of a frame-related parallel insn
1661 is always assumed to be relevant to the frame
 1662 calculations; subsequent parts are only
1663 frame-related if explicitly marked. */
1664 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1665 1)) = 1;
1666 regno = regno2;
1667 start_offset += increment * 2;
1668 }
1669 else
1670 {
1671 if (restore == false)
1672 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1673 else
1674 {
1675 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1676 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1677 }
1678 start_offset += increment;
1679 }
1680 RTX_FRAME_RELATED_P (insn) = 1;
1681 }
1682 }
1683
1684}
1685
1686
 1687/* OFFSET is the offset from the stack pointer at which the saves and
 1688 restores have to happen. */
1689static void
1690aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1691 bool restore)
1692{
1693 rtx insn;
1694 rtx base_rtx = stack_pointer_rtx;
1695 HOST_WIDE_INT start_offset = offset;
1696 HOST_WIDE_INT increment = UNITS_PER_WORD;
1697 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1698 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1699 unsigned regno;
1700 unsigned regno2;
1701
1702 for (regno = R0_REGNUM; regno <= limit; regno++)
1703 {
1704 if (aarch64_register_saved_on_entry (regno))
1705 {
1706 rtx mem;
1707 mem = gen_mem_ref (Pmode,
1708 plus_constant (Pmode,
1709 base_rtx,
1710 start_offset));
1711
1712 for (regno2 = regno + 1;
1713 regno2 <= limit
1714 && !aarch64_register_saved_on_entry (regno2);
1715 regno2++)
1716 {
1717 /* Empty loop. */
1718 }
1719 if (regno2 <= limit &&
1720 aarch64_register_saved_on_entry (regno2))
1721 {
1722 rtx mem2;
1723 /* Next highest register to be saved. */
1724 mem2 = gen_mem_ref (Pmode,
1725 plus_constant
1726 (Pmode,
1727 base_rtx,
1728 start_offset + increment));
1729 if (restore == false)
1730 {
1731 insn = emit_insn
1732 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1733 mem2, gen_rtx_REG (DImode, regno2)));
1734
1735 }
1736 else
1737 {
1738 insn = emit_insn
1739 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1740 gen_rtx_REG (DImode, regno2), mem2));
1741
1742 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1743 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1744 }
1745
1746 /* The first part of a frame-related parallel insn
1747 is always assumed to be relevant to the frame
 1748 calculations; subsequent parts are only
1749 frame-related if explicitly marked. */
1750 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1751 1)) = 1;
1752 regno = regno2;
1753 start_offset += increment * 2;
1754 }
1755 else
1756 {
1757 if (restore == false)
1758 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1759 else
1760 {
1761 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1762 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1763 }
1764 start_offset += increment;
1765 }
1766 RTX_FRAME_RELATED_P (insn) = 1;
1767 }
1768 }
1769
1770 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1771
1772}
1773
1774/* AArch64 stack frames generated by this compiler look like:
1775
1776 +-------------------------------+
1777 | |
1778 | incoming stack arguments |
1779 | |
1780 +-------------------------------+ <-- arg_pointer_rtx
1781 | |
1782 | callee-allocated save area |
1783 | for register varargs |
1784 | |
1785 +-------------------------------+
1786 | |
1787 | local variables |
1788 | |
1789 +-------------------------------+ <-- frame_pointer_rtx
1790 | |
1791 | callee-saved registers |
1792 | |
1793 +-------------------------------+
1794 | LR' |
1795 +-------------------------------+
1796 | FP' |
1797 P +-------------------------------+ <-- hard_frame_pointer_rtx
1798 | dynamic allocation |
1799 +-------------------------------+
1800 | |
1801 | outgoing stack arguments |
1802 | |
1803 +-------------------------------+ <-- stack_pointer_rtx
1804
1805 Dynamic stack allocations such as alloca insert data at point P.
1806 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
1807 hard_frame_pointer_rtx unchanged. */
1808
1809/* Generate the prologue instructions for entry into a function.
1810 Establish the stack frame by decreasing the stack pointer with a
1811 properly calculated size and, if necessary, create a frame record
1812 filled with the values of LR and previous frame pointer. The
 1813 current FP is also set up if it is in use. */
1814
1815void
1816aarch64_expand_prologue (void)
1817{
1818 /* sub sp, sp, #<frame_size>
1819 stp {fp, lr}, [sp, #<frame_size> - 16]
1820 add fp, sp, #<frame_size> - hardfp_offset
1821 stp {cs_reg}, [fp, #-16] etc.
1822
1823 sub sp, sp, <final_adjustment_if_any>
1824 */
1825 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
1826 HOST_WIDE_INT frame_size, offset;
1827 HOST_WIDE_INT fp_offset; /* FP offset from SP */
1828 rtx insn;
1829
1830 aarch64_layout_frame ();
1831 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
1832 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
1833 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
1834 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
1835 + crtl->outgoing_args_size);
1836 offset = frame_size = AARCH64_ROUND_UP (frame_size,
1837 STACK_BOUNDARY / BITS_PER_UNIT);
1838
1839 if (flag_stack_usage_info)
1840 current_function_static_stack_size = frame_size;
1841
1842 fp_offset = (offset
1843 - original_frame_size
1844 - cfun->machine->frame.saved_regs_size);
1845
1846 /* Store pairs and load pairs have a range of only -512 to 504. */
1847 if (offset >= 512)
1848 {
1849 /* When the frame has a large size, an initial decrease is done on
1850 the stack pointer to jump over the callee-allocated save area for
1851 register varargs, the local variable area and/or the callee-saved
1852 register area. This will allow the pre-index write-back
1853 store pair instructions to be used for setting up the stack frame
1854 efficiently. */
1855 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
1856 if (offset >= 512)
1857 offset = cfun->machine->frame.saved_regs_size;
1858
1859 frame_size -= (offset + crtl->outgoing_args_size);
1860 fp_offset = 0;
1861
1862 if (frame_size >= 0x1000000)
1863 {
1864 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
1865 emit_move_insn (op0, GEN_INT (-frame_size));
1866 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
1867 aarch64_set_frame_expr (gen_rtx_SET
1868 (Pmode, stack_pointer_rtx,
1869 gen_rtx_PLUS (Pmode,
1870 stack_pointer_rtx,
1871 GEN_INT (-frame_size))));
1872 }
1873 else if (frame_size > 0)
1874 {
1875 if ((frame_size & 0xfff) != frame_size)
1876 {
1877 insn = emit_insn (gen_add2_insn
1878 (stack_pointer_rtx,
1879 GEN_INT (-(frame_size
1880 & ~(HOST_WIDE_INT)0xfff))));
1881 RTX_FRAME_RELATED_P (insn) = 1;
1882 }
1883 if ((frame_size & 0xfff) != 0)
1884 {
1885 insn = emit_insn (gen_add2_insn
1886 (stack_pointer_rtx,
1887 GEN_INT (-(frame_size
1888 & (HOST_WIDE_INT)0xfff))));
1889 RTX_FRAME_RELATED_P (insn) = 1;
1890 }
1891 }
1892 }
1893 else
1894 frame_size = -1;
1895
1896 if (offset > 0)
1897 {
1898 /* Save the frame pointer and lr if the frame pointer is needed
1899 first. Make the frame pointer point to the location of the
1900 old frame pointer on the stack. */
1901 if (frame_pointer_needed)
1902 {
1903 rtx mem_fp, mem_lr;
1904
1905 if (fp_offset)
1906 {
1907 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
1908 GEN_INT (-offset)));
1909 RTX_FRAME_RELATED_P (insn) = 1;
1910 aarch64_set_frame_expr (gen_rtx_SET
1911 (Pmode, stack_pointer_rtx,
1912 gen_rtx_MINUS (Pmode,
1913 stack_pointer_rtx,
1914 GEN_INT (offset))));
1915 mem_fp = gen_frame_mem (DImode,
1916 plus_constant (Pmode,
1917 stack_pointer_rtx,
1918 fp_offset));
1919 mem_lr = gen_frame_mem (DImode,
1920 plus_constant (Pmode,
1921 stack_pointer_rtx,
1922 fp_offset
1923 + UNITS_PER_WORD));
1924 insn = emit_insn (gen_store_pairdi (mem_fp,
1925 hard_frame_pointer_rtx,
1926 mem_lr,
1927 gen_rtx_REG (DImode,
1928 LR_REGNUM)));
1929 }
1930 else
1931 {
1932 insn = emit_insn (gen_storewb_pairdi_di
1933 (stack_pointer_rtx, stack_pointer_rtx,
1934 hard_frame_pointer_rtx,
1935 gen_rtx_REG (DImode, LR_REGNUM),
1936 GEN_INT (-offset),
1937 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
1938 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
1939 }
1940
1941 /* The first part of a frame-related parallel insn is always
1942 assumed to be relevant to the frame calculations;
1943	 subsequent parts are only frame-related if explicitly
1944 marked. */
1945 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1946 RTX_FRAME_RELATED_P (insn) = 1;
1947
1948 /* Set up frame pointer to point to the location of the
1949 previous frame pointer on the stack. */
1950 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
1951 stack_pointer_rtx,
1952 GEN_INT (fp_offset)));
1953 aarch64_set_frame_expr (gen_rtx_SET
1954 (Pmode, hard_frame_pointer_rtx,
1955 gen_rtx_PLUS (Pmode,
1956 stack_pointer_rtx,
1957 GEN_INT (fp_offset))));
1958 RTX_FRAME_RELATED_P (insn) = 1;
1959 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
1960 hard_frame_pointer_rtx));
1961 }
1962 else
1963 {
1964 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
1965 GEN_INT (-offset)));
1966 RTX_FRAME_RELATED_P (insn) = 1;
1967 }
1968
1969 aarch64_save_or_restore_callee_save_registers
1970 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
1971 }
1972
1973  /* When offset >= 512,
1974 sub sp, sp, #<outgoing_args_size> */
1975 if (frame_size > -1)
1976 {
1977 if (crtl->outgoing_args_size > 0)
1978 {
1979 insn = emit_insn (gen_add2_insn
1980 (stack_pointer_rtx,
1981 GEN_INT (- crtl->outgoing_args_size)));
1982 RTX_FRAME_RELATED_P (insn) = 1;
1983 }
1984 }
1985}
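
/* For illustration only (not a guaranteed code sequence): for a
   frame-pointer-using function with 16 bytes of locals, no outgoing
   arguments and no callee-saved registers beyond FP/LR, the logic above
   (offset < 512, fp_offset == 0) would typically emit something like:

	stp	x29, x30, [sp, #-32]!	// storewb_pairdi, writeback by -32
	mov	x29, sp			// add fp, sp, #0

   Exact offsets and any further stores depend on aarch64_layout_frame
   and the set of live callee-saved registers.  */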
1986
1987/* Generate the epilogue instructions for returning from a function. */
1988void
1989aarch64_expand_epilogue (bool for_sibcall)
1990{
1991 HOST_WIDE_INT original_frame_size, frame_size, offset;
1992 HOST_WIDE_INT fp_offset;
1993 rtx insn;
44c0e7b9 1994 rtx cfa_reg;
43e9d192
IB
1995
1996 aarch64_layout_frame ();
1997 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
1998 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
1999 + crtl->outgoing_args_size);
2000 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2001 STACK_BOUNDARY / BITS_PER_UNIT);
2002
2003 fp_offset = (offset
2004 - original_frame_size
2005 - cfun->machine->frame.saved_regs_size);
2006
44c0e7b9
YZ
2007 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2008
2009  /* Store pairs and load pairs have a range of only -512 to 504.  */
43e9d192
IB
2010 if (offset >= 512)
2011 {
2012 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2013 if (offset >= 512)
2014 offset = cfun->machine->frame.saved_regs_size;
2015
2016 frame_size -= (offset + crtl->outgoing_args_size);
2017 fp_offset = 0;
2018 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2019 {
2020 insn = emit_insn (gen_add2_insn
2021 (stack_pointer_rtx,
2022 GEN_INT (crtl->outgoing_args_size)));
2023 RTX_FRAME_RELATED_P (insn) = 1;
2024 }
2025 }
2026 else
2027 frame_size = -1;
2028
2029 /* If there were outgoing arguments or we've done dynamic stack
2030 allocation, then restore the stack pointer from the frame
2031 pointer. This is at most one insn and more efficient than using
2032 GCC's internal mechanism. */
2033 if (frame_pointer_needed
2034 && (crtl->outgoing_args_size || cfun->calls_alloca))
2035 {
2036 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2037 hard_frame_pointer_rtx,
2038 GEN_INT (- fp_offset)));
2039 RTX_FRAME_RELATED_P (insn) = 1;
44c0e7b9
YZ
2040 /* As SP is set to (FP - fp_offset), according to the rules in
2041 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2042 from the value of SP from now on. */
2043 cfa_reg = stack_pointer_rtx;
43e9d192
IB
2044 }
2045
2046 aarch64_save_or_restore_callee_save_registers
2047 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2048
2049 /* Restore the frame pointer and lr if the frame pointer is needed. */
2050 if (offset > 0)
2051 {
2052 if (frame_pointer_needed)
2053 {
2054 rtx mem_fp, mem_lr;
2055
2056 if (fp_offset)
2057 {
2058 mem_fp = gen_frame_mem (DImode,
2059 plus_constant (Pmode,
2060 stack_pointer_rtx,
2061 fp_offset));
2062 mem_lr = gen_frame_mem (DImode,
2063 plus_constant (Pmode,
2064 stack_pointer_rtx,
2065 fp_offset
2066 + UNITS_PER_WORD));
2067 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2068 mem_fp,
2069 gen_rtx_REG (DImode,
2070 LR_REGNUM),
2071 mem_lr));
2072 }
2073 else
2074 {
2075 insn = emit_insn (gen_loadwb_pairdi_di
2076 (stack_pointer_rtx,
2077 stack_pointer_rtx,
2078 hard_frame_pointer_rtx,
2079 gen_rtx_REG (DImode, LR_REGNUM),
2080 GEN_INT (offset),
2081 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2082 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
44c0e7b9
YZ
2083 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2084 (gen_rtx_SET (Pmode, stack_pointer_rtx,
dc2d3c67
YZ
2085 plus_constant (Pmode, cfa_reg,
2086 offset))));
43e9d192
IB
2087 }
2088
2089 /* The first part of a frame-related parallel insn
2090 is always assumed to be relevant to the frame
2091	     calculations; subsequent parts are only
2092 frame-related if explicitly marked. */
2093 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2094 RTX_FRAME_RELATED_P (insn) = 1;
2095 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2096 add_reg_note (insn, REG_CFA_RESTORE,
2097 gen_rtx_REG (DImode, LR_REGNUM));
2098
2099 if (fp_offset)
2100 {
2101 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2102 GEN_INT (offset)));
2103 RTX_FRAME_RELATED_P (insn) = 1;
2104 }
2105 }
43e9d192
IB
2106 else
2107 {
2108 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2109 GEN_INT (offset)));
2110 RTX_FRAME_RELATED_P (insn) = 1;
2111 }
2112 }
2113
2114 /* Stack adjustment for exception handler. */
2115 if (crtl->calls_eh_return)
2116 {
2117 /* We need to unwind the stack by the offset computed by
2118 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2119 based on SP. Ideally we would update the SP and define the
2120 CFA along the lines of:
2121
2122 SP = SP + EH_RETURN_STACKADJ_RTX
2123 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2124
2125 However the dwarf emitter only understands a constant
2126 register offset.
2127
2128	 The solution chosen here is to use the otherwise unused IP0
2129 as a temporary register to hold the current SP value. The
2130 CFA is described using IP0 then SP is modified. */
2131
2132 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2133
2134 insn = emit_move_insn (ip0, stack_pointer_rtx);
2135 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2136 RTX_FRAME_RELATED_P (insn) = 1;
2137
2138 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2139
2140 /* Ensure the assignment to IP0 does not get optimized away. */
2141 emit_use (ip0);
2142 }
2143
2144 if (frame_size > -1)
2145 {
2146 if (frame_size >= 0x1000000)
2147 {
2148 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2149 emit_move_insn (op0, GEN_INT (frame_size));
2150 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2151 aarch64_set_frame_expr (gen_rtx_SET
2152 (Pmode, stack_pointer_rtx,
2153 gen_rtx_PLUS (Pmode,
2154 stack_pointer_rtx,
2155 GEN_INT (frame_size))));
2156 }
2157 else if (frame_size > 0)
2158 {
2159 if ((frame_size & 0xfff) != 0)
2160 {
2161 insn = emit_insn (gen_add2_insn
2162 (stack_pointer_rtx,
2163 GEN_INT ((frame_size
2164 & (HOST_WIDE_INT) 0xfff))));
2165 RTX_FRAME_RELATED_P (insn) = 1;
2166 }
2167 if ((frame_size & 0xfff) != frame_size)
2168 {
2169 insn = emit_insn (gen_add2_insn
2170 (stack_pointer_rtx,
2171 GEN_INT ((frame_size
2172 & ~ (HOST_WIDE_INT) 0xfff))));
2173 RTX_FRAME_RELATED_P (insn) = 1;
2174 }
2175 }
2176
2177 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2178 gen_rtx_PLUS (Pmode,
2179 stack_pointer_rtx,
2180 GEN_INT (offset))));
2181 }
2182
2183 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2184 if (!for_sibcall)
2185 emit_jump_insn (ret_rtx);
2186}
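
/* For illustration only: the epilogue matching the small-frame prologue
   sketched above would typically reduce to something like:

	ldp	x29, x30, [sp], #32	// loadwb_pairdi, writeback by +32
	ret

   with additional sp adjustments when outgoing arguments, alloca or a
   large local area forced the frame to be torn down in several steps.  */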
2187
2188/* Return the place to copy the exception unwinding return address to.
2189   This will probably be a stack slot, but could (in theory) be the
2190   return register.  */
2191rtx
2192aarch64_final_eh_return_addr (void)
2193{
2194 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2195 aarch64_layout_frame ();
2196 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2197 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2198 + crtl->outgoing_args_size);
2199 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2200 STACK_BOUNDARY / BITS_PER_UNIT);
2201 fp_offset = offset
2202 - original_frame_size
2203 - cfun->machine->frame.saved_regs_size;
2204
2205 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2206 return gen_rtx_REG (DImode, LR_REGNUM);
2207
2208 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2209 result in a store to save LR introduced by builtin_eh_return () being
2210 incorrectly deleted because the alias is not detected.
2211 So in the calculation of the address to copy the exception unwinding
2212   return address to, we distinguish two cases.
2213 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2214 we return a SP-relative location since all the addresses are SP-relative
2215 in this case. This prevents the store from being optimized away.
2216 If the fp_offset is not 0, then the addresses will be FP-relative and
2217 therefore we return a FP-relative location. */
2218
2219 if (frame_pointer_needed)
2220 {
2221 if (fp_offset)
2222 return gen_frame_mem (DImode,
2223 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2224 else
2225 return gen_frame_mem (DImode,
2226 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2227 }
2228
2229 /* If FP is not needed, we calculate the location of LR, which would be
2230 at the top of the saved registers block. */
2231
2232 return gen_frame_mem (DImode,
2233 plus_constant (Pmode,
2234 stack_pointer_rtx,
2235 fp_offset
2236 + cfun->machine->frame.saved_regs_size
2237 - 2 * UNITS_PER_WORD));
2238}
2239
2240/* Output code to build up a constant in a register. */
2241static void
d9600ae5 2242aarch64_build_constant (int regnum, HOST_WIDE_INT val)
43e9d192
IB
2243{
2244 if (aarch64_bitmask_imm (val, DImode))
d9600ae5 2245 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
43e9d192
IB
2246 else
2247 {
2248 int i;
2249 int ncount = 0;
2250 int zcount = 0;
2251 HOST_WIDE_INT valp = val >> 16;
2252 HOST_WIDE_INT valm;
2253 HOST_WIDE_INT tval;
2254
2255 for (i = 16; i < 64; i += 16)
2256 {
2257 valm = (valp & 0xffff);
2258
2259 if (valm != 0)
2260 ++ zcount;
2261
2262 if (valm != 0xffff)
2263 ++ ncount;
2264
2265 valp >>= 16;
2266 }
2267
2268 /* zcount contains the number of additional MOVK instructions
2269 required if the constant is built up with an initial MOVZ instruction,
2270 while ncount is the number of MOVK instructions required if starting
2271	 with a MOVN instruction.  Choose the sequence that needs the
2272	 fewer instructions, preferring MOVZ instructions when the two
2273	 counts are equal.  */
2274 if (ncount < zcount)
2275 {
d9600ae5
SN
2276 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2277 GEN_INT ((~val) & 0xffff));
43e9d192
IB
2278 tval = 0xffff;
2279 }
2280 else
2281 {
d9600ae5
SN
2282 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2283 GEN_INT (val & 0xffff));
43e9d192
IB
2284 tval = 0;
2285 }
2286
2287 val >>= 16;
2288
2289 for (i = 16; i < 64; i += 16)
2290 {
2291 if ((val & 0xffff) != tval)
d9600ae5
SN
2292 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2293 GEN_INT (i), GEN_INT (val & 0xffff)));
43e9d192
IB
2294 val >>= 16;
2295 }
2296 }
2297}
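
/* A worked example of the MOVZ/MOVN selection above, for illustration:
   assuming val = 0x123400005678, the 16-bit chunks above bits [15:0] are
   { 0x0000, 0x1234, 0x0000 }, so zcount = 1 (one non-zero chunk to patch
   after a MOVZ) and ncount = 3 (three non-0xffff chunks to patch after a
   MOVN).  MOVZ wins and the emitted sequence is roughly:

	movz	x<regnum>, #0x5678
	movk	x<regnum>, #0x1234, lsl #32

   where <regnum> is whatever register REGNUM names.  */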
2298
2299static void
d9600ae5 2300aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
43e9d192
IB
2301{
2302 HOST_WIDE_INT mdelta = delta;
d9600ae5
SN
2303 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2304 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
43e9d192
IB
2305
2306 if (mdelta < 0)
2307 mdelta = -mdelta;
2308
2309 if (mdelta >= 4096 * 4096)
2310 {
d9600ae5
SN
2311 aarch64_build_constant (scratchreg, delta);
2312 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
43e9d192
IB
2313 }
2314 else if (mdelta > 0)
2315 {
43e9d192 2316 if (mdelta >= 4096)
d9600ae5
SN
2317 {
2318 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2319 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2320 if (delta < 0)
2321 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2322 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2323 else
2324 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2325 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2326 }
43e9d192 2327 if (mdelta % 4096 != 0)
d9600ae5
SN
2328 {
2329 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2330 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2331 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2332 }
43e9d192
IB
2333 }
2334}
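
/* For illustration: a moderate delta such as 0x3456 is split by the
   function above into a shifted part and a residue, giving roughly:

	mov	<scratch>, #3			// 0x3456 / 4096
	add	<reg>, <reg>, <scratch>, lsl #12
	add	<reg>, <reg>, #0x456		// 0x3456 % 4096

   Deltas of 4096*4096 or more are instead materialised with
   aarch64_build_constant and added with a single register-register ADD.  */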
2335
2336/* Output code to add DELTA to the first argument, and then jump
2337 to FUNCTION. Used for C++ multiple inheritance. */
2338static void
2339aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2340 HOST_WIDE_INT delta,
2341 HOST_WIDE_INT vcall_offset,
2342 tree function)
2343{
2344 /* The this pointer is always in x0. Note that this differs from
2345     Arm where the this pointer may be bumped to r1 if r0 is required
2346 to return a pointer to an aggregate. On AArch64 a result value
2347 pointer will be in x8. */
2348 int this_regno = R0_REGNUM;
75f1d6fc 2349 rtx this_rtx, temp0, temp1, addr, insn, funexp;
43e9d192 2350
75f1d6fc
SN
2351 reload_completed = 1;
2352 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192
IB
2353
2354 if (vcall_offset == 0)
d9600ae5 2355 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
2356 else
2357 {
2358 gcc_assert ((vcall_offset & 0x7) == 0);
2359
75f1d6fc
SN
2360 this_rtx = gen_rtx_REG (Pmode, this_regno);
2361 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2362 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
43e9d192 2363
75f1d6fc
SN
2364 addr = this_rtx;
2365 if (delta != 0)
2366 {
2367 if (delta >= -256 && delta < 256)
2368 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2369 plus_constant (Pmode, this_rtx, delta));
2370 else
d9600ae5 2371 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
2372 }
2373
75f1d6fc
SN
2374 aarch64_emit_move (temp0, gen_rtx_MEM (Pmode, addr));
2375
43e9d192 2376 if (vcall_offset >= -256 && vcall_offset < 32768)
75f1d6fc 2377 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
2378 else
2379 {
d9600ae5 2380 aarch64_build_constant (IP1_REGNUM, vcall_offset);
75f1d6fc 2381 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
2382 }
2383
75f1d6fc
SN
2384       aarch64_emit_move (temp1, gen_rtx_MEM (Pmode, addr));
2385 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
2386 }
2387
75f1d6fc
SN
2388 /* Generate a tail call to the target function. */
2389 if (!TREE_USED (function))
2390 {
2391 assemble_external (function);
2392 TREE_USED (function) = 1;
2393 }
2394 funexp = XEXP (DECL_RTL (function), 0);
2395 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2396 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2397 SIBLING_CALL_P (insn) = 1;
2398
2399 insn = get_insns ();
2400 shorten_branches (insn);
2401 final_start_function (insn, file, 1);
2402 final (insn, file, 1);
43e9d192 2403 final_end_function ();
75f1d6fc
SN
2404
2405 /* Stop pretending to be a post-reload pass. */
2406 reload_completed = 0;
43e9d192
IB
2407}
2408
43e9d192
IB
2409static int
2410aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2411{
2412 if (GET_CODE (*x) == SYMBOL_REF)
2413 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2414
2415 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2416 TLS offsets, not real symbol references. */
2417 if (GET_CODE (*x) == UNSPEC
2418 && XINT (*x, 1) == UNSPEC_TLS)
2419 return -1;
2420
2421 return 0;
2422}
2423
2424static bool
2425aarch64_tls_referenced_p (rtx x)
2426{
2427 if (!TARGET_HAVE_TLS)
2428 return false;
2429
2430 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2431}
2432
2433
2434static int
2435aarch64_bitmasks_cmp (const void *i1, const void *i2)
2436{
2437 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2438 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2439
2440 if (*imm1 < *imm2)
2441 return -1;
2442 if (*imm1 > *imm2)
2443 return +1;
2444 return 0;
2445}
2446
2447
2448static void
2449aarch64_build_bitmask_table (void)
2450{
2451 unsigned HOST_WIDE_INT mask, imm;
2452 unsigned int log_e, e, s, r;
2453 unsigned int nimms = 0;
2454
2455 for (log_e = 1; log_e <= 6; log_e++)
2456 {
2457 e = 1 << log_e;
2458 if (e == 64)
2459 mask = ~(HOST_WIDE_INT) 0;
2460 else
2461 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2462 for (s = 1; s < e; s++)
2463 {
2464 for (r = 0; r < e; r++)
2465 {
2466 /* set s consecutive bits to 1 (s < 64) */
2467 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2468 /* rotate right by r */
2469 if (r != 0)
2470 imm = ((imm >> r) | (imm << (e - r))) & mask;
2471 /* replicate the constant depending on SIMD size */
2472 switch (log_e) {
2473 case 1: imm |= (imm << 2);
2474 case 2: imm |= (imm << 4);
2475 case 3: imm |= (imm << 8);
2476 case 4: imm |= (imm << 16);
2477 case 5: imm |= (imm << 32);
2478 case 6:
2479 break;
2480 default:
2481 gcc_unreachable ();
2482 }
2483 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2484 aarch64_bitmasks[nimms++] = imm;
2485 }
2486 }
2487 }
2488
2489 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2490 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2491 aarch64_bitmasks_cmp);
2492}
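
/* One entry of the table built above, as an illustration: for element
   size e = 8, s = 3 set bits and rotation r = 1, imm starts as 0b00000111,
   the rotate-right by one gives 0b10000011 (0x83), and the replication
   cascade widens it to 0x8383838383838383.  The table therefore holds every
   64-bit value expressible as a rotated run of ones replicated across
   2/4/8/16/32/64-bit elements, i.e. the AArch64 logical immediates.  */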
2493
2494
2495/* Return true if val can be encoded as a 12-bit unsigned immediate with
2496 a left shift of 0 or 12 bits. */
2497bool
2498aarch64_uimm12_shift (HOST_WIDE_INT val)
2499{
2500 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2501 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2502 );
2503}
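
/* Example values for the test above: 0xabc and 0xabc000 are both accepted
   (ADD/SUB immediate with LSL #0 or LSL #12 respectively), while 0xabc00
   is rejected because its set bits straddle the two 12-bit fields.  */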
2504
2505
2506/* Return true if val is an immediate that can be loaded into a
2507 register by a MOVZ instruction. */
2508static bool
2509aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2510{
2511 if (GET_MODE_SIZE (mode) > 4)
2512 {
2513 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2514 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2515 return 1;
2516 }
2517 else
2518 {
2519 /* Ignore sign extension. */
2520 val &= (HOST_WIDE_INT) 0xffffffff;
2521 }
2522 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2523 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2524}
2525
2526
2527/* Return true if val is a valid bitmask immediate. */
2528bool
2529aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2530{
2531 if (GET_MODE_SIZE (mode) < 8)
2532 {
2533 /* Replicate bit pattern. */
2534 val &= (HOST_WIDE_INT) 0xffffffff;
2535 val |= val << 32;
2536 }
2537 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2538 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2539}
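
/* Example values for the test above: 0x0f0f0f0f0f0f0f0f is a valid bitmask
   immediate (a run of four ones replicated in 8-bit elements), whereas an
   arbitrary value such as 0x0123456789abcdef is not, and has to be
   synthesised with MOVZ/MOVK or loaded from memory.  */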
2540
2541
2542/* Return true if val is an immediate that can be loaded into a
2543 register in a single instruction. */
2544bool
2545aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2546{
2547 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2548 return 1;
2549 return aarch64_bitmask_imm (val, mode);
2550}
2551
2552static bool
2553aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2554{
2555 rtx base, offset;
2556 if (GET_CODE (x) == HIGH)
2557 return true;
2558
2559 split_const (x, &base, &offset);
2560 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2561 return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR) != SYMBOL_FORCE_TO_MEM);
2562
2563 return aarch64_tls_referenced_p (x);
2564}
2565
2566/* Return true if register REGNO is a valid index register.
2567 STRICT_P is true if REG_OK_STRICT is in effect. */
2568
2569bool
2570aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2571{
2572 if (!HARD_REGISTER_NUM_P (regno))
2573 {
2574 if (!strict_p)
2575 return true;
2576
2577 if (!reg_renumber)
2578 return false;
2579
2580 regno = reg_renumber[regno];
2581 }
2582 return GP_REGNUM_P (regno);
2583}
2584
2585/* Return true if register REGNO is a valid base register.
2586 STRICT_P is true if REG_OK_STRICT is in effect. */
2587
2588bool
2589aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2590{
2591 if (!HARD_REGISTER_NUM_P (regno))
2592 {
2593 if (!strict_p)
2594 return true;
2595
2596 if (!reg_renumber)
2597 return false;
2598
2599 regno = reg_renumber[regno];
2600 }
2601
2602 /* The fake registers will be eliminated to either the stack or
2603 hard frame pointer, both of which are usually valid base registers.
2604 Reload deals with the cases where the eliminated form isn't valid. */
2605 return (GP_REGNUM_P (regno)
2606 || regno == SP_REGNUM
2607 || regno == FRAME_POINTER_REGNUM
2608 || regno == ARG_POINTER_REGNUM);
2609}
2610
2611/* Return true if X is a valid base register.
2612 STRICT_P is true if REG_OK_STRICT is in effect. */
2613
2614static bool
2615aarch64_base_register_rtx_p (rtx x, bool strict_p)
2616{
2617 if (!strict_p && GET_CODE (x) == SUBREG)
2618 x = SUBREG_REG (x);
2619
2620 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2621}
2622
2623/* Return true if address offset is a valid index. If it is, fill in INFO
2624 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2625
2626static bool
2627aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2628 enum machine_mode mode, bool strict_p)
2629{
2630 enum aarch64_address_type type;
2631 rtx index;
2632 int shift;
2633
2634 /* (reg:P) */
2635 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2636 && GET_MODE (x) == Pmode)
2637 {
2638 type = ADDRESS_REG_REG;
2639 index = x;
2640 shift = 0;
2641 }
2642 /* (sign_extend:DI (reg:SI)) */
2643 else if ((GET_CODE (x) == SIGN_EXTEND
2644 || GET_CODE (x) == ZERO_EXTEND)
2645 && GET_MODE (x) == DImode
2646 && GET_MODE (XEXP (x, 0)) == SImode)
2647 {
2648 type = (GET_CODE (x) == SIGN_EXTEND)
2649 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2650 index = XEXP (x, 0);
2651 shift = 0;
2652 }
2653 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2654 else if (GET_CODE (x) == MULT
2655 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2656 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2657 && GET_MODE (XEXP (x, 0)) == DImode
2658 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2659 && CONST_INT_P (XEXP (x, 1)))
2660 {
2661 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2662 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2663 index = XEXP (XEXP (x, 0), 0);
2664 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2665 }
2666 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2667 else if (GET_CODE (x) == ASHIFT
2668 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2669 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2670 && GET_MODE (XEXP (x, 0)) == DImode
2671 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2672 && CONST_INT_P (XEXP (x, 1)))
2673 {
2674 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2675 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2676 index = XEXP (XEXP (x, 0), 0);
2677 shift = INTVAL (XEXP (x, 1));
2678 }
2679 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2680 else if ((GET_CODE (x) == SIGN_EXTRACT
2681 || GET_CODE (x) == ZERO_EXTRACT)
2682 && GET_MODE (x) == DImode
2683 && GET_CODE (XEXP (x, 0)) == MULT
2684 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2685 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2686 {
2687 type = (GET_CODE (x) == SIGN_EXTRACT)
2688 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2689 index = XEXP (XEXP (x, 0), 0);
2690 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2691 if (INTVAL (XEXP (x, 1)) != 32 + shift
2692 || INTVAL (XEXP (x, 2)) != 0)
2693 shift = -1;
2694 }
2695 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2696 (const_int 0xffffffff<<shift)) */
2697 else if (GET_CODE (x) == AND
2698 && GET_MODE (x) == DImode
2699 && GET_CODE (XEXP (x, 0)) == MULT
2700 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2701 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2702 && CONST_INT_P (XEXP (x, 1)))
2703 {
2704 type = ADDRESS_REG_UXTW;
2705 index = XEXP (XEXP (x, 0), 0);
2706 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2707 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2708 shift = -1;
2709 }
2710 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2711 else if ((GET_CODE (x) == SIGN_EXTRACT
2712 || GET_CODE (x) == ZERO_EXTRACT)
2713 && GET_MODE (x) == DImode
2714 && GET_CODE (XEXP (x, 0)) == ASHIFT
2715 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2716 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2717 {
2718 type = (GET_CODE (x) == SIGN_EXTRACT)
2719 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2720 index = XEXP (XEXP (x, 0), 0);
2721 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2722 if (INTVAL (XEXP (x, 1)) != 32 + shift
2723 || INTVAL (XEXP (x, 2)) != 0)
2724 shift = -1;
2725 }
2726 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2727 (const_int 0xffffffff<<shift)) */
2728 else if (GET_CODE (x) == AND
2729 && GET_MODE (x) == DImode
2730 && GET_CODE (XEXP (x, 0)) == ASHIFT
2731 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2732 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2733 && CONST_INT_P (XEXP (x, 1)))
2734 {
2735 type = ADDRESS_REG_UXTW;
2736 index = XEXP (XEXP (x, 0), 0);
2737 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2738 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2739 shift = -1;
2740 }
2741 /* (mult:P (reg:P) (const_int scale)) */
2742 else if (GET_CODE (x) == MULT
2743 && GET_MODE (x) == Pmode
2744 && GET_MODE (XEXP (x, 0)) == Pmode
2745 && CONST_INT_P (XEXP (x, 1)))
2746 {
2747 type = ADDRESS_REG_REG;
2748 index = XEXP (x, 0);
2749 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2750 }
2751 /* (ashift:P (reg:P) (const_int shift)) */
2752 else if (GET_CODE (x) == ASHIFT
2753 && GET_MODE (x) == Pmode
2754 && GET_MODE (XEXP (x, 0)) == Pmode
2755 && CONST_INT_P (XEXP (x, 1)))
2756 {
2757 type = ADDRESS_REG_REG;
2758 index = XEXP (x, 0);
2759 shift = INTVAL (XEXP (x, 1));
2760 }
2761 else
2762 return false;
2763
2764 if (GET_CODE (index) == SUBREG)
2765 index = SUBREG_REG (index);
2766
2767 if ((shift == 0 ||
2768 (shift > 0 && shift <= 3
2769 && (1 << shift) == GET_MODE_SIZE (mode)))
2770 && REG_P (index)
2771 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2772 {
2773 info->type = type;
2774 info->offset = index;
2775 info->shift = shift;
2776 return true;
2777 }
2778
2779 return false;
2780}
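
/* For illustration, using hard register names x0/w1: for a DImode access,
   an index of the form
     (mult:DI (sign_extend:DI (reg:SI w1)) (const_int 8))
   is classified above as ADDRESS_REG_SXTW with shift 3 (accepted because
   1 << 3 matches the access size), and aarch64_print_operand_address later
   renders the full address as [x0,w1,sxtw 3].  */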
2781
2782static inline bool
2783offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2784{
2785 return (offset >= -64 * GET_MODE_SIZE (mode)
2786 && offset < 64 * GET_MODE_SIZE (mode)
2787 && offset % GET_MODE_SIZE (mode) == 0);
2788}
2789
2790static inline bool
2791offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
2792 HOST_WIDE_INT offset)
2793{
2794 return offset >= -256 && offset < 256;
2795}
2796
2797static inline bool
2798offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2799{
2800 return (offset >= 0
2801 && offset < 4096 * GET_MODE_SIZE (mode)
2802 && offset % GET_MODE_SIZE (mode) == 0);
2803}
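
/* Concrete ranges for the three predicates above, taking a DImode (8-byte)
   access as an example: [-512, 504] in steps of 8 (LDP/STP offsets),
   [-256, 255] unscaled (LDUR/STUR and writeback forms), and [0, 32760] in
   steps of 8 (LDR/STR unsigned immediate) respectively.  */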
2804
2805/* Return true if X is a valid address for machine mode MODE. If it is,
2806 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
2807 effect. OUTER_CODE is PARALLEL for a load/store pair. */
2808
2809static bool
2810aarch64_classify_address (struct aarch64_address_info *info,
2811 rtx x, enum machine_mode mode,
2812 RTX_CODE outer_code, bool strict_p)
2813{
2814 enum rtx_code code = GET_CODE (x);
2815 rtx op0, op1;
2816 bool allow_reg_index_p =
2817 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
2818
2819 /* Don't support anything other than POST_INC or REG addressing for
2820 AdvSIMD. */
2821 if (aarch64_vector_mode_p (mode)
2822 && (code != POST_INC && code != REG))
2823 return false;
2824
2825 switch (code)
2826 {
2827 case REG:
2828 case SUBREG:
2829 info->type = ADDRESS_REG_IMM;
2830 info->base = x;
2831 info->offset = const0_rtx;
2832 return aarch64_base_register_rtx_p (x, strict_p);
2833
2834 case PLUS:
2835 op0 = XEXP (x, 0);
2836 op1 = XEXP (x, 1);
2837 if (GET_MODE_SIZE (mode) != 0
2838 && CONST_INT_P (op1)
2839 && aarch64_base_register_rtx_p (op0, strict_p))
2840 {
2841 HOST_WIDE_INT offset = INTVAL (op1);
2842
2843 info->type = ADDRESS_REG_IMM;
2844 info->base = op0;
2845 info->offset = op1;
2846
2847 /* TImode and TFmode values are allowed in both pairs of X
2848 registers and individual Q registers. The available
2849 address modes are:
2850 X,X: 7-bit signed scaled offset
2851 Q: 9-bit signed offset
2852	     We conservatively require an offset representable in both modes.
2853 */
2854 if (mode == TImode || mode == TFmode)
2855 return (offset_7bit_signed_scaled_p (mode, offset)
2856 && offset_9bit_signed_unscaled_p (mode, offset));
2857
2858 if (outer_code == PARALLEL)
2859 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2860 && offset_7bit_signed_scaled_p (mode, offset));
2861 else
2862 return (offset_9bit_signed_unscaled_p (mode, offset)
2863 || offset_12bit_unsigned_scaled_p (mode, offset));
2864 }
2865
2866 if (allow_reg_index_p)
2867 {
2868 /* Look for base + (scaled/extended) index register. */
2869 if (aarch64_base_register_rtx_p (op0, strict_p)
2870 && aarch64_classify_index (info, op1, mode, strict_p))
2871 {
2872 info->base = op0;
2873 return true;
2874 }
2875 if (aarch64_base_register_rtx_p (op1, strict_p)
2876 && aarch64_classify_index (info, op0, mode, strict_p))
2877 {
2878 info->base = op1;
2879 return true;
2880 }
2881 }
2882
2883 return false;
2884
2885 case POST_INC:
2886 case POST_DEC:
2887 case PRE_INC:
2888 case PRE_DEC:
2889 info->type = ADDRESS_REG_WB;
2890 info->base = XEXP (x, 0);
2891 info->offset = NULL_RTX;
2892 return aarch64_base_register_rtx_p (info->base, strict_p);
2893
2894 case POST_MODIFY:
2895 case PRE_MODIFY:
2896 info->type = ADDRESS_REG_WB;
2897 info->base = XEXP (x, 0);
2898 if (GET_CODE (XEXP (x, 1)) == PLUS
2899 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
2900 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
2901 && aarch64_base_register_rtx_p (info->base, strict_p))
2902 {
2903 HOST_WIDE_INT offset;
2904 info->offset = XEXP (XEXP (x, 1), 1);
2905 offset = INTVAL (info->offset);
2906
2907 /* TImode and TFmode values are allowed in both pairs of X
2908 registers and individual Q registers. The available
2909 address modes are:
2910 X,X: 7-bit signed scaled offset
2911 Q: 9-bit signed offset
2912	     We conservatively require an offset representable in both modes.
2913 */
2914 if (mode == TImode || mode == TFmode)
2915 return (offset_7bit_signed_scaled_p (mode, offset)
2916 && offset_9bit_signed_unscaled_p (mode, offset));
2917
2918 if (outer_code == PARALLEL)
2919 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2920 && offset_7bit_signed_scaled_p (mode, offset));
2921 else
2922 return offset_9bit_signed_unscaled_p (mode, offset);
2923 }
2924 return false;
2925
2926 case CONST:
2927 case SYMBOL_REF:
2928 case LABEL_REF:
2929 /* load literal: pc-relative constant pool entry. */
2930 info->type = ADDRESS_SYMBOLIC;
2931 if (outer_code != PARALLEL)
2932 {
2933 rtx sym, addend;
2934
2935 split_const (x, &sym, &addend);
2936 return (GET_CODE (sym) == LABEL_REF
2937 || (GET_CODE (sym) == SYMBOL_REF
2938 && CONSTANT_POOL_ADDRESS_P (sym)));
2939 }
2940 return false;
2941
2942 case LO_SUM:
2943 info->type = ADDRESS_LO_SUM;
2944 info->base = XEXP (x, 0);
2945 info->offset = XEXP (x, 1);
2946 if (allow_reg_index_p
2947 && aarch64_base_register_rtx_p (info->base, strict_p))
2948 {
2949 rtx sym, offs;
2950 split_const (info->offset, &sym, &offs);
2951 if (GET_CODE (sym) == SYMBOL_REF
2952 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
2953 == SYMBOL_SMALL_ABSOLUTE))
2954 {
2955 /* The symbol and offset must be aligned to the access size. */
2956 unsigned int align;
2957 unsigned int ref_size;
2958
2959 if (CONSTANT_POOL_ADDRESS_P (sym))
2960 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
2961 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
2962 {
2963 tree exp = SYMBOL_REF_DECL (sym);
2964 align = TYPE_ALIGN (TREE_TYPE (exp));
2965 align = CONSTANT_ALIGNMENT (exp, align);
2966 }
2967 else if (SYMBOL_REF_DECL (sym))
2968 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
2969 else
2970 align = BITS_PER_UNIT;
2971
2972 ref_size = GET_MODE_SIZE (mode);
2973 if (ref_size == 0)
2974 ref_size = GET_MODE_SIZE (DImode);
2975
2976 return ((INTVAL (offs) & (ref_size - 1)) == 0
2977 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
2978 }
2979 }
2980 return false;
2981
2982 default:
2983 return false;
2984 }
2985}
2986
2987bool
2988aarch64_symbolic_address_p (rtx x)
2989{
2990 rtx offset;
2991
2992 split_const (x, &x, &offset);
2993 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
2994}
2995
2996/* Classify the base of symbolic expression X, given that X appears in
2997 context CONTEXT. */
2998static enum aarch64_symbol_type
2999aarch64_classify_symbolic_expression (rtx x, enum aarch64_symbol_context context)
3000{
3001 rtx offset;
3002 split_const (x, &x, &offset);
3003 return aarch64_classify_symbol (x, context);
3004}
3005
3006
3007/* Return TRUE if X is a legitimate address for accessing memory in
3008 mode MODE. */
3009static bool
3010aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3011{
3012 struct aarch64_address_info addr;
3013
3014 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3015}
3016
3017/* Return TRUE if X is a legitimate address for accessing memory in
3018 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3019 pair operation. */
3020bool
3021aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3022 RTX_CODE outer_code, bool strict_p)
3023{
3024 struct aarch64_address_info addr;
3025
3026 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3027}
3028
3029/* Return TRUE if rtx X is the immediate constant 0.0.  */
3030bool
3520f7cc 3031aarch64_float_const_zero_rtx_p (rtx x)
43e9d192
IB
3032{
3033 REAL_VALUE_TYPE r;
3034
3035 if (GET_MODE (x) == VOIDmode)
3036 return false;
3037
3038 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3039 if (REAL_VALUE_MINUS_ZERO (r))
3040 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3041 return REAL_VALUES_EQUAL (r, dconst0);
3042}
3043
70f09188
AP
3044/* Return the fixed registers used for condition codes. */
3045
3046static bool
3047aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3048{
3049 *p1 = CC_REGNUM;
3050 *p2 = INVALID_REGNUM;
3051 return true;
3052}
3053
43e9d192
IB
3054enum machine_mode
3055aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3056{
3057 /* All floating point compares return CCFP if it is an equality
3058 comparison, and CCFPE otherwise. */
3059 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3060 {
3061 switch (code)
3062 {
3063 case EQ:
3064 case NE:
3065 case UNORDERED:
3066 case ORDERED:
3067 case UNLT:
3068 case UNLE:
3069 case UNGT:
3070 case UNGE:
3071 case UNEQ:
3072 case LTGT:
3073 return CCFPmode;
3074
3075 case LT:
3076 case LE:
3077 case GT:
3078 case GE:
3079 return CCFPEmode;
3080
3081 default:
3082 gcc_unreachable ();
3083 }
3084 }
3085
3086 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3087 && y == const0_rtx
3088 && (code == EQ || code == NE || code == LT || code == GE)
3089 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS))
3090 return CC_NZmode;
3091
3092 /* A compare with a shifted operand. Because of canonicalization,
3093 the comparison will have to be swapped when we emit the assembly
3094 code. */
3095 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3096 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3097 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3098 || GET_CODE (x) == LSHIFTRT
3099 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3100 return CC_SWPmode;
3101
3102 /* A compare of a mode narrower than SI mode against zero can be done
3103 by extending the value in the comparison. */
3104 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3105 && y == const0_rtx)
3106 /* Only use sign-extension if we really need it. */
3107 return ((code == GT || code == GE || code == LE || code == LT)
3108 ? CC_SESWPmode : CC_ZESWPmode);
3109
3110 /* For everything else, return CCmode. */
3111 return CCmode;
3112}
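
/* Two sample classifications from the function above, for illustration.
   Comparing (plus:DI x y) against zero for EQ/NE/LT/GE yields CC_NZmode,
   so the comparison can be folded into an ADDS/SUBS that only needs the
   N and Z flags.  A comparison whose first operand is a shift or extend,
   e.g. (ashift:DI (reg) (const_int 2)) compared against a register, yields
   CC_SWPmode, recording that the operands must be swapped when the CMP
   instruction is finally output.  */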
3113
3114static unsigned
3115aarch64_get_condition_code (rtx x)
3116{
3117 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3118 enum rtx_code comp_code = GET_CODE (x);
3119
3120 if (GET_MODE_CLASS (mode) != MODE_CC)
3121 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3122
3123 switch (mode)
3124 {
3125 case CCFPmode:
3126 case CCFPEmode:
3127 switch (comp_code)
3128 {
3129 case GE: return AARCH64_GE;
3130 case GT: return AARCH64_GT;
3131 case LE: return AARCH64_LS;
3132 case LT: return AARCH64_MI;
3133 case NE: return AARCH64_NE;
3134 case EQ: return AARCH64_EQ;
3135 case ORDERED: return AARCH64_VC;
3136 case UNORDERED: return AARCH64_VS;
3137 case UNLT: return AARCH64_LT;
3138 case UNLE: return AARCH64_LE;
3139 case UNGT: return AARCH64_HI;
3140 case UNGE: return AARCH64_PL;
3141 default: gcc_unreachable ();
3142 }
3143 break;
3144
3145 case CCmode:
3146 switch (comp_code)
3147 {
3148 case NE: return AARCH64_NE;
3149 case EQ: return AARCH64_EQ;
3150 case GE: return AARCH64_GE;
3151 case GT: return AARCH64_GT;
3152 case LE: return AARCH64_LE;
3153 case LT: return AARCH64_LT;
3154 case GEU: return AARCH64_CS;
3155 case GTU: return AARCH64_HI;
3156 case LEU: return AARCH64_LS;
3157 case LTU: return AARCH64_CC;
3158 default: gcc_unreachable ();
3159 }
3160 break;
3161
3162 case CC_SWPmode:
3163 case CC_ZESWPmode:
3164 case CC_SESWPmode:
3165 switch (comp_code)
3166 {
3167 case NE: return AARCH64_NE;
3168 case EQ: return AARCH64_EQ;
3169 case GE: return AARCH64_LE;
3170 case GT: return AARCH64_LT;
3171 case LE: return AARCH64_GE;
3172 case LT: return AARCH64_GT;
3173 case GEU: return AARCH64_LS;
3174 case GTU: return AARCH64_CC;
3175 case LEU: return AARCH64_CS;
3176 case LTU: return AARCH64_HI;
3177 default: gcc_unreachable ();
3178 }
3179 break;
3180
3181 case CC_NZmode:
3182 switch (comp_code)
3183 {
3184 case NE: return AARCH64_NE;
3185 case EQ: return AARCH64_EQ;
3186 case GE: return AARCH64_PL;
3187 case LT: return AARCH64_MI;
3188 default: gcc_unreachable ();
3189 }
3190 break;
3191
3192 default:
3193 gcc_unreachable ();
3194 break;
3195 }
3196}
3197
3198static unsigned
3199bit_count (unsigned HOST_WIDE_INT value)
3200{
3201 unsigned count = 0;
3202
3203 while (value)
3204 {
3205 count++;
3206 value &= value - 1;
3207 }
3208
3209 return count;
3210}
3211
3212void
3213aarch64_print_operand (FILE *f, rtx x, char code)
3214{
3215 switch (code)
3216 {
3217 case 'e':
3218 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3219 {
3220 int n;
3221
3222 if (GET_CODE (x) != CONST_INT
3223 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3224 {
3225 output_operand_lossage ("invalid operand for '%%%c'", code);
3226 return;
3227 }
3228
3229 switch (n)
3230 {
3231 case 3:
3232 fputc ('b', f);
3233 break;
3234 case 4:
3235 fputc ('h', f);
3236 break;
3237 case 5:
3238 fputc ('w', f);
3239 break;
3240 default:
3241 output_operand_lossage ("invalid operand for '%%%c'", code);
3242 return;
3243 }
3244 }
3245 break;
3246
3247 case 'p':
3248 {
3249 int n;
3250
3251 /* Print N such that 2^N == X. */
3252 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3253 {
3254 output_operand_lossage ("invalid operand for '%%%c'", code);
3255 return;
3256 }
3257
3258 asm_fprintf (f, "%d", n);
3259 }
3260 break;
3261
3262 case 'P':
3263 /* Print the number of non-zero bits in X (a const_int). */
3264 if (GET_CODE (x) != CONST_INT)
3265 {
3266 output_operand_lossage ("invalid operand for '%%%c'", code);
3267 return;
3268 }
3269
3270 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3271 break;
3272
3273 case 'H':
3274 /* Print the higher numbered register of a pair (TImode) of regs. */
3275 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3276 {
3277 output_operand_lossage ("invalid operand for '%%%c'", code);
3278 return;
3279 }
3280
01a3a324 3281 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
3282 break;
3283
3284 case 'Q':
3285 /* Print the least significant register of a pair (TImode) of regs. */
3286 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3287 {
3288 output_operand_lossage ("invalid operand for '%%%c'", code);
3289 return;
3290 }
01a3a324 3291 asm_fprintf (f, "%s", reg_names [REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0)]);
43e9d192
IB
3292 break;
3293
3294 case 'R':
3295 /* Print the most significant register of a pair (TImode) of regs. */
3296 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3297 {
3298 output_operand_lossage ("invalid operand for '%%%c'", code);
3299 return;
3300 }
01a3a324 3301 asm_fprintf (f, "%s", reg_names [REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1)]);
43e9d192
IB
3302 break;
3303
3304 case 'm':
3305 /* Print a condition (eq, ne, etc). */
3306
3307 /* CONST_TRUE_RTX means always -- that's the default. */
3308 if (x == const_true_rtx)
3309 return;
3310
3311 if (!COMPARISON_P (x))
3312 {
3313 output_operand_lossage ("invalid operand for '%%%c'", code);
3314 return;
3315 }
3316
3317 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3318 break;
3319
3320 case 'M':
3321 /* Print the inverse of a condition (eq <-> ne, etc). */
3322
3323 /* CONST_TRUE_RTX means never -- that's the default. */
3324 if (x == const_true_rtx)
3325 {
3326 fputs ("nv", f);
3327 return;
3328 }
3329
3330 if (!COMPARISON_P (x))
3331 {
3332 output_operand_lossage ("invalid operand for '%%%c'", code);
3333 return;
3334 }
3335
3336 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3337 (aarch64_get_condition_code (x))], f);
3338 break;
3339
3340 case 'b':
3341 case 'h':
3342 case 's':
3343 case 'd':
3344 case 'q':
3345 /* Print a scalar FP/SIMD register name. */
3346 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3347 {
3348 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3349 return;
3350 }
3351 asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code, REGNO (x) - V0_REGNUM);
3352 break;
3353
3354 case 'S':
3355 case 'T':
3356 case 'U':
3357 case 'V':
3358 /* Print the first FP/SIMD register name in a list. */
3359 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3360 {
3361 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3362 return;
3363 }
3364 asm_fprintf (f, "%sv%d", REGISTER_PREFIX,
3365 REGNO (x) - V0_REGNUM + (code - 'S'));
3366 break;
3367
3368 case 'w':
3369 case 'x':
3370 /* Print a general register name or the zero register (32-bit or
3371 64-bit). */
3520f7cc
JG
3372 if (x == const0_rtx
3373 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192
IB
3374 {
3375 asm_fprintf (f, "%s%czr", REGISTER_PREFIX, code);
3376 break;
3377 }
3378
3379 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3380 {
3381 asm_fprintf (f, "%s%c%d", REGISTER_PREFIX, code,
3382 REGNO (x) - R0_REGNUM);
3383 break;
3384 }
3385
3386 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3387 {
3388 asm_fprintf (f, "%s%ssp", REGISTER_PREFIX, code == 'w' ? "w" : "");
3389 break;
3390 }
3391
3392 /* Fall through */
3393
3394 case 0:
3395 /* Print a normal operand, if it's a general register, then we
3396 assume DImode. */
3397 if (x == NULL)
3398 {
3399 output_operand_lossage ("missing operand");
3400 return;
3401 }
3402
3403 switch (GET_CODE (x))
3404 {
3405 case REG:
01a3a324 3406 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
3407 break;
3408
3409 case MEM:
3410 aarch64_memory_reference_mode = GET_MODE (x);
3411 output_address (XEXP (x, 0));
3412 break;
3413
3414 case LABEL_REF:
3415 case SYMBOL_REF:
3416 output_addr_const (asm_out_file, x);
3417 break;
3418
3419 case CONST_INT:
3420 asm_fprintf (f, "%wd", INTVAL (x));
3421 break;
3422
3423 case CONST_VECTOR:
3520f7cc
JG
3424 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3425 {
3426 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3427 HOST_WIDE_INT_MIN,
3428 HOST_WIDE_INT_MAX));
3429 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3430 }
3431 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3432 {
3433 fputc ('0', f);
3434 }
3435 else
3436 gcc_unreachable ();
43e9d192
IB
3437 break;
3438
3520f7cc
JG
3439 case CONST_DOUBLE:
3440 /* CONST_DOUBLE can represent a double-width integer.
3441 In this case, the mode of x is VOIDmode. */
3442 if (GET_MODE (x) == VOIDmode)
3443 ; /* Do Nothing. */
3444 else if (aarch64_float_const_zero_rtx_p (x))
3445 {
3446 fputc ('0', f);
3447 break;
3448 }
3449 else if (aarch64_float_const_representable_p (x))
3450 {
3451#define buf_size 20
3452 char float_buf[buf_size] = {'\0'};
3453 REAL_VALUE_TYPE r;
3454 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3455 real_to_decimal_for_mode (float_buf, &r,
3456 buf_size, buf_size,
3457 1, GET_MODE (x));
3458 asm_fprintf (asm_out_file, "%s", float_buf);
3459 break;
3460#undef buf_size
3461 }
3462 output_operand_lossage ("invalid constant");
3463 return;
43e9d192
IB
3464 default:
3465 output_operand_lossage ("invalid operand");
3466 return;
3467 }
3468 break;
3469
3470 case 'A':
3471 if (GET_CODE (x) == HIGH)
3472 x = XEXP (x, 0);
3473
3474 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3475 {
3476 case SYMBOL_SMALL_GOT:
3477 asm_fprintf (asm_out_file, ":got:");
3478 break;
3479
3480 case SYMBOL_SMALL_TLSGD:
3481 asm_fprintf (asm_out_file, ":tlsgd:");
3482 break;
3483
3484 case SYMBOL_SMALL_TLSDESC:
3485 asm_fprintf (asm_out_file, ":tlsdesc:");
3486 break;
3487
3488 case SYMBOL_SMALL_GOTTPREL:
3489 asm_fprintf (asm_out_file, ":gottprel:");
3490 break;
3491
3492 case SYMBOL_SMALL_TPREL:
3493 asm_fprintf (asm_out_file, ":tprel:");
3494 break;
3495
3496 default:
3497 break;
3498 }
3499 output_addr_const (asm_out_file, x);
3500 break;
3501
3502 case 'L':
3503 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3504 {
3505 case SYMBOL_SMALL_GOT:
3506 asm_fprintf (asm_out_file, ":lo12:");
3507 break;
3508
3509 case SYMBOL_SMALL_TLSGD:
3510 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3511 break;
3512
3513 case SYMBOL_SMALL_TLSDESC:
3514 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3515 break;
3516
3517 case SYMBOL_SMALL_GOTTPREL:
3518 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3519 break;
3520
3521 case SYMBOL_SMALL_TPREL:
3522 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3523 break;
3524
3525 default:
3526 break;
3527 }
3528 output_addr_const (asm_out_file, x);
3529 break;
3530
3531 case 'G':
3532
3533 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3534 {
3535 case SYMBOL_SMALL_TPREL:
3536 asm_fprintf (asm_out_file, ":tprel_hi12:");
3537 break;
3538 default:
3539 break;
3540 }
3541 output_addr_const (asm_out_file, x);
3542 break;
3543
3544 default:
3545 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3546 return;
3547 }
3548}
3549
3550void
3551aarch64_print_operand_address (FILE *f, rtx x)
3552{
3553 struct aarch64_address_info addr;
3554
3555 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3556 MEM, true))
3557 switch (addr.type)
3558 {
3559 case ADDRESS_REG_IMM:
3560 if (addr.offset == const0_rtx)
01a3a324 3561 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43e9d192 3562 else
01a3a324 3563 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
43e9d192
IB
3564 INTVAL (addr.offset));
3565 return;
3566
3567 case ADDRESS_REG_REG:
3568 if (addr.shift == 0)
01a3a324
N
3569 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3570 reg_names [REGNO (addr.offset)]);
43e9d192 3571 else
01a3a324
N
3572 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3573 reg_names [REGNO (addr.offset)], addr.shift);
43e9d192
IB
3574 return;
3575
3576 case ADDRESS_REG_UXTW:
3577 if (addr.shift == 0)
01a3a324 3578 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
3579 REGNO (addr.offset) - R0_REGNUM);
3580 else
01a3a324 3581 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
3582 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3583 return;
3584
3585 case ADDRESS_REG_SXTW:
3586 if (addr.shift == 0)
01a3a324 3587 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
3588 REGNO (addr.offset) - R0_REGNUM);
3589 else
01a3a324 3590 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
3591 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3592 return;
3593
3594 case ADDRESS_REG_WB:
3595 switch (GET_CODE (x))
3596 {
3597 case PRE_INC:
01a3a324 3598 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
3599 GET_MODE_SIZE (aarch64_memory_reference_mode));
3600 return;
3601 case POST_INC:
01a3a324 3602 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
43e9d192
IB
3603 GET_MODE_SIZE (aarch64_memory_reference_mode));
3604 return;
3605 case PRE_DEC:
01a3a324 3606 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
3607 GET_MODE_SIZE (aarch64_memory_reference_mode));
3608 return;
3609 case POST_DEC:
01a3a324 3610 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
43e9d192
IB
3611 GET_MODE_SIZE (aarch64_memory_reference_mode));
3612 return;
3613 case PRE_MODIFY:
01a3a324 3614 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
43e9d192
IB
3615 INTVAL (addr.offset));
3616 return;
3617 case POST_MODIFY:
01a3a324 3618 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
43e9d192
IB
3619 INTVAL (addr.offset));
3620 return;
3621 default:
3622 break;
3623 }
3624 break;
3625
3626 case ADDRESS_LO_SUM:
01a3a324 3627 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
3628 output_addr_const (f, addr.offset);
3629 asm_fprintf (f, "]");
3630 return;
3631
3632 case ADDRESS_SYMBOLIC:
3633 break;
3634 }
3635
3636 output_addr_const (f, x);
3637}
3638
3639void
3640aarch64_function_profiler (FILE *f ATTRIBUTE_UNUSED,
3641 int labelno ATTRIBUTE_UNUSED)
3642{
3643 sorry ("function profiling");
3644}
3645
3646bool
3647aarch64_label_mentioned_p (rtx x)
3648{
3649 const char *fmt;
3650 int i;
3651
3652 if (GET_CODE (x) == LABEL_REF)
3653 return true;
3654
3655 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3656 referencing instruction, but they are constant offsets, not
3657 symbols. */
3658 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3659 return false;
3660
3661 fmt = GET_RTX_FORMAT (GET_CODE (x));
3662 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3663 {
3664 if (fmt[i] == 'E')
3665 {
3666 int j;
3667
3668 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3669 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3670 return 1;
3671 }
3672 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3673 return 1;
3674 }
3675
3676 return 0;
3677}
3678
3679/* Implement REGNO_REG_CLASS. */
3680
3681enum reg_class
3682aarch64_regno_regclass (unsigned regno)
3683{
3684 if (GP_REGNUM_P (regno))
3685 return CORE_REGS;
3686
3687 if (regno == SP_REGNUM)
3688 return STACK_REG;
3689
3690 if (regno == FRAME_POINTER_REGNUM
3691 || regno == ARG_POINTER_REGNUM)
3692 return CORE_REGS;
3693
3694 if (FP_REGNUM_P (regno))
3695 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3696
3697 return NO_REGS;
3698}
3699
3700/* Try a machine-dependent way of reloading an illegitimate address
3701 operand. If we find one, push the reload and return the new rtx. */
3702
3703rtx
3704aarch64_legitimize_reload_address (rtx *x_p,
3705 enum machine_mode mode,
3706 int opnum, int type,
3707 int ind_levels ATTRIBUTE_UNUSED)
3708{
3709 rtx x = *x_p;
3710
3711 /* Do not allow mem (plus (reg, const)) if vector mode. */
3712 if (aarch64_vector_mode_p (mode)
3713 && GET_CODE (x) == PLUS
3714 && REG_P (XEXP (x, 0))
3715 && CONST_INT_P (XEXP (x, 1)))
3716 {
3717 rtx orig_rtx = x;
3718 x = copy_rtx (x);
3719 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3720 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3721 opnum, (enum reload_type) type);
3722 return x;
3723 }
3724
3725 /* We must recognize output that we have already generated ourselves. */
3726 if (GET_CODE (x) == PLUS
3727 && GET_CODE (XEXP (x, 0)) == PLUS
3728 && REG_P (XEXP (XEXP (x, 0), 0))
3729 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3730 && CONST_INT_P (XEXP (x, 1)))
3731 {
3732 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3733 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3734 opnum, (enum reload_type) type);
3735 return x;
3736 }
3737
3738 /* We wish to handle large displacements off a base register by splitting
3739 the addend across an add and the mem insn. This can cut the number of
3740 extra insns needed from 3 to 1. It is only useful for load/store of a
3741   single register with a 12-bit offset field.  */
3742 if (GET_CODE (x) == PLUS
3743 && REG_P (XEXP (x, 0))
3744 && CONST_INT_P (XEXP (x, 1))
3745 && HARD_REGISTER_P (XEXP (x, 0))
3746 && mode != TImode
3747 && mode != TFmode
3748 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
3749 {
3750 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3751 HOST_WIDE_INT low = val & 0xfff;
3752 HOST_WIDE_INT high = val - low;
3753 HOST_WIDE_INT offs;
3754 rtx cst;
3755
3756      /* Leave BLKmode (zero-size) offsets to the generic reload code,
3757	 since we cannot ascertain BLKmode alignment.  */
3758 if (GET_MODE_SIZE (mode) == 0)
3759 return NULL_RTX;
3760
3761 offs = low % GET_MODE_SIZE (mode);
3762
3763 /* Align misaligned offset by adjusting high part to compensate. */
3764 if (offs != 0)
3765 {
3766 if (aarch64_uimm12_shift (high + offs))
3767 {
3768 /* Align down. */
3769 low = low - offs;
3770 high = high + offs;
3771 }
3772 else
3773 {
3774 /* Align up. */
3775 offs = GET_MODE_SIZE (mode) - offs;
3776 low = low + offs;
3777 high = high + (low & 0x1000) - offs;
3778 low &= 0xfff;
3779 }
3780 }
3781
3782 /* Check for overflow. */
3783 if (high + low != val)
3784 return NULL_RTX;
3785
3786 cst = GEN_INT (high);
3787 if (!aarch64_uimm12_shift (high))
3788 cst = force_const_mem (Pmode, cst);
3789
3790 /* Reload high part into base reg, leaving the low part
3791 in the mem instruction. */
3792 x = gen_rtx_PLUS (Pmode,
3793 gen_rtx_PLUS (Pmode, XEXP (x, 0), cst),
3794 GEN_INT (low));
3795
3796 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3797 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
3798 opnum, (enum reload_type) type);
3799 return x;
3800 }
3801
3802 return NULL_RTX;
3803}
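
/* A worked example of the split above, for illustration: for an SImode
   access at (plus (reg x1) (const_int 0x4ffc)), which is too large for a
   direct 12-bit scaled offset, the code computes high = 0x4000 and
   low = 0xffc, so reload materialises roughly:

	add	<scratch>, x1, #16384	// high part, uimm12 with LSL #12
	ldr	w0, [<scratch>, #4092]	// low part stays in the mem insn

   i.e. one extra ADD instead of a full constant build.  Register names
   here are only placeholders.  */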
3804
3805
3806static reg_class_t
3807aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
3808 reg_class_t rclass,
3809 enum machine_mode mode,
3810 secondary_reload_info *sri)
3811{
3812 /* Address expressions of the form PLUS (SP, large_offset) need two
3813 scratch registers, one for the constant, and one for holding a
3814 copy of SP, since SP cannot be used on the RHS of an add-reg
3815 instruction. */
3816 if (mode == DImode
3817 && GET_CODE (x) == PLUS
3818 && XEXP (x, 0) == stack_pointer_rtx
3819 && CONST_INT_P (XEXP (x, 1))
3820 && !aarch64_uimm12_shift (INTVAL (XEXP (x, 1))))
3821 {
3822 sri->icode = CODE_FOR_reload_sp_immediate;
3823 return NO_REGS;
3824 }
3825
3826 /* Without the TARGET_SIMD instructions we cannot move a Q register
3827 to a Q register directly. We need a scratch. */
3828 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
3829 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
3830 && reg_class_subset_p (rclass, FP_REGS))
3831 {
3832 if (mode == TFmode)
3833 sri->icode = CODE_FOR_aarch64_reload_movtf;
3834 else if (mode == TImode)
3835 sri->icode = CODE_FOR_aarch64_reload_movti;
3836 return NO_REGS;
3837 }
3838
3839  /* A TFmode or TImode memory access should be handled via FP_REGS
3840 because AArch64 has richer addressing modes for LDR/STR instructions
3841 than LDP/STP instructions. */
3842 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
3843 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
3844 return FP_REGS;
3845
3846 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
3847 return CORE_REGS;
3848
3849 return NO_REGS;
3850}
3851
3852static bool
3853aarch64_can_eliminate (const int from, const int to)
3854{
3855 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
3856 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
3857
3858 if (frame_pointer_needed)
3859 {
3860 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3861 return true;
3862 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3863 return false;
3864 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3865 && !cfun->calls_alloca)
3866 return true;
3867 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3868 return true;
3869 return false;
3870 }
3871 else
3872 {
3873 /* If we decided that we didn't need a frame pointer but then used
3874 LR in the function, then we do need a frame pointer after all, so
3875 prevent this elimination to ensure a frame pointer is used. */
3876
3877 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3878 && df_regs_ever_live_p (LR_REGNUM))
3879 return false;
3880 }
3881 return true;
3882}
3883
3884HOST_WIDE_INT
3885aarch64_initial_elimination_offset (unsigned from, unsigned to)
3886{
3887 HOST_WIDE_INT frame_size;
3888 HOST_WIDE_INT offset;
3889
3890 aarch64_layout_frame ();
3891 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
3892 + crtl->outgoing_args_size
3893 + cfun->machine->saved_varargs_size);
3894
3895 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
3896 offset = frame_size;
3897
3898 if (to == HARD_FRAME_POINTER_REGNUM)
3899 {
3900 if (from == ARG_POINTER_REGNUM)
3901 return offset - crtl->outgoing_args_size;
3902
3903 if (from == FRAME_POINTER_REGNUM)
3904 return cfun->machine->frame.saved_regs_size;
3905 }
3906
3907 if (to == STACK_POINTER_REGNUM)
3908 {
3909 if (from == FRAME_POINTER_REGNUM)
3910 {
3911 HOST_WIDE_INT elim = crtl->outgoing_args_size
3912 + cfun->machine->frame.saved_regs_size
3913 - cfun->machine->frame.fp_lr_offset;
3914 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
3915 return elim;
3916 }
3917 }
3918
3919 return offset;
3920}
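/* Annotation (not part of the original source): an illustrative calculation
   for the function above, assuming get_frame_size () == 40,
   saved_regs_size == 16 (FP and LR), no outgoing args and no saved varargs:

     frame_size = 40 + 16 + 0 + 0 = 56, rounded up to the 16-byte
     STACK_BOUNDARY => 64.

   Eliminating ARG_POINTER into HARD_FRAME_POINTER then yields 64, while
   FRAME_POINTER into HARD_FRAME_POINTER yields saved_regs_size == 16.  */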
3921
3922
3923/* Implement RETURN_ADDR_RTX. We do not support moving back to a
3924 previous frame. */
3925
3926rtx
3927aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
3928{
3929 if (count != 0)
3930 return const0_rtx;
3931 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
3932}
3933
3934
3935static void
3936aarch64_asm_trampoline_template (FILE *f)
3937{
3938 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
3939 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
3940 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
3941 assemble_aligned_integer (4, const0_rtx);
3942 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3943 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3944}
3945
3946unsigned
3947aarch64_trampoline_size (void)
3948{
3949 return 32; /* 3 insns + padding + 2 dwords. */
3950}
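/* Annotation (not part of the original source): the 32-byte figure breaks
   down per the template and init code above as

     0..11   three 4-byte instructions (two LDRs and a BR)
     12..15  4 bytes of zero padding to align the data words
     16..23  double-word holding the target function address
     24..31  double-word holding the static chain value

   aarch64_trampoline_init below writes the last two double-words at
   offsets 16 and 24, matching the ".+16"/".+20" PC-relative loads.  */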
3951
3952static void
3953aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3954{
3955 rtx fnaddr, mem, a_tramp;
3956
3957 /* Don't need to copy the trailing D-words, we fill those in below. */
3958 emit_block_move (m_tramp, assemble_trampoline_template (),
3959 GEN_INT (TRAMPOLINE_SIZE - 16), BLOCK_OP_NORMAL);
3960 mem = adjust_address (m_tramp, DImode, 16);
3961 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3962 emit_move_insn (mem, fnaddr);
3963
3964 mem = adjust_address (m_tramp, DImode, 24);
3965 emit_move_insn (mem, chain_value);
3966
3967 /* XXX We should really define a "clear_cache" pattern and use
3968 gen_clear_cache(). */
3969 a_tramp = XEXP (m_tramp, 0);
3970 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3971 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3972 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3973}
3974
3975static unsigned char
3976aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
3977{
3978 switch (regclass)
3979 {
3980 case CORE_REGS:
3981 case POINTER_REGS:
3982 case GENERAL_REGS:
3983 case ALL_REGS:
3984 case FP_REGS:
3985 case FP_LO_REGS:
3986 return
3987 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
3988 (GET_MODE_SIZE (mode) + 7) / 8;
3989 case STACK_REG:
3990 return 1;
3991
3992 case NO_REGS:
3993 return 0;
3994
3995 default:
3996 break;
3997 }
3998 gcc_unreachable ();
3999}
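/* Annotation (not part of the original source): examples of the rule above.
   Vector modes are counted in 16-byte Q-register units, everything else in
   8-byte units, so e.g.

     V4SImode (16 bytes, vector)      -> (16 + 15) / 16 = 1 register
     TImode   (16 bytes, non-vector)  -> (16 +  7) /  8 = 2 registers
     DImode   ( 8 bytes)              -> ( 8 +  7) /  8 = 1 register  */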
4000
4001static reg_class_t
4002aarch64_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t regclass)
4003{
4004 return ((regclass == POINTER_REGS || regclass == STACK_REG)
4005 ? GENERAL_REGS : regclass);
4006}
4007
4008void
4009aarch64_asm_output_labelref (FILE* f, const char *name)
4010{
4011 asm_fprintf (f, "%U%s", name);
4012}
4013
4014static void
4015aarch64_elf_asm_constructor (rtx symbol, int priority)
4016{
4017 if (priority == DEFAULT_INIT_PRIORITY)
4018 default_ctor_section_asm_out_constructor (symbol, priority);
4019 else
4020 {
4021 section *s;
4022 char buf[18];
4023 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4024 s = get_section (buf, SECTION_WRITE, NULL);
4025 switch_to_section (s);
4026 assemble_align (POINTER_SIZE);
4027 fputs ("\t.dword\t", asm_out_file);
4028 output_addr_const (asm_out_file, symbol);
4029 fputc ('\n', asm_out_file);
4030 }
4031}
4032
4033static void
4034aarch64_elf_asm_destructor (rtx symbol, int priority)
4035{
4036 if (priority == DEFAULT_INIT_PRIORITY)
4037 default_dtor_section_asm_out_destructor (symbol, priority);
4038 else
4039 {
4040 section *s;
4041 char buf[18];
4042 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4043 s = get_section (buf, SECTION_WRITE, NULL);
4044 switch_to_section (s);
4045 assemble_align (POINTER_SIZE);
4046 fputs ("\t.dword\t", asm_out_file);
4047 output_addr_const (asm_out_file, symbol);
4048 fputc ('\n', asm_out_file);
4049 }
4050}
4051
4052const char*
4053aarch64_output_casesi (rtx *operands)
4054{
4055 char buf[100];
4056 char label[100];
4057 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
4058 int index;
4059 static const char *const patterns[4][2] =
4060 {
4061 {
4062 "ldrb\t%w3, [%0,%w1,uxtw]",
4063 "add\t%3, %4, %w3, sxtb #2"
4064 },
4065 {
4066 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4067 "add\t%3, %4, %w3, sxth #2"
4068 },
4069 {
4070 "ldr\t%w3, [%0,%w1,uxtw #2]",
4071 "add\t%3, %4, %w3, sxtw #2"
4072 },
4073 /* We assume that DImode is only generated when not optimizing and
4074 that we don't really need 64-bit address offsets. That would
4075 imply an object file with 8GB of code in a single function! */
4076 {
4077 "ldr\t%w3, [%0,%w1,uxtw #2]",
4078 "add\t%3, %4, %w3, sxtw #2"
4079 }
4080 };
4081
4082 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4083
4084 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4085
4086 gcc_assert (index >= 0 && index <= 3);
4087
 4088 /* Need to implement table size reduction by changing the code below. */
4089 output_asm_insn (patterns[index][0], operands);
4090 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4091 snprintf (buf, sizeof (buf),
4092 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4093 output_asm_insn (buf, operands);
4094 output_asm_insn (patterns[index][1], operands);
4095 output_asm_insn ("br\t%3", operands);
4096 assemble_label (asm_out_file, label);
4097 return "";
4098}
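/* Annotation (not part of the original source): a sketch of the sequence
   emitted for a HImode dispatch table (index 1 in the pattern array); the
   register numbers and label are only illustrative of the operand roles:

     ldrh    w3, [x0, w1, uxtw #1]   // load scaled table entry
     adr     x4, .Lrtx<N>            // anchor label emitted below
     add     x3, x4, w3, sxth #2     // entry * 4 added to the anchor
     br      x3
   .Lrtx<N>:                         // immediately follows the branch  */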
4099
4100
4101/* Return size in bits of an arithmetic operand which is shifted/scaled and
4102 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4103 operator. */
4104
4105int
4106aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4107{
4108 if (shift >= 0 && shift <= 3)
4109 {
4110 int size;
4111 for (size = 8; size <= 32; size *= 2)
4112 {
4113 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4114 if (mask == bits << shift)
4115 return size;
4116 }
4117 }
4118 return 0;
4119}
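/* Annotation (not part of the original source): sample inputs for the
   function above.  The mask must be a contiguous 8/16/32-bit field shifted
   left by SHIFT:

     aarch64_uxt_size (0, 0xff)        == 8    (UXTB)
     aarch64_uxt_size (1, 0x1fe)       == 8    (UXTB, shifted by 1)
     aarch64_uxt_size (3, 0x7fff8)     == 16   (UXTH, shifted by 3)
     aarch64_uxt_size (0, 0xffffffff)  == 32   (UXTW)
     aarch64_uxt_size (4, 0xff0)       == 0    (shift out of range)  */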
4120
4121static bool
4122aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4123 const_rtx x ATTRIBUTE_UNUSED)
4124{
4125 /* We can't use blocks for constants when we're using a per-function
4126 constant pool. */
4127 return false;
4128}
4129
4130static section *
4131aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4132 rtx x ATTRIBUTE_UNUSED,
4133 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4134{
4135 /* Force all constant pool entries into the current function section. */
4136 return function_section (current_function_decl);
4137}
4138
4139
4140/* Costs. */
4141
4142/* Helper function for rtx cost calculation. Strip a shift expression
4143 from X. Returns the inner operand if successful, or the original
4144 expression on failure. */
4145static rtx
4146aarch64_strip_shift (rtx x)
4147{
4148 rtx op = x;
4149
4150 if ((GET_CODE (op) == ASHIFT
4151 || GET_CODE (op) == ASHIFTRT
4152 || GET_CODE (op) == LSHIFTRT)
4153 && CONST_INT_P (XEXP (op, 1)))
4154 return XEXP (op, 0);
4155
4156 if (GET_CODE (op) == MULT
4157 && CONST_INT_P (XEXP (op, 1))
4158 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4159 return XEXP (op, 0);
4160
4161 return x;
4162}
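/* Annotation (not part of the original source): examples of the stripping
   above, written as RTL templates:

     (ashift (reg X) (const_int 3))  -> (reg X)
     (mult   (reg X) (const_int 8))  -> (reg X)   (power of two < 2^64)
     (plus   (reg X) (reg Y))        -> unchanged  */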
4163
4164/* Helper function for rtx cost calculation. Strip a shift or extend
4165 expression from X. Returns the inner operand if successful, or the
4166 original expression on failure. We deal with a number of possible
4167 canonicalization variations here. */
4168static rtx
4169aarch64_strip_shift_or_extend (rtx x)
4170{
4171 rtx op = x;
4172
4173 /* Zero and sign extraction of a widened value. */
4174 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4175 && XEXP (op, 2) == const0_rtx
4176 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4177 XEXP (op, 1)))
4178 return XEXP (XEXP (op, 0), 0);
4179
4180 /* It can also be represented (for zero-extend) as an AND with an
4181 immediate. */
4182 if (GET_CODE (op) == AND
4183 && GET_CODE (XEXP (op, 0)) == MULT
4184 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4185 && CONST_INT_P (XEXP (op, 1))
4186 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4187 INTVAL (XEXP (op, 1))) != 0)
4188 return XEXP (XEXP (op, 0), 0);
4189
4190 /* Now handle extended register, as this may also have an optional
4191 left shift by 1..4. */
4192 if (GET_CODE (op) == ASHIFT
4193 && CONST_INT_P (XEXP (op, 1))
4194 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4195 op = XEXP (op, 0);
4196
4197 if (GET_CODE (op) == ZERO_EXTEND
4198 || GET_CODE (op) == SIGN_EXTEND)
4199 op = XEXP (op, 0);
4200
4201 if (op != x)
4202 return op;
4203
4204 return aarch64_strip_shift (x);
4205}
4206
4207/* Calculate the cost of calculating X, storing it in *COST. Result
4208 is true if the total cost of the operation has now been calculated. */
4209static bool
4210aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4211 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4212{
4213 rtx op0, op1;
4214 const struct cpu_rtx_cost_table *extra_cost
4215 = aarch64_tune_params->insn_extra_cost;
4216
4217 switch (code)
4218 {
4219 case SET:
4220 op0 = SET_DEST (x);
4221 op1 = SET_SRC (x);
4222
4223 switch (GET_CODE (op0))
4224 {
4225 case MEM:
4226 if (speed)
4227 *cost += extra_cost->memory_store;
4228
4229 if (op1 != const0_rtx)
4230 *cost += rtx_cost (op1, SET, 1, speed);
4231 return true;
4232
4233 case SUBREG:
4234 if (! REG_P (SUBREG_REG (op0)))
4235 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4236 /* Fall through. */
4237 case REG:
4238 /* Cost is just the cost of the RHS of the set. */
4239 *cost += rtx_cost (op1, SET, 1, true);
4240 return true;
4241
4242 case ZERO_EXTRACT: /* Bit-field insertion. */
4243 case SIGN_EXTRACT:
4244 /* Strip any redundant widening of the RHS to meet the width of
4245 the target. */
4246 if (GET_CODE (op1) == SUBREG)
4247 op1 = SUBREG_REG (op1);
4248 if ((GET_CODE (op1) == ZERO_EXTEND
4249 || GET_CODE (op1) == SIGN_EXTEND)
4250 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4251 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4252 >= INTVAL (XEXP (op0, 1))))
4253 op1 = XEXP (op1, 0);
4254 *cost += rtx_cost (op1, SET, 1, speed);
4255 return true;
4256
4257 default:
4258 break;
4259 }
4260 return false;
4261
4262 case MEM:
4263 if (speed)
4264 *cost += extra_cost->memory_load;
4265
4266 return true;
4267
4268 case NEG:
4269 op0 = CONST0_RTX (GET_MODE (x));
4270 op1 = XEXP (x, 0);
4271 goto cost_minus;
4272
4273 case COMPARE:
4274 op0 = XEXP (x, 0);
4275 op1 = XEXP (x, 1);
4276
4277 if (op1 == const0_rtx
4278 && GET_CODE (op0) == AND)
4279 {
4280 x = op0;
4281 goto cost_logic;
4282 }
4283
4284 /* Comparisons can work if the order is swapped.
4285 Canonicalization puts the more complex operation first, but
4286 we want it in op1. */
4287 if (! (REG_P (op0)
4288 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4289 {
4290 op0 = XEXP (x, 1);
4291 op1 = XEXP (x, 0);
4292 }
4293 goto cost_minus;
4294
4295 case MINUS:
4296 op0 = XEXP (x, 0);
4297 op1 = XEXP (x, 1);
4298
4299 cost_minus:
4300 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4301 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4302 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4303 {
4304 if (op0 != const0_rtx)
4305 *cost += rtx_cost (op0, MINUS, 0, speed);
4306
4307 if (CONST_INT_P (op1))
4308 {
4309 if (!aarch64_uimm12_shift (INTVAL (op1)))
4310 *cost += rtx_cost (op1, MINUS, 1, speed);
4311 }
4312 else
4313 {
4314 op1 = aarch64_strip_shift_or_extend (op1);
4315 *cost += rtx_cost (op1, MINUS, 1, speed);
4316 }
4317 return true;
4318 }
4319
4320 return false;
4321
4322 case PLUS:
4323 op0 = XEXP (x, 0);
4324 op1 = XEXP (x, 1);
4325
4326 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4327 {
4328 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4329 {
4330 *cost += rtx_cost (op0, PLUS, 0, speed);
4331 }
4332 else
4333 {
4334 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4335
4336 if (new_op0 == op0
4337 && GET_CODE (op0) == MULT)
4338 {
4339 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4340 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4341 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4342 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4343 {
4344 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4345 speed)
4346 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4347 speed)
4348 + rtx_cost (op1, PLUS, 1, speed));
4349 if (speed)
4350 *cost += extra_cost->int_multiply_extend_add;
4351 return true;
4352 }
4353 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4354 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4355 + rtx_cost (op1, PLUS, 1, speed));
4356
4357 if (speed)
4358 *cost += extra_cost->int_multiply_add;
4359 }
4360
4361 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4362 + rtx_cost (op1, PLUS, 1, speed));
4363 }
4364 return true;
4365 }
4366
4367 return false;
4368
4369 case IOR:
4370 case XOR:
4371 case AND:
4372 cost_logic:
4373 op0 = XEXP (x, 0);
4374 op1 = XEXP (x, 1);
4375
4376 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4377 {
4378 if (CONST_INT_P (op1)
4379 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4380 {
4381 *cost += rtx_cost (op0, AND, 0, speed);
4382 }
4383 else
4384 {
4385 if (GET_CODE (op0) == NOT)
4386 op0 = XEXP (op0, 0);
4387 op0 = aarch64_strip_shift (op0);
4388 *cost += (rtx_cost (op0, AND, 0, speed)
4389 + rtx_cost (op1, AND, 1, speed));
4390 }
4391 return true;
4392 }
4393 return false;
4394
4395 case ZERO_EXTEND:
4396 if ((GET_MODE (x) == DImode
4397 && GET_MODE (XEXP (x, 0)) == SImode)
4398 || GET_CODE (XEXP (x, 0)) == MEM)
4399 {
4400 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4401 return true;
4402 }
4403 return false;
4404
4405 case SIGN_EXTEND:
4406 if (GET_CODE (XEXP (x, 0)) == MEM)
4407 {
4408 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4409 return true;
4410 }
4411 return false;
4412
4413 case ROTATE:
4414 if (!CONST_INT_P (XEXP (x, 1)))
4415 *cost += COSTS_N_INSNS (2);
4416 /* Fall through. */
4417 case ROTATERT:
4418 case LSHIFTRT:
4419 case ASHIFT:
4420 case ASHIFTRT:
4421
4422 /* Shifting by a register often takes an extra cycle. */
4423 if (speed && !CONST_INT_P (XEXP (x, 1)))
4424 *cost += extra_cost->register_shift;
4425
4426 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4427 return true;
4428
4429 case HIGH:
4430 if (!CONSTANT_P (XEXP (x, 0)))
4431 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4432 return true;
4433
4434 case LO_SUM:
4435 if (!CONSTANT_P (XEXP (x, 1)))
4436 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4437 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4438 return true;
4439
4440 case ZERO_EXTRACT:
4441 case SIGN_EXTRACT:
4442 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4443 return true;
4444
4445 case MULT:
4446 op0 = XEXP (x, 0);
4447 op1 = XEXP (x, 1);
4448
4449 *cost = COSTS_N_INSNS (1);
4450 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4451 {
4452 if (CONST_INT_P (op1)
4453 && exact_log2 (INTVAL (op1)) > 0)
4454 {
4455 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4456 return true;
4457 }
4458
4459 if ((GET_CODE (op0) == ZERO_EXTEND
4460 && GET_CODE (op1) == ZERO_EXTEND)
4461 || (GET_CODE (op0) == SIGN_EXTEND
4462 && GET_CODE (op1) == SIGN_EXTEND))
4463 {
4464 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4465 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4466 if (speed)
4467 *cost += extra_cost->int_multiply_extend;
4468 return true;
4469 }
4470
4471 if (speed)
4472 *cost += extra_cost->int_multiply;
4473 }
4474 else if (speed)
4475 {
4476 if (GET_MODE (x) == DFmode)
4477 *cost += extra_cost->double_multiply;
4478 else if (GET_MODE (x) == SFmode)
4479 *cost += extra_cost->float_multiply;
4480 }
4481
4482 return false; /* All arguments need to be in registers. */
4483
4484 case MOD:
4485 case UMOD:
4486 *cost = COSTS_N_INSNS (2);
4487 if (speed)
4488 {
4489 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4490 *cost += (extra_cost->int_multiply_add
4491 + extra_cost->int_divide);
4492 else if (GET_MODE (x) == DFmode)
4493 *cost += (extra_cost->double_multiply
4494 + extra_cost->double_divide);
4495 else if (GET_MODE (x) == SFmode)
4496 *cost += (extra_cost->float_multiply
4497 + extra_cost->float_divide);
4498 }
4499 return false; /* All arguments need to be in registers. */
4500
4501 case DIV:
4502 case UDIV:
4503 *cost = COSTS_N_INSNS (1);
4504 if (speed)
4505 {
4506 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4507 *cost += extra_cost->int_divide;
4508 else if (GET_MODE (x) == DFmode)
4509 *cost += extra_cost->double_divide;
4510 else if (GET_MODE (x) == SFmode)
4511 *cost += extra_cost->float_divide;
4512 }
4513 return false; /* All arguments need to be in registers. */
4514
4515 default:
4516 break;
4517 }
4518 return false;
4519}
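/* Annotation (not part of the original source): a small example of how the
   SET/MEM cases above combine.  Costing (set (mem:DI addr) (reg:DI Xn)) with
   SPEED set adds extra_cost->memory_store plus the recursive cost of the
   source register; storing const0_rtx skips the source cost entirely.  */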
4520
4521static int
4522aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4523 enum machine_mode mode ATTRIBUTE_UNUSED,
4524 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4525{
4526 enum rtx_code c = GET_CODE (x);
4527 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4528
4529 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4530 return addr_cost->pre_modify;
4531
4532 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4533 return addr_cost->post_modify;
4534
4535 if (c == PLUS)
4536 {
4537 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4538 return addr_cost->imm_offset;
4539 else if (GET_CODE (XEXP (x, 0)) == MULT
4540 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4541 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4542 return addr_cost->register_extend;
4543
4544 return addr_cost->register_offset;
4545 }
4546 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4547 return addr_cost->imm_offset;
4548
4549 return 0;
4550}
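/* Annotation (not part of the original source): how common address forms
   map onto the cost table above:

     (plus (reg Xn) (const_int 8))                 -> imm_offset
     (plus (mult (reg Xm) (const_int 4)) (reg Xn)) -> register_extend
     (plus (reg Xn) (reg Xm))                      -> register_offset
     (pre_modify ...) / (post_inc ...)             -> pre_modify / post_modify  */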
4551
4552static int
4553aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4554 reg_class_t from, reg_class_t to)
4555{
4556 const struct cpu_regmove_cost *regmove_cost
4557 = aarch64_tune_params->regmove_cost;
4558
4559 if (from == GENERAL_REGS && to == GENERAL_REGS)
4560 return regmove_cost->GP2GP;
4561 else if (from == GENERAL_REGS)
4562 return regmove_cost->GP2FP;
4563 else if (to == GENERAL_REGS)
4564 return regmove_cost->FP2GP;
4565
4566 /* When AdvSIMD instructions are disabled it is not possible to move
4567 a 128-bit value directly between Q registers. This is handled in
4568 secondary reload. A general register is used as a scratch to move
4569 the upper DI value and the lower DI value is moved directly,
4570 hence the cost is the sum of three moves. */
4571
4572 if (! TARGET_SIMD && GET_MODE_SIZE (from) == 128 && GET_MODE_SIZE (to) == 128)
4573 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4574
4575 return regmove_cost->FP2FP;
4576}
4577
4578static int
4579aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4580 reg_class_t rclass ATTRIBUTE_UNUSED,
4581 bool in ATTRIBUTE_UNUSED)
4582{
4583 return aarch64_tune_params->memmov_cost;
4584}
4585
4586static void initialize_aarch64_code_model (void);
4587
4588/* Parse the architecture extension string. */
4589
4590static void
4591aarch64_parse_extension (char *str)
4592{
4593 /* The extension string is parsed left to right. */
4594 const struct aarch64_option_extension *opt = NULL;
4595
4596 /* Flag to say whether we are adding or removing an extension. */
4597 int adding_ext = -1;
4598
4599 while (str != NULL && *str != 0)
4600 {
4601 char *ext;
4602 size_t len;
4603
4604 str++;
4605 ext = strchr (str, '+');
4606
4607 if (ext != NULL)
4608 len = ext - str;
4609 else
4610 len = strlen (str);
4611
4612 if (len >= 2 && strncmp (str, "no", 2) == 0)
4613 {
4614 adding_ext = 0;
4615 len -= 2;
4616 str += 2;
4617 }
4618 else if (len > 0)
4619 adding_ext = 1;
4620
4621 if (len == 0)
4622 {
4623 error ("missing feature modifier after %qs", "+no");
4624 return;
4625 }
4626
4627 /* Scan over the extensions table trying to find an exact match. */
4628 for (opt = all_extensions; opt->name != NULL; opt++)
4629 {
4630 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
4631 {
4632 /* Add or remove the extension. */
4633 if (adding_ext)
4634 aarch64_isa_flags |= opt->flags_on;
4635 else
4636 aarch64_isa_flags &= ~(opt->flags_off);
4637 break;
4638 }
4639 }
4640
4641 if (opt->name == NULL)
4642 {
4643 /* Extension not found in list. */
4644 error ("unknown feature modifier %qs", str);
4645 return;
4646 }
4647
4648 str = ext;
4649 };
4650
4651 return;
4652}
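/* Annotation (not part of the original source): a walk-through of the loop
   above for the string "+fp+nocrypto", assuming "fp" and "crypto" are
   entries in all_extensions:

     1st pass: str -> "fp+nocrypto"; no "no" prefix, so the flags_on bits
               of "fp" are OR-ed into aarch64_isa_flags.
     2nd pass: str -> "nocrypto"; the "no" prefix is stripped and the
               flags_off bits of "crypto" are cleared.  */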
4653
4654/* Parse the ARCH string. */
4655
4656static void
4657aarch64_parse_arch (void)
4658{
4659 char *ext;
4660 const struct processor *arch;
4661 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
4662 size_t len;
4663
4664 strcpy (str, aarch64_arch_string);
4665
4666 ext = strchr (str, '+');
4667
4668 if (ext != NULL)
4669 len = ext - str;
4670 else
4671 len = strlen (str);
4672
4673 if (len == 0)
4674 {
4675 error ("missing arch name in -march=%qs", str);
4676 return;
4677 }
4678
4679 /* Loop through the list of supported ARCHs to find a match. */
4680 for (arch = all_architectures; arch->name != NULL; arch++)
4681 {
4682 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
4683 {
4684 selected_arch = arch;
4685 aarch64_isa_flags = selected_arch->flags;
4686 selected_cpu = &all_cores[selected_arch->core];
4687
4688 if (ext != NULL)
4689 {
4690 /* ARCH string contains at least one extension. */
4691 aarch64_parse_extension (ext);
4692 }
4693
4694 return;
4695 }
4696 }
4697
4698 /* ARCH name not found in list. */
4699 error ("unknown value %qs for -march", str);
4700 return;
4701}
4702
4703/* Parse the CPU string. */
4704
4705static void
4706aarch64_parse_cpu (void)
4707{
4708 char *ext;
4709 const struct processor *cpu;
4710 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
4711 size_t len;
4712
4713 strcpy (str, aarch64_cpu_string);
4714
4715 ext = strchr (str, '+');
4716
4717 if (ext != NULL)
4718 len = ext - str;
4719 else
4720 len = strlen (str);
4721
4722 if (len == 0)
4723 {
4724 error ("missing cpu name in -mcpu=%qs", str);
4725 return;
4726 }
4727
4728 /* Loop through the list of supported CPUs to find a match. */
4729 for (cpu = all_cores; cpu->name != NULL; cpu++)
4730 {
4731 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
4732 {
4733 selected_cpu = cpu;
4734 aarch64_isa_flags = selected_cpu->flags;
4735
4736 if (ext != NULL)
4737 {
4738 /* CPU string contains at least one extension. */
4739 aarch64_parse_extension (ext);
4740 }
4741
4742 return;
4743 }
4744 }
4745
4746 /* CPU name not found in list. */
4747 error ("unknown value %qs for -mcpu", str);
4748 return;
4749}
4750
4751/* Parse the TUNE string. */
4752
4753static void
4754aarch64_parse_tune (void)
4755{
4756 const struct processor *cpu;
4757 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
4758 strcpy (str, aarch64_tune_string);
4759
4760 /* Loop through the list of supported CPUs to find a match. */
4761 for (cpu = all_cores; cpu->name != NULL; cpu++)
4762 {
4763 if (strcmp (cpu->name, str) == 0)
4764 {
4765 selected_tune = cpu;
4766 return;
4767 }
4768 }
4769
4770 /* CPU name not found in list. */
4771 error ("unknown value %qs for -mtune", str);
4772 return;
4773}
4774
4775
4776/* Implement TARGET_OPTION_OVERRIDE. */
4777
4778static void
4779aarch64_override_options (void)
4780{
4781 /* march wins over mcpu, so when march is defined, mcpu takes the same value,
4782 otherwise march remains undefined. mtune can be used with either march or
4783 mcpu. */
4784
4785 if (aarch64_arch_string)
4786 {
4787 aarch64_parse_arch ();
4788 aarch64_cpu_string = NULL;
4789 }
4790
4791 if (aarch64_cpu_string)
4792 {
4793 aarch64_parse_cpu ();
4794 selected_arch = NULL;
4795 }
4796
4797 if (aarch64_tune_string)
4798 {
4799 aarch64_parse_tune ();
4800 }
4801
4802 initialize_aarch64_code_model ();
4803
4804 aarch64_build_bitmask_table ();
4805
4806 /* This target defaults to strict volatile bitfields. */
4807 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
4808 flag_strict_volatile_bitfields = 1;
4809
4810 /* If the user did not specify a processor, choose the default
4811 one for them. This will be the CPU set during configuration using
4812 --with-cpu, otherwise it is "generic". */
4813 if (!selected_cpu)
4814 {
4815 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
4816 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
4817 }
4818
4819 gcc_assert (selected_cpu);
4820
4821 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
4822 if (!selected_tune)
4823 selected_tune = &all_cores[selected_cpu->core];
4824
4825 aarch64_tune_flags = selected_tune->flags;
4826 aarch64_tune = selected_tune->core;
4827 aarch64_tune_params = selected_tune->tune;
4828
4829 aarch64_override_options_after_change ();
4830}
4831
4832/* Implement targetm.override_options_after_change. */
4833
4834static void
4835aarch64_override_options_after_change (void)
4836{
4837 faked_omit_frame_pointer = false;
4838
4839 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
4840 that aarch64_frame_pointer_required will be called. We need to remember
4841 whether flag_omit_frame_pointer was turned on normally or just faked. */
4842
4843 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
4844 {
4845 flag_omit_frame_pointer = true;
4846 faked_omit_frame_pointer = true;
4847 }
4848}
4849
4850static struct machine_function *
4851aarch64_init_machine_status (void)
4852{
4853 struct machine_function *machine;
4854 machine = ggc_alloc_cleared_machine_function ();
4855 return machine;
4856}
4857
4858void
4859aarch64_init_expanders (void)
4860{
4861 init_machine_status = aarch64_init_machine_status;
4862}
4863
 4864/* Initialize the selected code model, taking -fpic / -fPIC into account. */
4865static void
4866initialize_aarch64_code_model (void)
4867{
4868 if (flag_pic)
4869 {
4870 switch (aarch64_cmodel_var)
4871 {
4872 case AARCH64_CMODEL_TINY:
4873 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
4874 break;
4875 case AARCH64_CMODEL_SMALL:
4876 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
4877 break;
4878 case AARCH64_CMODEL_LARGE:
4879 sorry ("code model %qs with -f%s", "large",
4880 flag_pic > 1 ? "PIC" : "pic");
4881 default:
4882 gcc_unreachable ();
4883 }
4884 }
4885 else
4886 aarch64_cmodel = aarch64_cmodel_var;
4887}
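/* Annotation (not part of the original source): the resulting mapping of
   code model and PIC combinations handled above:

     tiny  + -fpic/-fPIC  -> AARCH64_CMODEL_TINY_PIC
     small + -fpic/-fPIC  -> AARCH64_CMODEL_SMALL_PIC
     large + -fpic/-fPIC  -> rejected with sorry ()
     any model, no -fpic  -> used unchanged  */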
4888
4889/* Return true if SYMBOL_REF X binds locally. */
4890
4891static bool
4892aarch64_symbol_binds_local_p (const_rtx x)
4893{
4894 return (SYMBOL_REF_DECL (x)
4895 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
4896 : SYMBOL_REF_LOCAL_P (x));
4897}
4898
 4899/* Return true if SYMBOL_REF X is thread local.  */
4900static bool
4901aarch64_tls_symbol_p (rtx x)
4902{
4903 if (! TARGET_HAVE_TLS)
4904 return false;
4905
4906 if (GET_CODE (x) != SYMBOL_REF)
4907 return false;
4908
4909 return SYMBOL_REF_TLS_MODEL (x) != 0;
4910}
4911
4912/* Classify a TLS symbol into one of the TLS kinds. */
4913enum aarch64_symbol_type
4914aarch64_classify_tls_symbol (rtx x)
4915{
4916 enum tls_model tls_kind = tls_symbolic_operand_type (x);
4917
4918 switch (tls_kind)
4919 {
4920 case TLS_MODEL_GLOBAL_DYNAMIC:
4921 case TLS_MODEL_LOCAL_DYNAMIC:
4922 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
4923
4924 case TLS_MODEL_INITIAL_EXEC:
4925 return SYMBOL_SMALL_GOTTPREL;
4926
4927 case TLS_MODEL_LOCAL_EXEC:
4928 return SYMBOL_SMALL_TPREL;
4929
4930 case TLS_MODEL_EMULATED:
4931 case TLS_MODEL_NONE:
4932 return SYMBOL_FORCE_TO_MEM;
4933
4934 default:
4935 gcc_unreachable ();
4936 }
4937}
4938
4939/* Return the method that should be used to access SYMBOL_REF or
4940 LABEL_REF X in context CONTEXT. */
4941enum aarch64_symbol_type
4942aarch64_classify_symbol (rtx x,
4943 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
4944{
4945 if (GET_CODE (x) == LABEL_REF)
4946 {
4947 switch (aarch64_cmodel)
4948 {
4949 case AARCH64_CMODEL_LARGE:
4950 return SYMBOL_FORCE_TO_MEM;
4951
4952 case AARCH64_CMODEL_TINY_PIC:
4953 case AARCH64_CMODEL_TINY:
4954 case AARCH64_CMODEL_SMALL_PIC:
4955 case AARCH64_CMODEL_SMALL:
4956 return SYMBOL_SMALL_ABSOLUTE;
4957
4958 default:
4959 gcc_unreachable ();
4960 }
4961 }
4962
4963 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4964
4965 switch (aarch64_cmodel)
4966 {
4967 case AARCH64_CMODEL_LARGE:
4968 return SYMBOL_FORCE_TO_MEM;
4969
4970 case AARCH64_CMODEL_TINY:
4971 case AARCH64_CMODEL_SMALL:
4972
 4973 /* This is needed to get DFmode and TImode constants to be loaded from
 4974    the constant pool. It is necessary to dump TImode values into
 4975    the constant pool because we don't handle TImode constant loads
 4976    properly yet and hence need to use the constant pool. */
4977 if (CONSTANT_POOL_ADDRESS_P (x))
4978 return SYMBOL_FORCE_TO_MEM;
4979
4980 if (aarch64_tls_symbol_p (x))
4981 return aarch64_classify_tls_symbol (x);
4982
4983 if (SYMBOL_REF_WEAK (x))
4984 return SYMBOL_FORCE_TO_MEM;
4985
4986 return SYMBOL_SMALL_ABSOLUTE;
4987
4988 case AARCH64_CMODEL_TINY_PIC:
4989 case AARCH64_CMODEL_SMALL_PIC:
4990
4991 if (CONSTANT_POOL_ADDRESS_P (x))
4992 return SYMBOL_FORCE_TO_MEM;
4993
4994 if (aarch64_tls_symbol_p (x))
4995 return aarch64_classify_tls_symbol (x);
4996
4997 if (!aarch64_symbol_binds_local_p (x))
4998 return SYMBOL_SMALL_GOT;
4999
5000 return SYMBOL_SMALL_ABSOLUTE;
5001
5002 default:
5003 gcc_unreachable ();
5004 }
5005 /* By default push everything into the constant pool. */
5006 return SYMBOL_FORCE_TO_MEM;
5007}
5008
5009/* Return true if X is a symbolic constant that can be used in context
5010 CONTEXT. If it is, store the type of the symbol in *SYMBOL_TYPE. */
5011
5012bool
5013aarch64_symbolic_constant_p (rtx x, enum aarch64_symbol_context context,
5014 enum aarch64_symbol_type *symbol_type)
5015{
5016 rtx offset;
5017 split_const (x, &x, &offset);
5018 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
5019 *symbol_type = aarch64_classify_symbol (x, context);
5020 else
5021 return false;
5022
5023 /* No checking of offset at this point. */
5024 return true;
5025}
5026
5027bool
5028aarch64_constant_address_p (rtx x)
5029{
5030 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5031}
5032
5033bool
5034aarch64_legitimate_pic_operand_p (rtx x)
5035{
5036 if (GET_CODE (x) == SYMBOL_REF
5037 || (GET_CODE (x) == CONST
5038 && GET_CODE (XEXP (x, 0)) == PLUS
5039 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5040 return false;
5041
5042 return true;
5043}
5044
 5045/* Return true if X holds either a valid quarter-precision floating-point
 5046   constant or floating-point +0.0. */
5047static bool
5048aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5049{
5050 if (!CONST_DOUBLE_P (x))
5051 return false;
5052
5053 /* TODO: We could handle moving 0.0 to a TFmode register,
5054 but first we would like to refactor the movtf_aarch64
5055 to be more amicable to split moves properly and
5056 correctly gate on TARGET_SIMD. For now - reject all
5057 constants which are not to SFmode or DFmode registers. */
5058 if (!(mode == SFmode || mode == DFmode))
5059 return false;
5060
5061 if (aarch64_float_const_zero_rtx_p (x))
5062 return true;
5063 return aarch64_float_const_representable_p (x);
5064}
5065
5066static bool
5067aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5068{
5069 /* Do not allow vector struct mode constants. We could support
5070 0 and -1 easily, but they need support in aarch64-simd.md. */
5071 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5072 return false;
5073
5074 /* This could probably go away because
5075 we now decompose CONST_INTs according to expand_mov_immediate. */
5076 if ((GET_CODE (x) == CONST_VECTOR
5077 && aarch64_simd_valid_immediate (x, mode, false,
5078 NULL, NULL, NULL, NULL, NULL) != -1)
5079 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5080 return !targetm.cannot_force_const_mem (mode, x);
5081
5082 if (GET_CODE (x) == HIGH
5083 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5084 return true;
5085
5086 return aarch64_constant_address_p (x);
5087}
5088
 5089rtx
5090aarch64_load_tp (rtx target)
5091{
5092 if (!target
5093 || GET_MODE (target) != Pmode
5094 || !register_operand (target, Pmode))
5095 target = gen_reg_rtx (Pmode);
5096
5097 /* Can return in any reg. */
5098 emit_insn (gen_aarch64_load_tp_hard (target));
5099 return target;
5100}
5101
5102/* On AAPCS systems, this is the "struct __va_list". */
5103static GTY(()) tree va_list_type;
5104
5105/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5106 Return the type to use as __builtin_va_list.
5107
5108 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5109
5110 struct __va_list
5111 {
5112 void *__stack;
5113 void *__gr_top;
5114 void *__vr_top;
5115 int __gr_offs;
5116 int __vr_offs;
5117 }; */
5118
5119static tree
5120aarch64_build_builtin_va_list (void)
5121{
5122 tree va_list_name;
5123 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5124
5125 /* Create the type. */
5126 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5127 /* Give it the required name. */
5128 va_list_name = build_decl (BUILTINS_LOCATION,
5129 TYPE_DECL,
5130 get_identifier ("__va_list"),
5131 va_list_type);
5132 DECL_ARTIFICIAL (va_list_name) = 1;
5133 TYPE_NAME (va_list_type) = va_list_name;
 5134 TYPE_STUB_DECL (va_list_type) = va_list_name;
5135
5136 /* Create the fields. */
5137 f_stack = build_decl (BUILTINS_LOCATION,
5138 FIELD_DECL, get_identifier ("__stack"),
5139 ptr_type_node);
5140 f_grtop = build_decl (BUILTINS_LOCATION,
5141 FIELD_DECL, get_identifier ("__gr_top"),
5142 ptr_type_node);
5143 f_vrtop = build_decl (BUILTINS_LOCATION,
5144 FIELD_DECL, get_identifier ("__vr_top"),
5145 ptr_type_node);
5146 f_groff = build_decl (BUILTINS_LOCATION,
5147 FIELD_DECL, get_identifier ("__gr_offs"),
5148 integer_type_node);
5149 f_vroff = build_decl (BUILTINS_LOCATION,
5150 FIELD_DECL, get_identifier ("__vr_offs"),
5151 integer_type_node);
5152
5153 DECL_ARTIFICIAL (f_stack) = 1;
5154 DECL_ARTIFICIAL (f_grtop) = 1;
5155 DECL_ARTIFICIAL (f_vrtop) = 1;
5156 DECL_ARTIFICIAL (f_groff) = 1;
5157 DECL_ARTIFICIAL (f_vroff) = 1;
5158
5159 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5160 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5161 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5162 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5163 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5164
5165 TYPE_FIELDS (va_list_type) = f_stack;
5166 DECL_CHAIN (f_stack) = f_grtop;
5167 DECL_CHAIN (f_grtop) = f_vrtop;
5168 DECL_CHAIN (f_vrtop) = f_groff;
5169 DECL_CHAIN (f_groff) = f_vroff;
5170
5171 /* Compute its layout. */
5172 layout_type (va_list_type);
5173
5174 return va_list_type;
5175}
5176
5177/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5178static void
5179aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5180{
5181 const CUMULATIVE_ARGS *cum;
5182 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5183 tree stack, grtop, vrtop, groff, vroff;
5184 tree t;
5185 int gr_save_area_size;
5186 int vr_save_area_size;
5187 int vr_offset;
5188
5189 cum = &crtl->args.info;
5190 gr_save_area_size
5191 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5192 vr_save_area_size
5193 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5194
5195 if (TARGET_GENERAL_REGS_ONLY)
5196 {
5197 if (cum->aapcs_nvrn > 0)
5198 sorry ("%qs and floating point or vector arguments",
5199 "-mgeneral-regs-only");
5200 vr_save_area_size = 0;
5201 }
5202
5203 f_stack = TYPE_FIELDS (va_list_type_node);
5204 f_grtop = DECL_CHAIN (f_stack);
5205 f_vrtop = DECL_CHAIN (f_grtop);
5206 f_groff = DECL_CHAIN (f_vrtop);
5207 f_vroff = DECL_CHAIN (f_groff);
5208
5209 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5210 NULL_TREE);
5211 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5212 NULL_TREE);
5213 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5214 NULL_TREE);
5215 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5216 NULL_TREE);
5217 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5218 NULL_TREE);
5219
5220 /* Emit code to initialize STACK, which points to the next varargs stack
5221 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5222 by named arguments. STACK is 8-byte aligned. */
5223 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5224 if (cum->aapcs_stack_size > 0)
5225 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5226 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5227 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5228
5229 /* Emit code to initialize GRTOP, the top of the GR save area.
5230 virtual_incoming_args_rtx should have been 16 byte aligned. */
5231 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5232 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5233 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5234
5235 /* Emit code to initialize VRTOP, the top of the VR save area.
5236 This address is gr_save_area_bytes below GRTOP, rounded
5237 down to the next 16-byte boundary. */
5238 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5239 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5240 STACK_BOUNDARY / BITS_PER_UNIT);
5241
5242 if (vr_offset)
5243 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5244 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5245 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5246
5247 /* Emit code to initialize GROFF, the offset from GRTOP of the
5248 next GPR argument. */
5249 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5250 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5251 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5252
5253 /* Likewise emit code to initialize VROFF, the offset from FTOP
5254 of the next VR argument. */
5255 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5256 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5257 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5258}
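/* Annotation (not part of the original source): illustrative numbers for the
   expansion above.  For a variadic callee such as f (int a, double b, ...),
   the named arguments consume one core and one vector register, so

     gr_save_area_size = (8 - 1) * 8  = 56 bytes
     vr_save_area_size = (8 - 1) * 16 = 112 bytes
     __gr_top = virtual incoming args, __vr_top = __gr_top - 64
     __gr_offs = -56, __vr_offs = -112

   (the 64 comes from rounding 56 up to the 16-byte STACK_BOUNDARY).  */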
5259
5260/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5261
5262static tree
5263aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5264 gimple_seq *post_p ATTRIBUTE_UNUSED)
5265{
5266 tree addr;
5267 bool indirect_p;
5268 bool is_ha; /* is HFA or HVA. */
5269 bool dw_align; /* double-word align. */
5270 enum machine_mode ag_mode = VOIDmode;
5271 int nregs;
5272 enum machine_mode mode;
5273
5274 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5275 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5276 HOST_WIDE_INT size, rsize, adjust, align;
5277 tree t, u, cond1, cond2;
5278
5279 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5280 if (indirect_p)
5281 type = build_pointer_type (type);
5282
5283 mode = TYPE_MODE (type);
5284
5285 f_stack = TYPE_FIELDS (va_list_type_node);
5286 f_grtop = DECL_CHAIN (f_stack);
5287 f_vrtop = DECL_CHAIN (f_grtop);
5288 f_groff = DECL_CHAIN (f_vrtop);
5289 f_vroff = DECL_CHAIN (f_groff);
5290
5291 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5292 f_stack, NULL_TREE);
5293 size = int_size_in_bytes (type);
5294 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5295
5296 dw_align = false;
5297 adjust = 0;
5298 if (aarch64_vfp_is_call_or_return_candidate (mode,
5299 type,
5300 &ag_mode,
5301 &nregs,
5302 &is_ha))
5303 {
5304 /* TYPE passed in fp/simd registers. */
5305 if (TARGET_GENERAL_REGS_ONLY)
5306 sorry ("%qs and floating point or vector arguments",
5307 "-mgeneral-regs-only");
5308
5309 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5310 unshare_expr (valist), f_vrtop, NULL_TREE);
5311 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5312 unshare_expr (valist), f_vroff, NULL_TREE);
5313
5314 rsize = nregs * UNITS_PER_VREG;
5315
5316 if (is_ha)
5317 {
5318 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5319 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5320 }
5321 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5322 && size < UNITS_PER_VREG)
5323 {
5324 adjust = UNITS_PER_VREG - size;
5325 }
5326 }
5327 else
5328 {
5329 /* TYPE passed in general registers. */
5330 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5331 unshare_expr (valist), f_grtop, NULL_TREE);
5332 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5333 unshare_expr (valist), f_groff, NULL_TREE);
5334 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5335 nregs = rsize / UNITS_PER_WORD;
5336
5337 if (align > 8)
5338 dw_align = true;
5339
5340 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5341 && size < UNITS_PER_WORD)
5342 {
5343 adjust = UNITS_PER_WORD - size;
5344 }
5345 }
5346
5347 /* Get a local temporary for the field value. */
5348 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5349
5350 /* Emit code to branch if off >= 0. */
5351 t = build2 (GE_EXPR, boolean_type_node, off,
5352 build_int_cst (TREE_TYPE (off), 0));
5353 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5354
5355 if (dw_align)
5356 {
5357 /* Emit: offs = (offs + 15) & -16. */
5358 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5359 build_int_cst (TREE_TYPE (off), 15));
5360 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5361 build_int_cst (TREE_TYPE (off), -16));
5362 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5363 }
5364 else
5365 roundup = NULL;
5366
5367 /* Update ap.__[g|v]r_offs */
5368 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5369 build_int_cst (TREE_TYPE (off), rsize));
5370 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5371
5372 /* String up. */
5373 if (roundup)
5374 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5375
5376 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5377 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5378 build_int_cst (TREE_TYPE (f_off), 0));
5379 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5380
5381 /* String up: make sure the assignment happens before the use. */
5382 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5383 COND_EXPR_ELSE (cond1) = t;
5384
5385 /* Prepare the trees handling the argument that is passed on the stack;
 5386 the top-level node will be stored in ON_STACK. */
5387 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5388 if (align > 8)
5389 {
5390 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5391 t = fold_convert (intDI_type_node, arg);
5392 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5393 build_int_cst (TREE_TYPE (t), 15));
5394 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5395 build_int_cst (TREE_TYPE (t), -16));
5396 t = fold_convert (TREE_TYPE (arg), t);
5397 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5398 }
5399 else
5400 roundup = NULL;
5401 /* Advance ap.__stack */
5402 t = fold_convert (intDI_type_node, arg);
5403 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5404 build_int_cst (TREE_TYPE (t), size + 7));
5405 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5406 build_int_cst (TREE_TYPE (t), -8));
5407 t = fold_convert (TREE_TYPE (arg), t);
5408 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5409 /* String up roundup and advance. */
5410 if (roundup)
5411 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5412 /* String up with arg */
5413 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5414 /* Big-endianness related address adjustment. */
5415 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5416 && size < UNITS_PER_WORD)
5417 {
5418 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5419 size_int (UNITS_PER_WORD - size));
5420 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5421 }
5422
5423 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5424 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5425
5426 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5427 t = off;
5428 if (adjust)
5429 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5430 build_int_cst (TREE_TYPE (off), adjust));
5431
5432 t = fold_convert (sizetype, t);
5433 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5434
5435 if (is_ha)
5436 {
5437 /* type ha; // treat as "struct {ftype field[n];}"
5438 ... [computing offs]
5439 for (i = 0; i <nregs; ++i, offs += 16)
5440 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5441 return ha; */
5442 int i;
5443 tree tmp_ha, field_t, field_ptr_t;
5444
5445 /* Declare a local variable. */
5446 tmp_ha = create_tmp_var_raw (type, "ha");
5447 gimple_add_tmp_var (tmp_ha);
5448
5449 /* Establish the base type. */
5450 switch (ag_mode)
5451 {
5452 case SFmode:
5453 field_t = float_type_node;
5454 field_ptr_t = float_ptr_type_node;
5455 break;
5456 case DFmode:
5457 field_t = double_type_node;
5458 field_ptr_t = double_ptr_type_node;
5459 break;
5460 case TFmode:
5461 field_t = long_double_type_node;
5462 field_ptr_t = long_double_ptr_type_node;
5463 break;
5464/* The half precision and quad precision are not fully supported yet. Enable
5465 the following code after the support is complete. Need to find the correct
5466 type node for __fp16 *. */
5467#if 0
5468 case HFmode:
5469 field_t = float_type_node;
5470 field_ptr_t = float_ptr_type_node;
5471 break;
5472#endif
5473 case V2SImode:
5474 case V4SImode:
5475 {
5476 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5477 field_t = build_vector_type_for_mode (innertype, ag_mode);
5478 field_ptr_t = build_pointer_type (field_t);
5479 }
5480 break;
5481 default:
5482 gcc_assert (0);
5483 }
5484
 5485 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
5486 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5487 addr = t;
5488 t = fold_convert (field_ptr_t, addr);
5489 t = build2 (MODIFY_EXPR, field_t,
5490 build1 (INDIRECT_REF, field_t, tmp_ha),
5491 build1 (INDIRECT_REF, field_t, t));
5492
5493 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5494 for (i = 1; i < nregs; ++i)
5495 {
5496 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5497 u = fold_convert (field_ptr_t, addr);
5498 u = build2 (MODIFY_EXPR, field_t,
5499 build2 (MEM_REF, field_t, tmp_ha,
5500 build_int_cst (field_ptr_t,
5501 (i *
5502 int_size_in_bytes (field_t)))),
5503 build1 (INDIRECT_REF, field_t, u));
5504 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5505 }
5506
5507 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5508 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5509 }
5510
5511 COND_EXPR_ELSE (cond2) = t;
5512 addr = fold_convert (build_pointer_type (type), cond1);
5513 addr = build_va_arg_indirect_ref (addr);
5514
5515 if (indirect_p)
5516 addr = build_va_arg_indirect_ref (addr);
5517
5518 return addr;
5519}
5520
5521/* Implement TARGET_SETUP_INCOMING_VARARGS. */
5522
5523static void
5524aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5525 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5526 int no_rtl)
5527{
5528 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5529 CUMULATIVE_ARGS local_cum;
5530 int gr_saved, vr_saved;
5531
5532 /* The caller has advanced CUM up to, but not beyond, the last named
5533 argument. Advance a local copy of CUM past the last "real" named
5534 argument, to find out how many registers are left over. */
5535 local_cum = *cum;
5536 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
5537
 5538 /* Find out how many registers we need to save. */
5539 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5540 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5541
5542 if (TARGET_GENERAL_REGS_ONLY)
5543 {
5544 if (local_cum.aapcs_nvrn > 0)
5545 sorry ("%qs and floating point or vector arguments",
5546 "-mgeneral-regs-only");
5547 vr_saved = 0;
5548 }
5549
5550 if (!no_rtl)
5551 {
5552 if (gr_saved > 0)
5553 {
5554 rtx ptr, mem;
5555
5556 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5557 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5558 - gr_saved * UNITS_PER_WORD);
5559 mem = gen_frame_mem (BLKmode, ptr);
5560 set_mem_alias_set (mem, get_varargs_alias_set ());
5561
5562 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5563 mem, gr_saved);
5564 }
5565 if (vr_saved > 0)
5566 {
5567 /* We can't use move_block_from_reg, because it will use
5568 the wrong mode, storing D regs only. */
5569 enum machine_mode mode = TImode;
5570 int off, i;
5571
5572 /* Set OFF to the offset from virtual_incoming_args_rtx of
5573 the first vector register. The VR save area lies below
5574 the GR one, and is aligned to 16 bytes. */
5575 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5576 STACK_BOUNDARY / BITS_PER_UNIT);
5577 off -= vr_saved * UNITS_PER_VREG;
5578
5579 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5580 {
5581 rtx ptr, mem;
5582
5583 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5584 mem = gen_frame_mem (mode, ptr);
5585 set_mem_alias_set (mem, get_varargs_alias_set ());
5586 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5587 off += UNITS_PER_VREG;
5588 }
5589 }
5590 }
5591
5592 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5593 any complication of having crtl->args.pretend_args_size changed. */
5594 cfun->machine->saved_varargs_size
5595 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5596 STACK_BOUNDARY / BITS_PER_UNIT)
5597 + vr_saved * UNITS_PER_VREG);
5598}
5599
5600static void
5601aarch64_conditional_register_usage (void)
5602{
5603 int i;
5604 if (!TARGET_FLOAT)
5605 {
5606 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5607 {
5608 fixed_regs[i] = 1;
5609 call_used_regs[i] = 1;
5610 }
5611 }
5612}
5613
5614/* Walk down the type tree of TYPE counting consecutive base elements.
5615 If *MODEP is VOIDmode, then set it to the first valid floating point
5616 type. If a non-floating point type is found, or if a floating point
5617 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5618 otherwise return the count in the sub-tree. */
5619static int
5620aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5621{
5622 enum machine_mode mode;
5623 HOST_WIDE_INT size;
5624
5625 switch (TREE_CODE (type))
5626 {
5627 case REAL_TYPE:
5628 mode = TYPE_MODE (type);
5629 if (mode != DFmode && mode != SFmode && mode != TFmode)
5630 return -1;
5631
5632 if (*modep == VOIDmode)
5633 *modep = mode;
5634
5635 if (*modep == mode)
5636 return 1;
5637
5638 break;
5639
5640 case COMPLEX_TYPE:
5641 mode = TYPE_MODE (TREE_TYPE (type));
5642 if (mode != DFmode && mode != SFmode && mode != TFmode)
5643 return -1;
5644
5645 if (*modep == VOIDmode)
5646 *modep = mode;
5647
5648 if (*modep == mode)
5649 return 2;
5650
5651 break;
5652
5653 case VECTOR_TYPE:
5654 /* Use V2SImode and V4SImode as representatives of all 64-bit
5655 and 128-bit vector types. */
5656 size = int_size_in_bytes (type);
5657 switch (size)
5658 {
5659 case 8:
5660 mode = V2SImode;
5661 break;
5662 case 16:
5663 mode = V4SImode;
5664 break;
5665 default:
5666 return -1;
5667 }
5668
5669 if (*modep == VOIDmode)
5670 *modep = mode;
5671
5672 /* Vector modes are considered to be opaque: two vectors are
5673 equivalent for the purposes of being homogeneous aggregates
5674 if they are the same size. */
5675 if (*modep == mode)
5676 return 1;
5677
5678 break;
5679
5680 case ARRAY_TYPE:
5681 {
5682 int count;
5683 tree index = TYPE_DOMAIN (type);
5684
5685 /* Can't handle incomplete types. */
5686 if (!COMPLETE_TYPE_P (type))
5687 return -1;
5688
5689 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5690 if (count == -1
5691 || !index
5692 || !TYPE_MAX_VALUE (index)
5693 || !host_integerp (TYPE_MAX_VALUE (index), 1)
5694 || !TYPE_MIN_VALUE (index)
5695 || !host_integerp (TYPE_MIN_VALUE (index), 1)
5696 || count < 0)
5697 return -1;
5698
5699 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
5700 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
5701
5702 /* There must be no padding. */
5703 if (!host_integerp (TYPE_SIZE (type), 1)
5704 || (tree_low_cst (TYPE_SIZE (type), 1)
5705 != count * GET_MODE_BITSIZE (*modep)))
5706 return -1;
5707
5708 return count;
5709 }
5710
5711 case RECORD_TYPE:
5712 {
5713 int count = 0;
5714 int sub_count;
5715 tree field;
5716
5717 /* Can't handle incomplete types. */
5718 if (!COMPLETE_TYPE_P (type))
5719 return -1;
5720
5721 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5722 {
5723 if (TREE_CODE (field) != FIELD_DECL)
5724 continue;
5725
5726 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5727 if (sub_count < 0)
5728 return -1;
5729 count += sub_count;
5730 }
5731
5732 /* There must be no padding. */
5733 if (!host_integerp (TYPE_SIZE (type), 1)
5734 || (tree_low_cst (TYPE_SIZE (type), 1)
5735 != count * GET_MODE_BITSIZE (*modep)))
5736 return -1;
5737
5738 return count;
5739 }
5740
5741 case UNION_TYPE:
5742 case QUAL_UNION_TYPE:
5743 {
5744 /* These aren't very interesting except in a degenerate case. */
5745 int count = 0;
5746 int sub_count;
5747 tree field;
5748
5749 /* Can't handle incomplete types. */
5750 if (!COMPLETE_TYPE_P (type))
5751 return -1;
5752
5753 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5754 {
5755 if (TREE_CODE (field) != FIELD_DECL)
5756 continue;
5757
5758 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5759 if (sub_count < 0)
5760 return -1;
5761 count = count > sub_count ? count : sub_count;
5762 }
5763
5764 /* There must be no padding. */
5765 if (!host_integerp (TYPE_SIZE (type), 1)
5766 || (tree_low_cst (TYPE_SIZE (type), 1)
5767 != count * GET_MODE_BITSIZE (*modep)))
5768 return -1;
5769
5770 return count;
5771 }
5772
5773 default:
5774 break;
5775 }
5776
5777 return -1;
5778}
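/* Annotation (not part of the original source): examples of the counting
   performed above:

     struct { float x, y, z; }      -> 3, *modep == SFmode   (HFA)
     _Complex double                -> 2, *modep == DFmode
     float v[4]                     -> 4, *modep == SFmode
     struct { double d; float f; }  -> -1 (mixed base modes)  */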
5779
5780/* Return TRUE if the type, as described by TYPE and MODE, is a composite
5781 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
5782 array types. The C99 floating-point complex types are also considered
5783 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
5784 types, which are GCC extensions and out of the scope of AAPCS64, are
5785 treated as composite types here as well.
5786
5787 Note that MODE itself is not sufficient in determining whether a type
5788 is such a composite type or not. This is because
5789 stor-layout.c:compute_record_mode may have already changed the MODE
5790 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
5791 structure with only one field may have its MODE set to the mode of the
5792 field. Also an integer mode whose size matches the size of the
5793 RECORD_TYPE type may be used to substitute the original mode
5794 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
5795 solely relied on. */
5796
5797static bool
5798aarch64_composite_type_p (const_tree type,
5799 enum machine_mode mode)
5800{
5801 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
5802 return true;
5803
5804 if (mode == BLKmode
5805 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
5806 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
5807 return true;
5808
5809 return false;
5810}
5811
5812/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
5813 type as described in AAPCS64 \S 4.1.2.
5814
5815 See the comment above aarch64_composite_type_p for the notes on MODE. */
5816
5817static bool
5818aarch64_short_vector_p (const_tree type,
5819 enum machine_mode mode)
5820{
5821 HOST_WIDE_INT size = -1;
5822
5823 if (type && TREE_CODE (type) == VECTOR_TYPE)
5824 size = int_size_in_bytes (type);
5825 else if (!aarch64_composite_type_p (type, mode)
5826 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5827 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
5828 size = GET_MODE_SIZE (mode);
5829
5830 return (size == 8 || size == 16) ? true : false;
5831}
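/* For example, both of

     typedef int v2si __attribute__ ((vector_size (8)));
     typedef float v4sf __attribute__ ((vector_size (16)));

   are short vector types (8 and 16 bytes respectively), whereas a 32-byte
   vector_size type is too wide to qualify.  */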
5832
5833/* Return TRUE if an argument, whose type is described by TYPE and MODE,
5834 shall be passed or returned in simd/fp register(s) (providing these
5835 parameter passing registers are available).
5836
5837 Upon successful return, *COUNT returns the number of needed registers,
5838 *BASE_MODE returns the mode of the individual register and when IS_HA
5839 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
5840 floating-point aggregate or a homogeneous short-vector aggregate. */
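/* For example, for

     struct hfa { double x; double y; };

   the walk over the fields below should report *count == 2 and
   *base_mode == DFmode, and set *is_ha when the caller asked for it, so
   the struct can go in two consecutive FP/SIMD registers.  */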
5841
5842static bool
5843aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
5844 const_tree type,
5845 enum machine_mode *base_mode,
5846 int *count,
5847 bool *is_ha)
5848{
5849 enum machine_mode new_mode = VOIDmode;
5850 bool composite_p = aarch64_composite_type_p (type, mode);
5851
5852 if (is_ha != NULL) *is_ha = false;
5853
5854 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
5855 || aarch64_short_vector_p (type, mode))
5856 {
5857 *count = 1;
5858 new_mode = mode;
5859 }
5860 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5861 {
5862 if (is_ha != NULL) *is_ha = true;
5863 *count = 2;
5864 new_mode = GET_MODE_INNER (mode);
5865 }
5866 else if (type && composite_p)
5867 {
5868 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5869
5870 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
5871 {
5872 if (is_ha != NULL) *is_ha = true;
5873 *count = ag_count;
5874 }
5875 else
5876 return false;
5877 }
5878 else
5879 return false;
5880
5881 *base_mode = new_mode;
5882 return true;
5883}
5884
5885/* Implement TARGET_STRUCT_VALUE_RTX. */
5886
5887static rtx
5888aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
5889 int incoming ATTRIBUTE_UNUSED)
5890{
5891 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
5892}
5893
5894/* Implements target hook vector_mode_supported_p. */
5895static bool
5896aarch64_vector_mode_supported_p (enum machine_mode mode)
5897{
5898 if (TARGET_SIMD
5899 && (mode == V4SImode || mode == V8HImode
5900 || mode == V16QImode || mode == V2DImode
5901 || mode == V2SImode || mode == V4HImode
5902 || mode == V8QImode || mode == V2SFmode
5903 || mode == V4SFmode || mode == V2DFmode))
5904 return true;
5905
5906 return false;
5907}
5908
5909/* Return quad mode as the preferred SIMD mode. */
5910static enum machine_mode
5911aarch64_preferred_simd_mode (enum machine_mode mode)
5912{
5913 if (TARGET_SIMD)
5914 switch (mode)
5915 {
5916 case DFmode:
5917 return V2DFmode;
5918 case SFmode:
5919 return V4SFmode;
5920 case SImode:
5921 return V4SImode;
5922 case HImode:
5923 return V8HImode;
5924 case QImode:
5925 return V16QImode;
5926 case DImode:
5927 return V2DImode;
5929
5930 default:;
5931 }
5932 return word_mode;
5933}
5934
5935/* Return the bitmask of possible vector sizes for the vectorizer
5936 to iterate over. */
5937static unsigned int
5938aarch64_autovectorize_vector_sizes (void)
5939{
5940 return (16 | 8);
5941}
5942
5943/* A table to help perform AArch64-specific name mangling for AdvSIMD
5944 vector types in order to conform to the AAPCS64 (see "Procedure
5945 Call Standard for the ARM 64-bit Architecture", Appendix A). To
5946 qualify for emission with the mangled names defined in that document,
5947 a vector type must not only be of the correct mode but also be
5948 composed of AdvSIMD vector element types (e.g.
5949 __builtin_aarch64_simd_qi); these types are registered by
5950 aarch64_init_simd_builtins (). In other words, vector types defined
5951 in other ways e.g. via vector_size attribute will get default
5952 mangled names. */
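/* As an illustration, a C++ declaration such as

     void f (int8x8_t);

   with int8x8_t taken from <arm_neon.h> (element type
   __builtin_aarch64_simd_qi, mode V8QImode) matches the "10__Int8x8_t"
   entry below and should mangle as _Z1f10__Int8x8_t, while an 8-byte
   vector built with the vector_size attribute keeps its default
   mangled name.  */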
5953typedef struct
5954{
5955 enum machine_mode mode;
5956 const char *element_type_name;
5957 const char *mangled_name;
5958} aarch64_simd_mangle_map_entry;
5959
5960static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
5961 /* 64-bit containerized types. */
5962 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
5963 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
5964 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
5965 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
5966 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
5967 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
5968 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
5969 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
5970 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
5971 /* 128-bit containerized types. */
5972 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
5973 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
5974 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
5975 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
5976 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
5977 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
5978 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
5979 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
5980 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
5981 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
5982 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
5983 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
5984 { VOIDmode, NULL, NULL }
5985};
5986
5987/* Implement TARGET_MANGLE_TYPE. */
5988
5989const char *
5990aarch64_mangle_type (const_tree type)
5991{
5992 /* The AArch64 ABI documents say that "__va_list" has to be
5993 mangled as if it is in the "std" namespace. */
5994 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
5995 return "St9__va_list";
5996
5997 /* Check the mode of the vector type, and the name of the vector
5998 element type, against the table. */
5999 if (TREE_CODE (type) == VECTOR_TYPE)
6000 {
6001 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6002
6003 while (pos->mode != VOIDmode)
6004 {
6005 tree elt_type = TREE_TYPE (type);
6006
6007 if (pos->mode == TYPE_MODE (type)
6008 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6009 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6010 pos->element_type_name))
6011 return pos->mangled_name;
6012
6013 pos++;
6014 }
6015 }
6016
6017 /* Use the default mangling. */
6018 return NULL;
6019}
6020
6021/* Return the equivalent letter for size. */
6022static unsigned char
6023sizetochar (int size)
6024{
6025 switch (size)
6026 {
6027 case 64: return 'd';
6028 case 32: return 's';
6029 case 16: return 'h';
6030 case 8 : return 'b';
6031 default: gcc_unreachable ();
6032 }
6033}
6034
6035/* Return true iff x is a uniform vector of floating-point
6036 constants, and the constant can be represented in
6037 quarter-precision form. Note, as aarch64_float_const_representable_p
6038 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
6039static bool
6040aarch64_vect_float_const_representable_p (rtx x)
6041{
6042 int i = 0;
6043 REAL_VALUE_TYPE r0, ri;
6044 rtx x0, xi;
6045
6046 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6047 return false;
6048
6049 x0 = CONST_VECTOR_ELT (x, 0);
6050 if (!CONST_DOUBLE_P (x0))
6051 return false;
6052
6053 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6054
6055 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6056 {
6057 xi = CONST_VECTOR_ELT (x, i);
6058 if (!CONST_DOUBLE_P (xi))
6059 return false;
6060
6061 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6062 if (!REAL_VALUES_EQUAL (r0, ri))
6063 return false;
6064 }
6065
6066 return aarch64_float_const_representable_p (x0);
6067}
6068
6069/* TODO: This function returns values similar to those
6070 returned by neon_valid_immediate in gcc/config/arm/arm.c
6071 but the API here is different enough that these magic numbers
6072 are not used. It should be sufficient to return true or false. */
6073static int
6074aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
6075 rtx *modconst, int *elementwidth,
6076 unsigned char *elementchar,
6077 int *mvn, int *shift)
6078{
6079#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6080 matches = 1; \
6081 for (i = 0; i < idx; i += (STRIDE)) \
6082 if (!(TEST)) \
6083 matches = 0; \
6084 if (matches) \
6085 { \
6086 immtype = (CLASS); \
6087 elsize = (ELSIZE); \
6088 elchar = sizetochar (elsize); \
6089 eshift = (SHIFT); \
6090 emvn = (NEG); \
6091 break; \
6092 }
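  /* As a rough guide, the first use below, CHECK (4, 32, 0, ...), matches a
     vector whose 32-bit elements all equal one 8-bit value with the upper
     three bytes clear, i.e. an immediate suitable for MOVI with 32-bit
     lanes and no shift.  */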
6093
6094 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6095 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6096 unsigned char bytes[16];
6097 unsigned char elchar = 0;
6098 int immtype = -1, matches;
6099 unsigned int invmask = inverse ? 0xff : 0;
6100 int eshift, emvn;
6101
43e9d192 6102 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6103 {
6104 bool simd_imm_zero = aarch64_simd_imm_zero_p (op, mode);
6105 int elem_width = GET_MODE_BITSIZE (GET_MODE (CONST_VECTOR_ELT (op, 0)));
6106
6107 if (!(simd_imm_zero
6108 || aarch64_vect_float_const_representable_p (op)))
6109 return -1;
6110
6111 if (modconst)
6112 *modconst = CONST_VECTOR_ELT (op, 0);
6113
6114 if (elementwidth)
6115 *elementwidth = elem_width;
6116
6117 if (elementchar)
6118 *elementchar = sizetochar (elem_width);
6119
6120 if (shift)
6121 *shift = 0;
6122
6123 if (simd_imm_zero)
6124 return 19;
6125 else
6126 return 18;
6127 }
6128
6129 /* Splat vector constant out into a byte vector. */
6130 for (i = 0; i < n_elts; i++)
6131 {
6132 rtx el = CONST_VECTOR_ELT (op, i);
6133 unsigned HOST_WIDE_INT elpart;
6134 unsigned int part, parts;
6135
6136 if (GET_CODE (el) == CONST_INT)
6137 {
6138 elpart = INTVAL (el);
6139 parts = 1;
6140 }
6141 else if (GET_CODE (el) == CONST_DOUBLE)
6142 {
6143 elpart = CONST_DOUBLE_LOW (el);
6144 parts = 2;
6145 }
6146 else
6147 gcc_unreachable ();
6148
6149 for (part = 0; part < parts; part++)
6150 {
6151 unsigned int byte;
6152 for (byte = 0; byte < innersize; byte++)
6153 {
6154 bytes[idx++] = (elpart & 0xff) ^ invmask;
6155 elpart >>= BITS_PER_UNIT;
6156 }
6157 if (GET_CODE (el) == CONST_DOUBLE)
6158 elpart = CONST_DOUBLE_HIGH (el);
6159 }
6160 }
6161
6162 /* Sanity check. */
6163 gcc_assert (idx == GET_MODE_SIZE (mode));
6164
6165 do
6166 {
6167 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6168 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6169
6170 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6171 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6172
6173 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6174 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6175
6176 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6177 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6178
6179 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6180
6181 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6182
6183 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6184 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6185
6186 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6187 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6188
6189 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6190 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6191
6192 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6193 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6194
6195 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6196
6197 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6198
6199 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6200 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6201
6202 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6203 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6204
6205 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6206 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 0, 0);
6207
6208 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6209 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 0, 1);
6210
6211 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6212
6213 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6214 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6215 }
6216 while (0);
6217
6218 /* TODO: Currently the assembler cannot handle types 12 to 15.
6219 And there is no way to specify cmode through the compiler.
6220 Disable them till there is support in the assembler. */
6221 if (immtype == -1
6222 || (immtype >= 12 && immtype <= 15)
6223 || immtype == 18)
6224 return -1;
6225
6226
6227 if (elementwidth)
6228 *elementwidth = elsize;
6229
6230 if (elementchar)
6231 *elementchar = elchar;
6232
6233 if (mvn)
6234 *mvn = emvn;
6235
6236 if (shift)
6237 *shift = eshift;
6238
6239 if (modconst)
6240 {
6241 unsigned HOST_WIDE_INT imm = 0;
6242
6243 /* Un-invert bytes of recognized vector, if necessary. */
6244 if (invmask != 0)
6245 for (i = 0; i < idx; i++)
6246 bytes[i] ^= invmask;
6247
6248 if (immtype == 17)
6249 {
6250 /* FIXME: Broken on 32-bit H_W_I hosts. */
6251 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6252
6253 for (i = 0; i < 8; i++)
6254 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6255 << (i * BITS_PER_UNIT);
6256
6257 *modconst = GEN_INT (imm);
6258 }
6259 else
6260 {
6261 unsigned HOST_WIDE_INT imm = 0;
6262
6263 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6264 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6265
6266 /* Construct 'abcdefgh' because the assembler cannot handle
6267 generic constants. */
6268 gcc_assert (shift != NULL && mvn != NULL);
6269 if (*mvn)
6270 imm = ~imm;
6271 imm = (imm >> *shift) & 0xff;
6272 *modconst = GEN_INT (imm);
6273 }
6274 }
6275
6276 return immtype;
6277#undef CHECK
6278}
6279
6280/* Return TRUE if rtx X is legal for use as either an AdvSIMD MOVI instruction
6281 (or, implicitly, MVNI) immediate. Write back width per element
6282 to *ELEMENTWIDTH, and a modified constant (whatever should be output
6283 for a MOVI instruction) in *MODCONST. */
6284int
6285aarch64_simd_immediate_valid_for_move (rtx op, enum machine_mode mode,
6286 rtx *modconst, int *elementwidth,
6287 unsigned char *elementchar,
6288 int *mvn, int *shift)
6289{
6290 rtx tmpconst;
6291 int tmpwidth;
6292 unsigned char tmpwidthc;
6293 int tmpmvn = 0, tmpshift = 0;
6294 int retval = aarch64_simd_valid_immediate (op, mode, 0, &tmpconst,
6295 &tmpwidth, &tmpwidthc,
6296 &tmpmvn, &tmpshift);
6297
6298 if (retval == -1)
6299 return 0;
6300
6301 if (modconst)
6302 *modconst = tmpconst;
6303
6304 if (elementwidth)
6305 *elementwidth = tmpwidth;
6306
6307 if (elementchar)
6308 *elementchar = tmpwidthc;
6309
6310 if (mvn)
6311 *mvn = tmpmvn;
6312
6313 if (shift)
6314 *shift = tmpshift;
6315
6316 return 1;
6317}
6318
6319static bool
6320aarch64_const_vec_all_same_int_p (rtx x,
6321 HOST_WIDE_INT minval,
6322 HOST_WIDE_INT maxval)
6323{
6324 HOST_WIDE_INT firstval;
6325 int count, i;
6326
6327 if (GET_CODE (x) != CONST_VECTOR
6328 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6329 return false;
6330
6331 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6332 if (firstval < minval || firstval > maxval)
6333 return false;
6334
6335 count = CONST_VECTOR_NUNITS (x);
6336 for (i = 1; i < count; i++)
6337 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6338 return false;
6339
6340 return true;
6341}
6342
6343/* Check if immediate shift constants are within range. */
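/* E.g. for V4SImode the lanes are 32 bits wide, so the vector must be a
   splat of one constant in the range 0..31 for a left shift or 1..32 for
   a right shift, matching the immediate forms of SHL and SSHR/USHR.  */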
6344bool
6345aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6346{
6347 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6348 if (left)
6349 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6350 else
6351 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6352}
6353
6354/* Return true if X is a uniform vector where all elements
6355 are either the floating-point constant 0.0 or the
6356 integer constant 0. */
6357bool
6358aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6359{
3520f7cc 6360 return x == CONST0_RTX (mode);
6361}
6362
6363bool
6364aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6365{
6366 HOST_WIDE_INT imm = INTVAL (x);
6367 int i;
6368
6369 for (i = 0; i < 8; i++)
6370 {
6371 unsigned int byte = imm & 0xff;
6372 if (byte != 0xff && byte != 0)
6373 return false;
6374 imm >>= 8;
6375 }
6376
6377 return true;
6378}
6379
6380/* Return a const_int vector of VAL. */
6381rtx
6382aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6383{
6384 int nunits = GET_MODE_NUNITS (mode);
6385 rtvec v = rtvec_alloc (nunits);
6386 int i;
6387
6388 for (i=0; i < nunits; i++)
6389 RTVEC_ELT (v, i) = GEN_INT (val);
6390
6391 return gen_rtx_CONST_VECTOR (mode, v);
6392}
6393
6394/* Construct and return a PARALLEL RTX vector. */
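/* E.g. for V4SImode this returns (parallel [(const_int 0) (const_int 1)])
   when HIGH is false and (parallel [(const_int 2) (const_int 3)]) when
   HIGH is true, suitable for selecting the low or high half of a
   vector.  */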
6395rtx
6396aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6397{
6398 int nunits = GET_MODE_NUNITS (mode);
6399 rtvec v = rtvec_alloc (nunits / 2);
6400 int base = high ? nunits / 2 : 0;
6401 rtx t1;
6402 int i;
6403
6404 for (i=0; i < nunits / 2; i++)
6405 RTVEC_ELT (v, i) = GEN_INT (base + i);
6406
6407 t1 = gen_rtx_PARALLEL (mode, v);
6408 return t1;
6409}
6410
6411/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6412 HIGH (exclusive). */
6413void
6414aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6415{
6416 HOST_WIDE_INT lane;
6417 gcc_assert (GET_CODE (operand) == CONST_INT);
6418 lane = INTVAL (operand);
6419
6420 if (lane < low || lane >= high)
6421 error ("lane out of range");
6422}
6423
6424void
6425aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6426{
6427 gcc_assert (GET_CODE (operand) == CONST_INT);
6428 HOST_WIDE_INT lane = INTVAL (operand);
6429
6430 if (lane < low || lane >= high)
6431 error ("constant out of range");
6432}
6433
6434/* Emit code to reinterpret one AdvSIMD type as another,
6435 without altering bits. */
6436void
6437aarch64_simd_reinterpret (rtx dest, rtx src)
6438{
6439 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6440}
6441
6442/* Emit code to place an AdvSIMD pair result in memory locations (with equal
6443 registers). */
6444void
6445aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6446 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6447 rtx op1)
6448{
6449 rtx mem = gen_rtx_MEM (mode, destaddr);
6450 rtx tmp1 = gen_reg_rtx (mode);
6451 rtx tmp2 = gen_reg_rtx (mode);
6452
6453 emit_insn (intfn (tmp1, op1, tmp2));
6454
6455 emit_move_insn (mem, tmp1);
6456 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6457 emit_move_insn (mem, tmp2);
6458}
6459
6460/* Return TRUE if OP is a valid vector addressing mode. */
6461bool
6462aarch64_simd_mem_operand_p (rtx op)
6463{
6464 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6465 || GET_CODE (XEXP (op, 0)) == REG);
6466}
6467
6468/* Set up OPERANDS for a register copy from SRC to DEST, taking care
6469 not to early-clobber SRC registers in the process.
6470
6471 We assume that the operands described by SRC and DEST represent a
6472 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6473 number of components into which the copy has been decomposed. */
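/* For instance, when a two-register value is copied from V0-V1 into V1-V2,
   writing V1 first would clobber the half of the source that has not been
   read yet; the REGNO comparison below emits the component moves in
   reverse order to avoid that.  */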
6474void
6475aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6476 rtx *src, unsigned int count)
6477{
6478 unsigned int i;
6479
6480 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6481 || REGNO (operands[0]) < REGNO (operands[1]))
6482 {
6483 for (i = 0; i < count; i++)
6484 {
6485 operands[2 * i] = dest[i];
6486 operands[2 * i + 1] = src[i];
6487 }
6488 }
6489 else
6490 {
6491 for (i = 0; i < count; i++)
6492 {
6493 operands[2 * i] = dest[count - i - 1];
6494 operands[2 * i + 1] = src[count - i - 1];
6495 }
6496 }
6497}
6498
6499/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6500 one of VSTRUCT modes: OI, CI or XI. */
6501int
6502aarch64_simd_attr_length_move (rtx insn)
6503{
6504 enum machine_mode mode;
6505
6506 extract_insn_cached (insn);
6507
6508 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6509 {
6510 mode = GET_MODE (recog_data.operand[0]);
6511 switch (mode)
6512 {
6513 case OImode:
6514 return 8;
6515 case CImode:
6516 return 12;
6517 case XImode:
6518 return 16;
6519 default:
6520 gcc_unreachable ();
6521 }
6522 }
6523 return 4;
6524}
6525
6526/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6527 alignment of a vector to 128 bits. */
6528static HOST_WIDE_INT
6529aarch64_simd_vector_alignment (const_tree type)
6530{
6531 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
6532 return MIN (align, 128);
6533}
6534
6535/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6536static bool
6537aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6538{
6539 if (is_packed)
6540 return false;
6541
6542 /* We guarantee alignment for vectors up to 128 bits. */
6543 if (tree_int_cst_compare (TYPE_SIZE (type),
6544 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6545 return false;
6546
6547 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6548 return true;
6549}
6550
6551/* If VALS is a vector constant that can be loaded into a register
6552 using DUP, generate instructions to do so and return an RTX to
6553 assign to the register. Otherwise return NULL_RTX. */
6554static rtx
6555aarch64_simd_dup_constant (rtx vals)
6556{
6557 enum machine_mode mode = GET_MODE (vals);
6558 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6559 int n_elts = GET_MODE_NUNITS (mode);
6560 bool all_same = true;
6561 rtx x;
6562 int i;
6563
6564 if (GET_CODE (vals) != CONST_VECTOR)
6565 return NULL_RTX;
6566
6567 for (i = 1; i < n_elts; ++i)
6568 {
6569 x = CONST_VECTOR_ELT (vals, i);
6570 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6571 all_same = false;
6572 }
6573
6574 if (!all_same)
6575 return NULL_RTX;
6576
6577 /* We can load this constant by using DUP and a constant in a
6578 single ARM register. This will be cheaper than a vector
6579 load. */
6580 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6581 return gen_rtx_VEC_DUPLICATE (mode, x);
6582}
6583
6584
6585/* Generate code to load VALS, which is a PARALLEL containing only
6586 constants (for vec_init) or CONST_VECTOR, efficiently into a
6587 register. Returns an RTX to copy into the register, or NULL_RTX
6588 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
6589rtx
6590aarch64_simd_make_constant (rtx vals)
6591{
6592 enum machine_mode mode = GET_MODE (vals);
6593 rtx const_dup;
6594 rtx const_vec = NULL_RTX;
6595 int n_elts = GET_MODE_NUNITS (mode);
6596 int n_const = 0;
6597 int i;
6598
6599 if (GET_CODE (vals) == CONST_VECTOR)
6600 const_vec = vals;
6601 else if (GET_CODE (vals) == PARALLEL)
6602 {
6603 /* A CONST_VECTOR must contain only CONST_INTs and
6604 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6605 Only store valid constants in a CONST_VECTOR. */
6606 for (i = 0; i < n_elts; ++i)
6607 {
6608 rtx x = XVECEXP (vals, 0, i);
6609 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
6610 n_const++;
6611 }
6612 if (n_const == n_elts)
6613 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6614 }
6615 else
6616 gcc_unreachable ();
6617
6618 if (const_vec != NULL_RTX
6619 && aarch64_simd_immediate_valid_for_move (const_vec, mode, NULL, NULL,
6620 NULL, NULL, NULL))
6621 /* Load using MOVI/MVNI. */
6622 return const_vec;
6623 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
6624 /* Loaded using DUP. */
6625 return const_dup;
6626 else if (const_vec != NULL_RTX)
6627 /* Load from constant pool. We cannot take advantage of single-cycle
6628 LD1 because we need a PC-relative addressing mode. */
6629 return const_vec;
6630 else
6631 /* A PARALLEL containing something not valid inside CONST_VECTOR.
6632 We cannot construct an initializer. */
6633 return NULL_RTX;
6634}
6635
6636void
6637aarch64_expand_vector_init (rtx target, rtx vals)
6638{
6639 enum machine_mode mode = GET_MODE (target);
6640 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6641 int n_elts = GET_MODE_NUNITS (mode);
6642 int n_var = 0, one_var = -1;
6643 bool all_same = true;
6644 rtx x, mem;
6645 int i;
6646
6647 x = XVECEXP (vals, 0, 0);
6648 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6649 n_var = 1, one_var = 0;
6650
6651 for (i = 1; i < n_elts; ++i)
6652 {
6653 x = XVECEXP (vals, 0, i);
6654 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6655 ++n_var, one_var = i;
6656
6657 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6658 all_same = false;
6659 }
6660
6661 if (n_var == 0)
6662 {
6663 rtx constant = aarch64_simd_make_constant (vals);
6664 if (constant != NULL_RTX)
6665 {
6666 emit_move_insn (target, constant);
6667 return;
6668 }
6669 }
6670
6671 /* Splat a single non-constant element if we can. */
6672 if (all_same)
6673 {
6674 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
6675 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
6676 return;
6677 }
6678
6679 /* One field is non-constant. Load constant then overwrite varying
6680 field. This is more efficient than using the stack. */
6681 if (n_var == 1)
6682 {
6683 rtx copy = copy_rtx (vals);
6684 rtx index = GEN_INT (one_var);
6685 enum insn_code icode;
6686
6687 /* Load constant part of vector, substitute neighboring value for
6688 varying element. */
6689 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
6690 aarch64_expand_vector_init (target, copy);
6691
6692 /* Insert variable. */
6693 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
6694 icode = optab_handler (vec_set_optab, mode);
6695 gcc_assert (icode != CODE_FOR_nothing);
6696 emit_insn (GEN_FCN (icode) (target, x, index));
6697 return;
6698 }
6699
6700 /* Construct the vector in memory one field at a time
6701 and load the whole vector. */
6702 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6703 for (i = 0; i < n_elts; i++)
6704 emit_move_insn (adjust_address_nv (mem, inner_mode,
6705 i * GET_MODE_SIZE (inner_mode)),
6706 XVECEXP (vals, 0, i));
6707 emit_move_insn (target, mem);
6708
6709}
6710
6711static unsigned HOST_WIDE_INT
6712aarch64_shift_truncation_mask (enum machine_mode mode)
6713{
6714 return
6715 (aarch64_vector_mode_supported_p (mode)
6716 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
6717}
6718
6719#ifndef TLS_SECTION_ASM_FLAG
6720#define TLS_SECTION_ASM_FLAG 'T'
6721#endif
6722
6723void
6724aarch64_elf_asm_named_section (const char *name, unsigned int flags,
6725 tree decl ATTRIBUTE_UNUSED)
6726{
6727 char flagchars[10], *f = flagchars;
6728
6729 /* If we have already declared this section, we can use an
6730 abbreviated form to switch back to it -- unless this section is
6731 part of a COMDAT group, in which case GAS requires the full
6732 declaration every time. */
6733 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6734 && (flags & SECTION_DECLARED))
6735 {
6736 fprintf (asm_out_file, "\t.section\t%s\n", name);
6737 return;
6738 }
6739
6740 if (!(flags & SECTION_DEBUG))
6741 *f++ = 'a';
6742 if (flags & SECTION_WRITE)
6743 *f++ = 'w';
6744 if (flags & SECTION_CODE)
6745 *f++ = 'x';
6746 if (flags & SECTION_SMALL)
6747 *f++ = 's';
6748 if (flags & SECTION_MERGE)
6749 *f++ = 'M';
6750 if (flags & SECTION_STRINGS)
6751 *f++ = 'S';
6752 if (flags & SECTION_TLS)
6753 *f++ = TLS_SECTION_ASM_FLAG;
6754 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6755 *f++ = 'G';
6756 *f = '\0';
6757
6758 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
6759
6760 if (!(flags & SECTION_NOTYPE))
6761 {
6762 const char *type;
6763 const char *format;
6764
6765 if (flags & SECTION_BSS)
6766 type = "nobits";
6767 else
6768 type = "progbits";
6769
6770#ifdef TYPE_OPERAND_FMT
6771 format = "," TYPE_OPERAND_FMT;
6772#else
6773 format = ",@%s";
6774#endif
6775
6776 fprintf (asm_out_file, format, type);
6777
6778 if (flags & SECTION_ENTSIZE)
6779 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
6780 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6781 {
6782 if (TREE_CODE (decl) == IDENTIFIER_NODE)
6783 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
6784 else
6785 fprintf (asm_out_file, ",%s,comdat",
6786 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
6787 }
6788 }
6789
6790 putc ('\n', asm_out_file);
6791}
6792
6793/* Select a format to encode pointers in exception handling data. */
6794int
6795aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
6796{
6797 int type;
6798 switch (aarch64_cmodel)
6799 {
6800 case AARCH64_CMODEL_TINY:
6801 case AARCH64_CMODEL_TINY_PIC:
6802 case AARCH64_CMODEL_SMALL:
6803 case AARCH64_CMODEL_SMALL_PIC:
6804 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
6805 for everything. */
6806 type = DW_EH_PE_sdata4;
6807 break;
6808 default:
6809 /* No assumptions here. 8-byte relocs required. */
6810 type = DW_EH_PE_sdata8;
6811 break;
6812 }
6813 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
6814}
6815
6816/* Emit load exclusive. */
6817
6818static void
6819aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
6820 rtx mem, rtx model_rtx)
6821{
6822 rtx (*gen) (rtx, rtx, rtx);
6823
6824 switch (mode)
6825 {
6826 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
6827 case HImode: gen = gen_aarch64_load_exclusivehi; break;
6828 case SImode: gen = gen_aarch64_load_exclusivesi; break;
6829 case DImode: gen = gen_aarch64_load_exclusivedi; break;
6830 default:
6831 gcc_unreachable ();
6832 }
6833
6834 emit_insn (gen (rval, mem, model_rtx));
6835}
6836
6837/* Emit store exclusive. */
6838
6839static void
6840aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
6841 rtx rval, rtx mem, rtx model_rtx)
6842{
6843 rtx (*gen) (rtx, rtx, rtx, rtx);
6844
6845 switch (mode)
6846 {
6847 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
6848 case HImode: gen = gen_aarch64_store_exclusivehi; break;
6849 case SImode: gen = gen_aarch64_store_exclusivesi; break;
6850 case DImode: gen = gen_aarch64_store_exclusivedi; break;
6851 default:
6852 gcc_unreachable ();
6853 }
6854
6855 emit_insn (gen (bval, rval, mem, model_rtx));
6856}
6857
6858/* Mark the previous jump instruction as unlikely. */
6859
6860static void
6861aarch64_emit_unlikely_jump (rtx insn)
6862{
6863 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
6864
6865 insn = emit_jump_insn (insn);
6866 add_reg_note (insn, REG_BR_PROB, very_unlikely);
6867}
6868
6869/* Expand a compare and swap pattern. */
6870
6871void
6872aarch64_expand_compare_and_swap (rtx operands[])
6873{
6874 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
6875 enum machine_mode mode, cmp_mode;
6876 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
6877
6878 bval = operands[0];
6879 rval = operands[1];
6880 mem = operands[2];
6881 oldval = operands[3];
6882 newval = operands[4];
6883 is_weak = operands[5];
6884 mod_s = operands[6];
6885 mod_f = operands[7];
6886 mode = GET_MODE (mem);
6887 cmp_mode = mode;
6888
6889 /* Normally the succ memory model must be stronger than fail, but in the
6890 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
6891 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
6892
6893 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
6894 && INTVAL (mod_s) == MEMMODEL_RELEASE)
6895 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
6896
6897 switch (mode)
6898 {
6899 case QImode:
6900 case HImode:
6901 /* For short modes, we're going to perform the comparison in SImode,
6902 so do the zero-extension now. */
6903 cmp_mode = SImode;
6904 rval = gen_reg_rtx (SImode);
6905 oldval = convert_modes (SImode, mode, oldval, true);
6906 /* Fall through. */
6907
6908 case SImode:
6909 case DImode:
6910 /* Force the value into a register if needed. */
6911 if (!aarch64_plus_operand (oldval, mode))
6912 oldval = force_reg (cmp_mode, oldval);
6913 break;
6914
6915 default:
6916 gcc_unreachable ();
6917 }
6918
6919 switch (mode)
6920 {
6921 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
6922 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
6923 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
6924 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
6925 default:
6926 gcc_unreachable ();
6927 }
6928
6929 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
6930
6931 if (mode == QImode || mode == HImode)
6932 emit_move_insn (operands[1], gen_lowpart (mode, rval));
6933
6934 x = gen_rtx_REG (CCmode, CC_REGNUM);
6935 x = gen_rtx_EQ (SImode, x, const0_rtx);
6936 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
6937}
6938
6939/* Split a compare and swap pattern. */
6940
6941void
6942aarch64_split_compare_and_swap (rtx operands[])
6943{
6944 rtx rval, mem, oldval, newval, scratch;
6945 enum machine_mode mode;
6946 bool is_weak;
6947 rtx label1, label2, x, cond;
6948
6949 rval = operands[0];
6950 mem = operands[1];
6951 oldval = operands[2];
6952 newval = operands[3];
6953 is_weak = (operands[4] != const0_rtx);
6954 scratch = operands[7];
6955 mode = GET_MODE (mem);
6956
6957 label1 = NULL_RTX;
6958 if (!is_weak)
6959 {
6960 label1 = gen_label_rtx ();
6961 emit_label (label1);
6962 }
6963 label2 = gen_label_rtx ();
6964
6965 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
6966
6967 cond = aarch64_gen_compare_reg (NE, rval, oldval);
6968 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
6969 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
6970 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
6971 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
6972
6973 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
6974
6975 if (!is_weak)
6976 {
6977 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
6978 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
6979 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
6980 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
6981 }
6982 else
6983 {
6984 cond = gen_rtx_REG (CCmode, CC_REGNUM);
6985 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
6986 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
6987 }
6988
6989 emit_label (label2);
6990}
6991
6992/* Split an atomic operation. */
6993
6994void
6995aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
6996 rtx value, rtx model_rtx, rtx cond)
6997{
6998 enum machine_mode mode = GET_MODE (mem);
6999 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7000 rtx label, x;
7001
7002 label = gen_label_rtx ();
7003 emit_label (label);
7004
7005 if (new_out)
7006 new_out = gen_lowpart (wmode, new_out);
7007 if (old_out)
7008 old_out = gen_lowpart (wmode, old_out);
7009 else
7010 old_out = new_out;
7011 value = simplify_gen_subreg (wmode, value, mode, 0);
7012
7013 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7014
7015 switch (code)
7016 {
7017 case SET:
7018 new_out = value;
7019 break;
7020
7021 case NOT:
7022 x = gen_rtx_AND (wmode, old_out, value);
7023 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7024 x = gen_rtx_NOT (wmode, new_out);
7025 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7026 break;
7027
7028 case MINUS:
7029 if (CONST_INT_P (value))
7030 {
7031 value = GEN_INT (-INTVAL (value));
7032 code = PLUS;
7033 }
7034 /* Fall through. */
7035
7036 default:
7037 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7038 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7039 break;
7040 }
7041
7042 aarch64_emit_store_exclusive (mode, cond, mem,
7043 gen_lowpart (mode, new_out), model_rtx);
7044
7045 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7046 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7047 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7048 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7049}
7050
7051static void
7052aarch64_start_file (void)
7053{
7054 if (selected_arch)
7055 asm_fprintf (asm_out_file, "\t.arch %s\n", selected_arch->name);
7056 else if (selected_cpu)
7057 asm_fprintf (asm_out_file, "\t.cpu %s\n", selected_cpu->name);
7058 default_file_start();
7059}
7060
7061/* Target hook for c_mode_for_suffix. */
7062static enum machine_mode
7063aarch64_c_mode_for_suffix (char suffix)
7064{
7065 if (suffix == 'q')
7066 return TFmode;
7067
7068 return VOIDmode;
7069}
7070
7071/* We can only represent floating point constants which will fit in
7072 "quarter-precision" values. These values are characterised by
7073 a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
7074 by:
7075
7076 (-1)^s * (n/16) * 2^r
7077
7078 Where:
7079 's' is the sign bit.
7080 'n' is an integer in the range 16 <= n <= 31.
7081 'r' is an integer in the range -3 <= r <= 4. */
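/* Worked example: 2.5 = (-1)^0 * (20/16) * 2^1, with n = 20 and r = 1 both
   in range, so it is representable; the representable magnitudes run from
   (16/16) * 2^-3 = 0.125 up to (31/16) * 2^4 = 31.0.  */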
7082
7083/* Return true iff X can be represented by a quarter-precision
7084 floating point immediate operand. Note, we cannot represent 0.0. */
7085bool
7086aarch64_float_const_representable_p (rtx x)
7087{
7088 /* This represents our current view of how many bits
7089 make up the mantissa. */
7090 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7091 int sign, exponent;
7092 unsigned HOST_WIDE_INT mantissa, mask;
7093 HOST_WIDE_INT m1, m2;
7094 REAL_VALUE_TYPE r, m;
7095
7096 if (!CONST_DOUBLE_P (x))
7097 return false;
7098
7099 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7100
7101 /* We cannot represent infinities, NaNs or +/-zero. We won't
7102 know if we have +zero until we analyse the mantissa, but we
7103 can reject the other invalid values. */
7104 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7105 || REAL_VALUE_MINUS_ZERO (r))
7106 return false;
7107
7108 /* Extract sign and exponent. */
7109 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
7110 r = real_value_abs (&r);
7111 exponent = REAL_EXP (&r);
7112
7113 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7114 highest (sign) bit, with a fixed binary point at bit point_pos.
7115 m1 holds the low part of the mantissa, m2 the high part.
7116 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7117 bits for the mantissa, this can fail (low bits will be lost). */
7118 real_ldexp (&m, &r, point_pos - exponent);
7119 REAL_VALUE_TO_INT (&m1, &m2, m);
7120
7121 /* If the low part of the mantissa has bits set we cannot represent
7122 the value. */
7123 if (m1 != 0)
7124 return false;
7125 /* We have rejected the lower HOST_WIDE_INT, so update our
7126 understanding of how many bits lie in the mantissa and
7127 look only at the high HOST_WIDE_INT. */
7128 mantissa = m2;
7129 point_pos -= HOST_BITS_PER_WIDE_INT;
7130
7131 /* We can only represent values with a mantissa of the form 1.xxxx. */
7132 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7133 if ((mantissa & mask) != 0)
7134 return false;
7135
7136 /* Having filtered unrepresentable values, we may now remove all
7137 but the highest 5 bits. */
7138 mantissa >>= point_pos - 5;
7139
7140 /* We cannot represent the value 0.0, so reject it. This is handled
7141 elsewhere. */
7142 if (mantissa == 0)
7143 return false;
7144
7145 /* Then, as bit 4 is always set, we can mask it off, leaving
7146 the mantissa in the range [0, 15]. */
7147 mantissa &= ~(1 << 4);
7148 gcc_assert (mantissa <= 15);
7149
7150 /* GCC internally does not use IEEE754-like encoding (where normalized
7151 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
7152 Our mantissa values are shifted 4 places to the left relative to
7153 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7154 by 5 places to correct for GCC's representation. */
7155 exponent = 5 - exponent;
7156
7157 return (exponent >= 0 && exponent <= 7);
7158}
7159
7160char*
7161aarch64_output_simd_mov_immediate (rtx *const_vector,
7162 enum machine_mode mode,
7163 unsigned width)
7164{
7165 int is_valid;
7166 unsigned char widthc;
7167 int lane_width_bits;
7168 static char templ[40];
7169 int shift = 0, mvn = 0;
7170 const char *mnemonic;
7171 unsigned int lane_count = 0;
7172
7173 is_valid =
7174 aarch64_simd_immediate_valid_for_move (*const_vector, mode,
7175 const_vector, &lane_width_bits,
7176 &widthc, &mvn, &shift);
7177 gcc_assert (is_valid);
7178
7179 mode = GET_MODE_INNER (mode);
7180 if (mode == SFmode || mode == DFmode)
7181 {
7182 bool zero_p =
7183 aarch64_float_const_zero_rtx_p (*const_vector);
7184 gcc_assert (shift == 0);
7185 mnemonic = zero_p ? "movi" : "fmov";
7186 }
7187 else
7188 mnemonic = mvn ? "mvni" : "movi";
7189
7190 gcc_assert (lane_width_bits != 0);
7191 lane_count = width / lane_width_bits;
7192
7193 if (lane_count == 1)
7194 snprintf (templ, sizeof (templ), "%s\t%%d0, %%1", mnemonic);
7195 else if (shift)
7196 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1, lsl %d",
7197 mnemonic, lane_count, widthc, shift);
7198 else
7199 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1",
7200 mnemonic, lane_count, widthc);
7201 return templ;
7202}
7203
7204/* Split operands into moves from op[1] + op[2] into op[0]. */
7205
7206void
7207aarch64_split_combinev16qi (rtx operands[3])
7208{
7209 unsigned int dest = REGNO (operands[0]);
7210 unsigned int src1 = REGNO (operands[1]);
7211 unsigned int src2 = REGNO (operands[2]);
7212 enum machine_mode halfmode = GET_MODE (operands[1]);
7213 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7214 rtx destlo, desthi;
7215
7216 gcc_assert (halfmode == V16QImode);
7217
7218 if (src1 == dest && src2 == dest + halfregs)
7219 {
7220 /* No-op move. Can't split to nothing; emit something. */
7221 emit_note (NOTE_INSN_DELETED);
7222 return;
7223 }
7224
7225 /* Preserve register attributes for variable tracking. */
7226 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7227 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7228 GET_MODE_SIZE (halfmode));
7229
7230 /* Special case of reversed high/low parts. */
7231 if (reg_overlap_mentioned_p (operands[2], destlo)
7232 && reg_overlap_mentioned_p (operands[1], desthi))
7233 {
7234 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7235 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7236 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7237 }
7238 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7239 {
7240 /* Try to avoid unnecessary moves if part of the result
7241 is in the right place already. */
7242 if (src1 != dest)
7243 emit_move_insn (destlo, operands[1]);
7244 if (src2 != dest + halfregs)
7245 emit_move_insn (desthi, operands[2]);
7246 }
7247 else
7248 {
7249 if (src2 != dest + halfregs)
7250 emit_move_insn (desthi, operands[2]);
7251 if (src1 != dest)
7252 emit_move_insn (destlo, operands[1]);
7253 }
7254}
7255
7256/* vec_perm support. */
7257
7258#define MAX_VECT_LEN 16
7259
7260struct expand_vec_perm_d
7261{
7262 rtx target, op0, op1;
7263 unsigned char perm[MAX_VECT_LEN];
7264 enum machine_mode vmode;
7265 unsigned char nelt;
7266 bool one_vector_p;
7267 bool testing_p;
7268};
7269
7270/* Generate a variable permutation. */
7271
7272static void
7273aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7274{
7275 enum machine_mode vmode = GET_MODE (target);
7276 bool one_vector_p = rtx_equal_p (op0, op1);
7277
7278 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7279 gcc_checking_assert (GET_MODE (op0) == vmode);
7280 gcc_checking_assert (GET_MODE (op1) == vmode);
7281 gcc_checking_assert (GET_MODE (sel) == vmode);
7282 gcc_checking_assert (TARGET_SIMD);
7283
7284 if (one_vector_p)
7285 {
7286 if (vmode == V8QImode)
7287 {
7288 /* Expand the argument to a V16QI mode by duplicating it. */
7289 rtx pair = gen_reg_rtx (V16QImode);
7290 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7291 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7292 }
7293 else
7294 {
7295 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7296 }
7297 }
7298 else
7299 {
7300 rtx pair;
7301
7302 if (vmode == V8QImode)
7303 {
7304 pair = gen_reg_rtx (V16QImode);
7305 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7306 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7307 }
7308 else
7309 {
7310 pair = gen_reg_rtx (OImode);
7311 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7312 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7313 }
7314 }
7315}
7316
7317void
7318aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7319{
7320 enum machine_mode vmode = GET_MODE (target);
7321 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7322 bool one_vector_p = rtx_equal_p (op0, op1);
7323 rtx rmask[MAX_VECT_LEN], mask;
7324
7325 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7326
7327 /* The TBL instruction does not use a modulo index, so we must take care
7328 of that ourselves. */
7329 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7330 for (i = 0; i < nelt; ++i)
7331 rmask[i] = mask;
7332 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7333 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7334
7335 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7336}
7337
7338/* Recognize patterns suitable for the TRN instructions. */
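/* E.g. for V4SImode with two input vectors, TRN1 realises the element
   selector {0, 4, 2, 6} and TRN2 the selector {1, 5, 3, 7}, which is the
   perm[i] == i + odd, perm[i + 1] == i + nelt + odd shape tested below.  */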
7339static bool
7340aarch64_evpc_trn (struct expand_vec_perm_d *d)
7341{
7342 unsigned int i, odd, mask, nelt = d->nelt;
7343 rtx out, in0, in1, x;
7344 rtx (*gen) (rtx, rtx, rtx);
7345 enum machine_mode vmode = d->vmode;
7346
7347 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7348 return false;
7349
7350 /* Note that these are little-endian tests.
7351 We correct for big-endian later. */
7352 if (d->perm[0] == 0)
7353 odd = 0;
7354 else if (d->perm[0] == 1)
7355 odd = 1;
7356 else
7357 return false;
7358 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7359
7360 for (i = 0; i < nelt; i += 2)
7361 {
7362 if (d->perm[i] != i + odd)
7363 return false;
7364 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7365 return false;
7366 }
7367
7368 /* Success! */
7369 if (d->testing_p)
7370 return true;
7371
7372 in0 = d->op0;
7373 in1 = d->op1;
7374 if (BYTES_BIG_ENDIAN)
7375 {
7376 x = in0, in0 = in1, in1 = x;
7377 odd = !odd;
7378 }
7379 out = d->target;
7380
7381 if (odd)
7382 {
7383 switch (vmode)
7384 {
7385 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7386 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7387 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7388 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7389 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7390 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7391 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7392 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7393 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7394 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7395 default:
7396 return false;
7397 }
7398 }
7399 else
7400 {
7401 switch (vmode)
7402 {
7403 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7404 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7405 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7406 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7407 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7408 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7409 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7410 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7411 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7412 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7413 default:
7414 return false;
7415 }
7416 }
7417
7418 emit_insn (gen (out, in0, in1));
7419 return true;
7420}
7421
7422/* Recognize patterns suitable for the UZP instructions. */
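/* E.g. for V4SImode with two input vectors, UZP1 extracts the even-numbered
   elements {0, 2, 4, 6} and UZP2 the odd-numbered ones {1, 3, 5, 7}, i.e.
   perm[i] == (i * 2 + odd) & mask as tested below.  */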
7423static bool
7424aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7425{
7426 unsigned int i, odd, mask, nelt = d->nelt;
7427 rtx out, in0, in1, x;
7428 rtx (*gen) (rtx, rtx, rtx);
7429 enum machine_mode vmode = d->vmode;
7430
7431 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7432 return false;
7433
7434 /* Note that these are little-endian tests.
7435 We correct for big-endian later. */
7436 if (d->perm[0] == 0)
7437 odd = 0;
7438 else if (d->perm[0] == 1)
7439 odd = 1;
7440 else
7441 return false;
7442 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7443
7444 for (i = 0; i < nelt; i++)
7445 {
7446 unsigned elt = (i * 2 + odd) & mask;
7447 if (d->perm[i] != elt)
7448 return false;
7449 }
7450
7451 /* Success! */
7452 if (d->testing_p)
7453 return true;
7454
7455 in0 = d->op0;
7456 in1 = d->op1;
7457 if (BYTES_BIG_ENDIAN)
7458 {
7459 x = in0, in0 = in1, in1 = x;
7460 odd = !odd;
7461 }
7462 out = d->target;
7463
7464 if (odd)
7465 {
7466 switch (vmode)
7467 {
7468 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7469 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7470 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7471 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7472 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7473 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7474 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7475 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7476 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7477 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7478 default:
7479 return false;
7480 }
7481 }
7482 else
7483 {
7484 switch (vmode)
7485 {
7486 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7487 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7488 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7489 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7490 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7491 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7492 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7493 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7494 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7495 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7496 default:
7497 return false;
7498 }
7499 }
7500
7501 emit_insn (gen (out, in0, in1));
7502 return true;
7503}
7504
7505/* Recognize patterns suitable for the ZIP instructions. */
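/* E.g. for V4SImode with two input vectors, ZIP1 interleaves the low halves
   as {0, 4, 1, 5} and ZIP2 the high halves as {2, 6, 3, 7}; the loop below
   checks for exactly that interleaving.  */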
7506static bool
7507aarch64_evpc_zip (struct expand_vec_perm_d *d)
7508{
7509 unsigned int i, high, mask, nelt = d->nelt;
7510 rtx out, in0, in1, x;
7511 rtx (*gen) (rtx, rtx, rtx);
7512 enum machine_mode vmode = d->vmode;
7513
7514 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7515 return false;
7516
7517 /* Note that these are little-endian tests.
7518 We correct for big-endian later. */
7519 high = nelt / 2;
7520 if (d->perm[0] == high)
7521 /* Do Nothing. */
7522 ;
7523 else if (d->perm[0] == 0)
7524 high = 0;
7525 else
7526 return false;
7527 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7528
7529 for (i = 0; i < nelt / 2; i++)
7530 {
7531 unsigned elt = (i + high) & mask;
7532 if (d->perm[i * 2] != elt)
7533 return false;
7534 elt = (elt + nelt) & mask;
7535 if (d->perm[i * 2 + 1] != elt)
7536 return false;
7537 }
7538
7539 /* Success! */
7540 if (d->testing_p)
7541 return true;
7542
7543 in0 = d->op0;
7544 in1 = d->op1;
7545 if (BYTES_BIG_ENDIAN)
7546 {
7547 x = in0, in0 = in1, in1 = x;
7548 high = !high;
7549 }
7550 out = d->target;
7551
7552 if (high)
7553 {
7554 switch (vmode)
7555 {
7556 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7557 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7558 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7559 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
7560 case V4SImode: gen = gen_aarch64_zip2v4si; break;
7561 case V2SImode: gen = gen_aarch64_zip2v2si; break;
7562 case V2DImode: gen = gen_aarch64_zip2v2di; break;
7563 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
7564 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
7565 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
7566 default:
7567 return false;
7568 }
7569 }
7570 else
7571 {
7572 switch (vmode)
7573 {
7574 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
7575 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
7576 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
7577 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
7578 case V4SImode: gen = gen_aarch64_zip1v4si; break;
7579 case V2SImode: gen = gen_aarch64_zip1v2si; break;
7580 case V2DImode: gen = gen_aarch64_zip1v2di; break;
7581 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
7582 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
7583 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
7584 default:
7585 return false;
7586 }
7587 }
7588
7589 emit_insn (gen (out, in0, in1));
7590 return true;
7591}
7592
7593static bool
7594aarch64_evpc_tbl (struct expand_vec_perm_d *d)
7595{
7596 rtx rperm[MAX_VECT_LEN], sel;
7597 enum machine_mode vmode = d->vmode;
7598 unsigned int i, nelt = d->nelt;
7599
7600 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
7601 numbering of elements for big-endian, we must reverse the order. */
7602 if (BYTES_BIG_ENDIAN)
7603 return false;
7604
7605 if (d->testing_p)
7606 return true;
7607
7608 /* Generic code will try constant permutation twice. Once with the
7609 original mode and again with the elements lowered to QImode.
7610 So wait and don't do the selector expansion ourselves. */
7611 if (vmode != V8QImode && vmode != V16QImode)
7612 return false;
7613
7614 for (i = 0; i < nelt; ++i)
7615 rperm[i] = GEN_INT (d->perm[i]);
7616 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
7617 sel = force_reg (vmode, sel);
7618
7619 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
7620 return true;
7621}
7622
7623static bool
7624aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
7625{
7626 /* The pattern matching functions above are written to look for a small
7627 number to begin the sequence (0, 1, N/2). If we begin with an index
7628 from the second operand, we can swap the operands. */
7629 if (d->perm[0] >= d->nelt)
7630 {
7631 unsigned i, nelt = d->nelt;
7632 rtx x;
7633
7634 for (i = 0; i < nelt; ++i)
7635 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
7636
7637 x = d->op0;
7638 d->op0 = d->op1;
7639 d->op1 = x;
7640 }
7641
7642 if (TARGET_SIMD)
7643 {
7644 if (aarch64_evpc_zip (d))
7645 return true;
7646 else if (aarch64_evpc_uzp (d))
7647 return true;
7648 else if (aarch64_evpc_trn (d))
7649 return true;
7650 return aarch64_evpc_tbl (d);
7651 }
7652 return false;
7653}
7654
7655/* Expand a vec_perm_const pattern. */
7656
7657bool
7658aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
7659{
7660 struct expand_vec_perm_d d;
7661 int i, nelt, which;
7662
7663 d.target = target;
7664 d.op0 = op0;
7665 d.op1 = op1;
7666
7667 d.vmode = GET_MODE (target);
7668 gcc_assert (VECTOR_MODE_P (d.vmode));
7669 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
7670 d.testing_p = false;
7671
7672 for (i = which = 0; i < nelt; ++i)
7673 {
7674 rtx e = XVECEXP (sel, 0, i);
7675 int ei = INTVAL (e) & (2 * nelt - 1);
7676 which |= (ei < nelt ? 1 : 2);
7677 d.perm[i] = ei;
7678 }
7679
7680 switch (which)
7681 {
7682 default:
7683 gcc_unreachable ();
7684
7685 case 3:
7686 d.one_vector_p = false;
7687 if (!rtx_equal_p (op0, op1))
7688 break;
7689
7690 /* The elements of PERM do not suggest that only the first operand
7691 is used, but both operands are identical. Allow easier matching
7692 of the permutation by folding the permutation into the single
7693 input vector. */
7694 /* Fall Through. */
7695 case 2:
7696 for (i = 0; i < nelt; ++i)
7697 d.perm[i] &= nelt - 1;
7698 d.op0 = op1;
7699 d.one_vector_p = true;
7700 break;
7701
7702 case 1:
7703 d.op1 = op0;
7704 d.one_vector_p = true;
7705 break;
7706 }
7707
7708 return aarch64_expand_vec_perm_const_1 (&d);
7709}
7710
7711static bool
7712aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
7713 const unsigned char *sel)
7714{
7715 struct expand_vec_perm_d d;
7716 unsigned int i, nelt, which;
7717 bool ret;
7718
7719 d.vmode = vmode;
7720 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
7721 d.testing_p = true;
7722 memcpy (d.perm, sel, nelt);
7723
7724 /* Calculate whether all elements are in one vector. */
7725 for (i = which = 0; i < nelt; ++i)
7726 {
7727 unsigned char e = d.perm[i];
7728 gcc_assert (e < 2 * nelt);
7729 which |= (e < nelt ? 1 : 2);
7730 }
7731
7732 /* If all elements are from the second vector, reindex as if from the
7733 first vector. */
7734 if (which == 2)
7735 for (i = 0; i < nelt; ++i)
7736 d.perm[i] -= nelt;
7737
7738 /* Check whether the mask can be applied to a single vector. */
7739 d.one_vector_p = (which != 3);
7740
7741 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
7742 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
7743 if (!d.one_vector_p)
7744 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
7745
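  /* The operands above are dummy pseudos; probe the expanders with
     testing_p set, wrapping the attempt in a throw-away sequence so that
     any RTL that does get generated is simply discarded.  */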
7746 start_sequence ();
7747 ret = aarch64_expand_vec_perm_const_1 (&d);
7748 end_sequence ();
7749
7750 return ret;
7751}
7752
7753#undef TARGET_ADDRESS_COST
7754#define TARGET_ADDRESS_COST aarch64_address_cost
7755
7756/* This hook determines whether unnamed bitfields affect the alignment
7757 of the containing structure. The hook returns true if the structure
7758 should inherit the alignment requirements of an unnamed bitfield's
7759 type. */
7760#undef TARGET_ALIGN_ANON_BITFIELD
7761#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
7762
7763#undef TARGET_ASM_ALIGNED_DI_OP
7764#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
7765
7766#undef TARGET_ASM_ALIGNED_HI_OP
7767#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
7768
7769#undef TARGET_ASM_ALIGNED_SI_OP
7770#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
7771
7772#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7773#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
7774 hook_bool_const_tree_hwi_hwi_const_tree_true
7775
7776#undef TARGET_ASM_FILE_START
7777#define TARGET_ASM_FILE_START aarch64_start_file
7778
7779#undef TARGET_ASM_OUTPUT_MI_THUNK
7780#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
7781
7782#undef TARGET_ASM_SELECT_RTX_SECTION
7783#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
7784
7785#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
7786#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
7787
7788#undef TARGET_BUILD_BUILTIN_VA_LIST
7789#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
7790
7791#undef TARGET_CALLEE_COPIES
7792#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
7793
7794#undef TARGET_CAN_ELIMINATE
7795#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
7796
7797#undef TARGET_CANNOT_FORCE_CONST_MEM
7798#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
7799
7800#undef TARGET_CONDITIONAL_REGISTER_USAGE
7801#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
7802
7803/* Only the least significant bit is used for initialization guard
7804 variables. */
7805#undef TARGET_CXX_GUARD_MASK_BIT
7806#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
7807
7808#undef TARGET_C_MODE_FOR_SUFFIX
7809#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
7810
7811#ifdef TARGET_BIG_ENDIAN_DEFAULT
7812#undef TARGET_DEFAULT_TARGET_FLAGS
7813#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
7814#endif
7815
7816#undef TARGET_CLASS_MAX_NREGS
7817#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
7818
7819#undef TARGET_BUILTIN_DECL
7820#define TARGET_BUILTIN_DECL aarch64_builtin_decl
7821
7822#undef TARGET_EXPAND_BUILTIN
7823#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
7824
7825#undef TARGET_EXPAND_BUILTIN_VA_START
7826#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
7827
7828#undef TARGET_FUNCTION_ARG
7829#define TARGET_FUNCTION_ARG aarch64_function_arg
7830
7831#undef TARGET_FUNCTION_ARG_ADVANCE
7832#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
7833
7834#undef TARGET_FUNCTION_ARG_BOUNDARY
7835#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
7836
7837#undef TARGET_FUNCTION_OK_FOR_SIBCALL
7838#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
7839
7840#undef TARGET_FUNCTION_VALUE
7841#define TARGET_FUNCTION_VALUE aarch64_function_value
7842
7843#undef TARGET_FUNCTION_VALUE_REGNO_P
7844#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
7845
7846#undef TARGET_FRAME_POINTER_REQUIRED
7847#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
7848
7849#undef TARGET_GIMPLIFY_VA_ARG_EXPR
7850#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
7851
7852#undef TARGET_INIT_BUILTINS
7853#define TARGET_INIT_BUILTINS aarch64_init_builtins
7854
7855#undef TARGET_LEGITIMATE_ADDRESS_P
7856#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
7857
7858#undef TARGET_LEGITIMATE_CONSTANT_P
7859#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
7860
7861#undef TARGET_LIBGCC_CMP_RETURN_MODE
7862#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
7863
7864#undef TARGET_MANGLE_TYPE
7865#define TARGET_MANGLE_TYPE aarch64_mangle_type
7866
7867#undef TARGET_MEMORY_MOVE_COST
7868#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
7869
7870#undef TARGET_MUST_PASS_IN_STACK
7871#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7872
7873/* This target hook should return true if accesses to volatile bitfields
7874 should use the narrowest mode possible. It should return false if these
7875 accesses should use the bitfield container type. */
7876#undef TARGET_NARROW_VOLATILE_BITFIELD
7877#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
7878
7879#undef TARGET_OPTION_OVERRIDE
7880#define TARGET_OPTION_OVERRIDE aarch64_override_options
7881
7882#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
7883#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
7884 aarch64_override_options_after_change
7885
7886#undef TARGET_PASS_BY_REFERENCE
7887#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
7888
7889#undef TARGET_PREFERRED_RELOAD_CLASS
7890#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
7891
7892#undef TARGET_SECONDARY_RELOAD
7893#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
7894
7895#undef TARGET_SHIFT_TRUNCATION_MASK
7896#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
7897
7898#undef TARGET_SETUP_INCOMING_VARARGS
7899#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
7900
7901#undef TARGET_STRUCT_VALUE_RTX
7902#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
7903
7904#undef TARGET_REGISTER_MOVE_COST
7905#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
7906
7907#undef TARGET_RETURN_IN_MEMORY
7908#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
7909
7910#undef TARGET_RETURN_IN_MSB
7911#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
7912
7913#undef TARGET_RTX_COSTS
7914#define TARGET_RTX_COSTS aarch64_rtx_costs
7915
7916#undef TARGET_TRAMPOLINE_INIT
7917#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
7918
7919#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
7920#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
7921
7922#undef TARGET_VECTOR_MODE_SUPPORTED_P
7923#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
7924
7925#undef TARGET_ARRAY_MODE_SUPPORTED_P
7926#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
7927
7928#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
7929#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
7930
7931#undef TARGET_VECTORIZE_BUILTINS
7932#define TARGET_VECTORIZE_BUILTINS
7933
7934#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
7935#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
7936 aarch64_builtin_vectorized_function
7937
7938#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
7939#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
7940 aarch64_autovectorize_vector_sizes
7941
7942/* Section anchor support. */
7943
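/* -256 matches the most negative offset accepted by the 9-bit signed
   "unscaled" addressing forms.  */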
7944#undef TARGET_MIN_ANCHOR_OFFSET
7945#define TARGET_MIN_ANCHOR_OFFSET -256
7946
7947/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
7948 byte offset; we can do much more for larger data types, but have no way
7949 to determine the size of the access. We assume accesses are aligned. */
7950#undef TARGET_MAX_ANCHOR_OFFSET
7951#define TARGET_MAX_ANCHOR_OFFSET 4095
7952
7953#undef TARGET_VECTOR_ALIGNMENT
7954#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
7955
7956#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7957#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
7958 aarch64_simd_vector_alignment_reachable
7959
7960/* vec_perm support. */
7961
7962#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
7963#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
7964 aarch64_vectorize_vec_perm_const_ok
7965
7966
7967#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
7968
7969struct gcc_target targetm = TARGET_INITIALIZER;
7970
7971#include "gt-aarch64.h"