 1/* Machine description for AArch64 architecture.
 2   Copyright (C) 2009-2013 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
25#include "insn-codes.h"
26#include "rtl.h"
27#include "insn-attr.h"
28#include "tree.h"
29#include "regs.h"
30#include "df.h"
31#include "hard-reg-set.h"
32#include "output.h"
33#include "expr.h"
34#include "reload.h"
35#include "toplev.h"
36#include "target.h"
37#include "target-def.h"
38#include "targhooks.h"
39#include "ggc.h"
40#include "function.h"
41#include "tm_p.h"
42#include "recog.h"
43#include "langhooks.h"
44#include "diagnostic-core.h"
45#include "gimple.h"
46#include "optabs.h"
47#include "dwarf2.h"
48
49/* Classifies an address.
50
51 ADDRESS_REG_IMM
52 A simple base register plus immediate offset.
53
54 ADDRESS_REG_WB
55 A base register indexed by immediate offset with writeback.
56
57 ADDRESS_REG_REG
58 A base register indexed by (optionally scaled) register.
59
60 ADDRESS_REG_UXTW
61 A base register indexed by (optionally scaled) zero-extended register.
62
63 ADDRESS_REG_SXTW
64 A base register indexed by (optionally scaled) sign-extended register.
65
66 ADDRESS_LO_SUM
67 A LO_SUM rtx with a base register and "LO12" symbol relocation.
68
69 ADDRESS_SYMBOLIC:
70 A constant symbolic address, in pc-relative literal pool. */
71
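/* Illustrative, hand-written examples (not compiler output) of typical
   instructions matching each classification above:
     ADDRESS_REG_IMM    ldr x0, [x1, #16]
     ADDRESS_REG_WB     ldr x0, [x1, #16]!    or    ldr x0, [x1], #16
     ADDRESS_REG_REG    ldr x0, [x1, x2, lsl #3]
     ADDRESS_REG_UXTW   ldr x0, [x1, w2, uxtw #3]
     ADDRESS_REG_SXTW   ldr x0, [x1, w2, sxtw #3]
     ADDRESS_LO_SUM     add x0, x1, #:lo12:sym   (paired with an adrp)
     ADDRESS_SYMBOLIC   ldr x0, .Llitpool        (pc-relative literal)  */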
72enum aarch64_address_type {
73 ADDRESS_REG_IMM,
74 ADDRESS_REG_WB,
75 ADDRESS_REG_REG,
76 ADDRESS_REG_UXTW,
77 ADDRESS_REG_SXTW,
78 ADDRESS_LO_SUM,
79 ADDRESS_SYMBOLIC
80};
81
82struct aarch64_address_info {
83 enum aarch64_address_type type;
84 rtx base;
85 rtx offset;
86 int shift;
87 enum aarch64_symbol_type symbol_type;
88};
89
90/* The current code model. */
91enum aarch64_code_model aarch64_cmodel;
92
93#ifdef HAVE_AS_TLS
94#undef TARGET_HAVE_TLS
95#define TARGET_HAVE_TLS 1
96#endif
97
98static bool aarch64_composite_type_p (const_tree, enum machine_mode);
99static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
100 const_tree,
101 enum machine_mode *, int *,
102 bool *);
103static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
104static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
105static void aarch64_override_options_after_change (void);
106static int aarch64_simd_valid_immediate (rtx, enum machine_mode, int, rtx *,
107 int *, unsigned char *, int *, int *);
108static bool aarch64_vector_mode_supported_p (enum machine_mode);
109static unsigned bit_count (unsigned HOST_WIDE_INT);
110static bool aarch64_const_vec_all_same_int_p (rtx,
111 HOST_WIDE_INT, HOST_WIDE_INT);
112
113static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
114 const unsigned char *sel);
115
116/* The processor for which instructions should be scheduled. */
117enum aarch64_processor aarch64_tune = generic;
118
119/* The current tuning set. */
120const struct tune_params *aarch64_tune_params;
121
122/* Mask to specify which instructions we are allowed to generate. */
123unsigned long aarch64_isa_flags = 0;
124
125/* Mask to specify which instruction scheduling options should be used. */
126unsigned long aarch64_tune_flags = 0;
127
128/* Tuning parameters. */
129
130#if HAVE_DESIGNATED_INITIALIZERS
131#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
132#else
133#define NAMED_PARAM(NAME, VAL) (VAL)
134#endif
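/* A sketch of the two expansions: with designated initializers available,
     NAMED_PARAM (memory_load, COSTS_N_INSNS (1))
   expands to
     .memory_load = (COSTS_N_INSNS (1))
   and otherwise to the bare positional value
     (COSTS_N_INSNS (1))
   so in the positional case the tables below must list the fields in
   declaration order.  */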
135
136#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
137__extension__
138#endif
139static const struct cpu_rtx_cost_table generic_rtx_cost_table =
140{
141 NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
142 NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
143 NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
144 NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
145 NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
146 NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
147 NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
148 NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
149 NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
150 NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
151 NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
152 NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
153};
154
155#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
156__extension__
157#endif
158static const struct cpu_addrcost_table generic_addrcost_table =
159{
160 NAMED_PARAM (pre_modify, 0),
161 NAMED_PARAM (post_modify, 0),
162 NAMED_PARAM (register_offset, 0),
163 NAMED_PARAM (register_extend, 0),
164 NAMED_PARAM (imm_offset, 0)
165};
166
167#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
168__extension__
169#endif
170static const struct cpu_regmove_cost generic_regmove_cost =
171{
172 NAMED_PARAM (GP2GP, 1),
173 NAMED_PARAM (GP2FP, 2),
174 NAMED_PARAM (FP2GP, 2),
175 /* We currently do not provide direct support for TFmode Q->Q move.
176 Therefore we need to raise the cost above 2 in order to have
177 reload handle the situation. */
178 NAMED_PARAM (FP2FP, 4)
179};
180
181#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
182__extension__
183#endif
184static const struct tune_params generic_tunings =
185{
186 &generic_rtx_cost_table,
187 &generic_addrcost_table,
188 &generic_regmove_cost,
189 NAMED_PARAM (memmov_cost, 4)
190};
191
192/* A processor implementing AArch64. */
193struct processor
194{
195 const char *const name;
196 enum aarch64_processor core;
197 const char *arch;
198 const unsigned long flags;
199 const struct tune_params *const tune;
200};
201
202/* Processor cores implementing AArch64. */
203static const struct processor all_cores[] =
204{
205#define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
206 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
207#include "aarch64-cores.def"
208#undef AARCH64_CORE
209 {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
210 {NULL, aarch64_none, NULL, 0, NULL}
211};
212
213/* Architectures implementing AArch64. */
214static const struct processor all_architectures[] =
215{
216#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
217 {NAME, CORE, #ARCH, FLAGS, NULL},
218#include "aarch64-arches.def"
219#undef AARCH64_ARCH
220 {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
221 {NULL, aarch64_none, NULL, 0, NULL}
222};
223
 224/* Target specification.  These are populated as command-line arguments
225 are processed, or NULL if not specified. */
226static const struct processor *selected_arch;
227static const struct processor *selected_cpu;
228static const struct processor *selected_tune;
229
230#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
231
232/* An ISA extension in the co-processor and main instruction set space. */
233struct aarch64_option_extension
234{
235 const char *const name;
236 const unsigned long flags_on;
237 const unsigned long flags_off;
238};
239
240/* ISA extensions in AArch64. */
241static const struct aarch64_option_extension all_extensions[] =
242{
243#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
244 {NAME, FLAGS_ON, FLAGS_OFF},
245#include "aarch64-option-extensions.def"
246#undef AARCH64_OPT_EXTENSION
247 {NULL, 0, 0}
248};
249
250/* Used to track the size of an address when generating a pre/post
251 increment address. */
252static enum machine_mode aarch64_memory_reference_mode;
253
254/* Used to force GTY into this file. */
255static GTY(()) int gty_dummy;
256
257/* A table of valid AArch64 "bitmask immediate" values for
258 logical instructions. */
259
260#define AARCH64_NUM_BITMASKS 5334
261static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
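/* Roughly speaking (an illustrative note, not the authoritative rule),
   a "bitmask immediate" is a repeating pattern of identical 2-, 4-, 8-,
   16-, 32- or 64-bit elements, each element a rotated run of contiguous
   ones; e.g. 0x7 and 0x00ff00ff00ff00ff are encodable, while 0x0 and
   0x5 are not.  The table above is filled with the full enumeration.  */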
262
263/* Did we set flag_omit_frame_pointer just so
264 aarch64_frame_pointer_required would be called? */
265static bool faked_omit_frame_pointer;
266
267typedef enum aarch64_cond_code
268{
269 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
270 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
271 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
272}
273aarch64_cc;
274
275#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
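/* For example, AARCH64_INVERSE_CONDITION_CODE (AARCH64_EQ) is AARCH64_NE
   and AARCH64_INVERSE_CONDITION_CODE (AARCH64_GE) is AARCH64_LT, since the
   enumeration above places each condition next to its inverse.  */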
276
277/* The condition codes of the processor, and the inverse function. */
278static const char * const aarch64_condition_codes[] =
279{
280 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
281 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
282};
283
284/* Provide a mapping from gcc register numbers to dwarf register numbers. */
285unsigned
286aarch64_dbx_register_number (unsigned regno)
287{
288 if (GP_REGNUM_P (regno))
289 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
290 else if (regno == SP_REGNUM)
291 return AARCH64_DWARF_SP;
292 else if (FP_REGNUM_P (regno))
293 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
294
295 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
296 equivalent DWARF register. */
297 return DWARF_FRAME_REGISTERS;
298}
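/* So, for instance, x0-x30 map to DWARF registers 0-30, sp to 31 and
   v0-v31 to 64-95 (assuming the usual AARCH64_DWARF_R0/SP/V0 values from
   aarch64.h); anything else reports "no DWARF equivalent".  */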
299
300/* Return TRUE if MODE is any of the large INT modes. */
301static bool
302aarch64_vect_struct_mode_p (enum machine_mode mode)
303{
304 return mode == OImode || mode == CImode || mode == XImode;
305}
306
307/* Return TRUE if MODE is any of the vector modes. */
308static bool
309aarch64_vector_mode_p (enum machine_mode mode)
310{
311 return aarch64_vector_mode_supported_p (mode)
312 || aarch64_vect_struct_mode_p (mode);
313}
314
315/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
316static bool
317aarch64_array_mode_supported_p (enum machine_mode mode,
318 unsigned HOST_WIDE_INT nelems)
319{
320 if (TARGET_SIMD
321 && AARCH64_VALID_SIMD_QREG_MODE (mode)
322 && (nelems >= 2 && nelems <= 4))
323 return true;
324
325 return false;
326}
327
328/* Implement HARD_REGNO_NREGS. */
329
330int
331aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
332{
333 switch (aarch64_regno_regclass (regno))
334 {
335 case FP_REGS:
336 case FP_LO_REGS:
337 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
338 default:
339 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
340 }
341 gcc_unreachable ();
342}
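/* For example (assuming 8-byte general registers and 16-byte vector
   registers), a TImode value needs two general registers, while a TFmode
   or V4SImode value fits in a single FP/SIMD register.  */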
343
344/* Implement HARD_REGNO_MODE_OK. */
345
346int
347aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
348{
349 if (GET_MODE_CLASS (mode) == MODE_CC)
350 return regno == CC_REGNUM;
351
352 if (regno == SP_REGNUM || regno == FRAME_POINTER_REGNUM
353 || regno == ARG_POINTER_REGNUM)
354 return mode == Pmode;
355
356 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
357 return 1;
358
359 if (FP_REGNUM_P (regno))
360 {
361 if (aarch64_vect_struct_mode_p (mode))
362 return
363 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
364 else
365 return 1;
366 }
367
368 return 0;
369}
370
371/* Return true if calls to DECL should be treated as
 372 long-calls (i.e. called via a register). */
373static bool
374aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
375{
376 return false;
377}
378
379/* Return true if calls to symbol-ref SYM should be treated as
 380 long-calls (i.e. called via a register). */
381bool
382aarch64_is_long_call_p (rtx sym)
383{
384 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
385}
386
387/* Return true if the offsets to a zero/sign-extract operation
388 represent an expression that matches an extend operation. The
 389 operands represent the parameters from
390
391 (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
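/* For example (an illustrative pattern, not taken from generated RTL),
   in DImode
     (zero_extract (mult (reg) (const_int 4)) (const_int 34) (const_int 0))
   satisfies the checks below: the extract width 34 gives a shift of
   34 & 7 == 2 and a base width of 34 & ~7 == 32, and the multiplier is
   4 == 1 << 2, i.e. the whole expression is a zero-extend of the low
   32 bits shifted left by 2.  */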
392bool
393aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
394 rtx extract_imm)
395{
396 HOST_WIDE_INT mult_val, extract_val;
397
398 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
399 return false;
400
401 mult_val = INTVAL (mult_imm);
402 extract_val = INTVAL (extract_imm);
403
404 if (extract_val > 8
405 && extract_val < GET_MODE_BITSIZE (mode)
406 && exact_log2 (extract_val & ~7) > 0
407 && (extract_val & 7) <= 4
408 && mult_val == (1 << (extract_val & 7)))
409 return true;
410
411 return false;
412}
413
414/* Emit an insn that's a simple single-set. Both the operands must be
415 known to be valid. */
416inline static rtx
417emit_set_insn (rtx x, rtx y)
418{
419 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
420}
421
422/* X and Y are two things to compare using CODE. Emit the compare insn and
423 return the rtx for register 0 in the proper mode. */
424rtx
425aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
426{
427 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
428 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
429
430 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
431 return cc_reg;
432}
433
434/* Build the SYMBOL_REF for __tls_get_addr. */
435
436static GTY(()) rtx tls_get_addr_libfunc;
437
438rtx
439aarch64_tls_get_addr (void)
440{
441 if (!tls_get_addr_libfunc)
442 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
443 return tls_get_addr_libfunc;
444}
445
446/* Return the TLS model to use for ADDR. */
447
448static enum tls_model
449tls_symbolic_operand_type (rtx addr)
450{
451 enum tls_model tls_kind = TLS_MODEL_NONE;
452 rtx sym, addend;
453
454 if (GET_CODE (addr) == CONST)
455 {
456 split_const (addr, &sym, &addend);
457 if (GET_CODE (sym) == SYMBOL_REF)
458 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
459 }
460 else if (GET_CODE (addr) == SYMBOL_REF)
461 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
462
463 return tls_kind;
464}
465
 466/* We'll allow lo_sums in our legitimate addresses
 467 so that combine can take care of combining addresses where
 468 necessary, but for generation purposes, we'll generate the address
 469 as follows:
470 RTL Absolute
471 tmp = hi (symbol_ref); adrp x1, foo
472 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
473 nop
474
475 PIC TLS
476 adrp x1, :got:foo adrp tmp, :tlsgd:foo
477 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
478 bl __tls_get_addr
479 nop
480
481 Load TLS symbol, depending on TLS mechanism and TLS access model.
482
483 Global Dynamic - Traditional TLS:
484 adrp tmp, :tlsgd:imm
485 add dest, tmp, #:tlsgd_lo12:imm
486 bl __tls_get_addr
487
488 Global Dynamic - TLS Descriptors:
489 adrp dest, :tlsdesc:imm
490 ldr tmp, [dest, #:tlsdesc_lo12:imm]
491 add dest, dest, #:tlsdesc_lo12:imm
492 blr tmp
493 mrs tp, tpidr_el0
494 add dest, dest, tp
495
496 Initial Exec:
497 mrs tp, tpidr_el0
498 adrp tmp, :gottprel:imm
499 ldr dest, [tmp, #:gottprel_lo12:imm]
500 add dest, dest, tp
501
502 Local Exec:
503 mrs tp, tpidr_el0
504 add t0, tp, #:tprel_hi12:imm
505 add t0, #:tprel_lo12_nc:imm
506*/
507
508static void
509aarch64_load_symref_appropriately (rtx dest, rtx imm,
510 enum aarch64_symbol_type type)
511{
512 switch (type)
513 {
514 case SYMBOL_SMALL_ABSOLUTE:
515 {
516 rtx tmp_reg = dest;
517 if (can_create_pseudo_p ())
518 {
519 tmp_reg = gen_reg_rtx (Pmode);
520 }
521
522 emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
523 emit_insn (gen_add_losym (dest, tmp_reg, imm));
524 return;
525 }
526
527 case SYMBOL_SMALL_GOT:
528 {
529 rtx tmp_reg = dest;
530 if (can_create_pseudo_p ())
531 {
532 tmp_reg = gen_reg_rtx (Pmode);
533 }
534 emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
535 emit_insn (gen_ldr_got_small (dest, tmp_reg, imm));
536 return;
537 }
538
539 case SYMBOL_SMALL_TLSGD:
540 {
541 rtx insns;
542 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
543
544 start_sequence ();
545 emit_call_insn (gen_tlsgd_small (result, imm));
546 insns = get_insns ();
547 end_sequence ();
548
549 RTL_CONST_CALL_P (insns) = 1;
550 emit_libcall_block (insns, dest, result, imm);
551 return;
552 }
553
554 case SYMBOL_SMALL_TLSDESC:
555 {
556 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
557 rtx tp;
558
559 emit_insn (gen_tlsdesc_small (imm));
560 tp = aarch64_load_tp (NULL);
561 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
562 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
563 return;
564 }
565
566 case SYMBOL_SMALL_GOTTPREL:
567 {
568 rtx tmp_reg = gen_reg_rtx (Pmode);
569 rtx tp = aarch64_load_tp (NULL);
570 emit_insn (gen_tlsie_small (tmp_reg, imm));
571 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
572 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
573 return;
574 }
575
576 case SYMBOL_SMALL_TPREL:
577 {
578 rtx tp = aarch64_load_tp (NULL);
579 emit_insn (gen_tlsle_small (dest, tp, imm));
580 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
581 return;
582 }
583
584 default:
585 gcc_unreachable ();
586 }
587}
588
589/* Emit a move from SRC to DEST. Assume that the move expanders can
590 handle all moves if !can_create_pseudo_p (). The distinction is
591 important because, unlike emit_move_insn, the move expanders know
592 how to force Pmode objects into the constant pool even when the
593 constant pool address is not itself legitimate. */
594static rtx
595aarch64_emit_move (rtx dest, rtx src)
596{
597 return (can_create_pseudo_p ()
598 ? emit_move_insn (dest, src)
599 : emit_move_insn_1 (dest, src));
600}
601
602void
603aarch64_split_128bit_move (rtx dst, rtx src)
604{
605 rtx low_dst;
606
607 gcc_assert (GET_MODE (dst) == TImode);
608
609 if (REG_P (dst) && REG_P (src))
610 {
611 int src_regno = REGNO (src);
612 int dst_regno = REGNO (dst);
613
614 gcc_assert (GET_MODE (src) == TImode);
615
616 /* Handle r -> w, w -> r. */
617 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
618 {
619 emit_insn (gen_aarch64_movtilow_di (dst,
620 gen_lowpart (word_mode, src)));
621 emit_insn (gen_aarch64_movtihigh_di (dst,
622 gen_highpart (word_mode, src)));
623 return;
624 }
625 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
626 {
627 emit_insn (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst),
628 src));
629 emit_insn (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst),
630 src));
631 return;
632 }
633 /* Fall through to r -> r cases. */
634 }
635
636 low_dst = gen_lowpart (word_mode, dst);
637 if (REG_P (low_dst)
638 && reg_overlap_mentioned_p (low_dst, src))
639 {
640 aarch64_emit_move (gen_highpart (word_mode, dst),
641 gen_highpart_mode (word_mode, TImode, src));
642 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
643 }
644 else
645 {
646 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
647 aarch64_emit_move (gen_highpart (word_mode, dst),
648 gen_highpart_mode (word_mode, TImode, src));
649 }
650}
651
652bool
653aarch64_split_128bit_move_p (rtx dst, rtx src)
654{
655 return (! REG_P (src)
656 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
657}
658
659static rtx
660aarch64_force_temporary (rtx x, rtx value)
661{
662 if (can_create_pseudo_p ())
663 return force_reg (Pmode, value);
664 else
665 {
666 x = aarch64_emit_move (x, value);
667 return x;
668 }
669}
670
671
672static rtx
673aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
674{
675 if (!aarch64_plus_immediate (GEN_INT (offset), DImode))
676 {
677 rtx high;
678 /* Load the full offset into a register. This
679 might be improvable in the future. */
680 high = GEN_INT (offset);
681 offset = 0;
682 high = aarch64_force_temporary (temp, high);
683 reg = aarch64_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
684 }
685 return plus_constant (mode, reg, offset);
686}
687
688void
689aarch64_expand_mov_immediate (rtx dest, rtx imm)
690{
691 enum machine_mode mode = GET_MODE (dest);
692 unsigned HOST_WIDE_INT mask;
693 int i;
694 bool first;
695 unsigned HOST_WIDE_INT val;
696 bool subtargets;
697 rtx subtarget;
698 int one_match, zero_match;
699
700 gcc_assert (mode == SImode || mode == DImode);
701
702 /* Check on what type of symbol it is. */
703 if (GET_CODE (imm) == SYMBOL_REF
704 || GET_CODE (imm) == LABEL_REF
705 || GET_CODE (imm) == CONST)
706 {
707 rtx mem, base, offset;
708 enum aarch64_symbol_type sty;
709
710 /* If we have (const (plus symbol offset)), separate out the offset
711 before we start classifying the symbol. */
712 split_const (imm, &base, &offset);
713
714 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
715 switch (sty)
716 {
717 case SYMBOL_FORCE_TO_MEM:
718 if (offset != const0_rtx
719 && targetm.cannot_force_const_mem (mode, imm))
720 {
721 gcc_assert(can_create_pseudo_p ());
722 base = aarch64_force_temporary (dest, base);
723 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
724 aarch64_emit_move (dest, base);
725 return;
726 }
727 mem = force_const_mem (mode, imm);
728 gcc_assert (mem);
729 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
730 return;
731
732 case SYMBOL_SMALL_TLSGD:
733 case SYMBOL_SMALL_TLSDESC:
734 case SYMBOL_SMALL_GOTTPREL:
735 case SYMBOL_SMALL_GOT:
736 if (offset != const0_rtx)
737 {
738 gcc_assert(can_create_pseudo_p ());
739 base = aarch64_force_temporary (dest, base);
740 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
741 aarch64_emit_move (dest, base);
742 return;
743 }
744 /* FALLTHRU */
745
746 case SYMBOL_SMALL_TPREL:
747 case SYMBOL_SMALL_ABSOLUTE:
748 aarch64_load_symref_appropriately (dest, imm, sty);
749 return;
750
751 default:
752 gcc_unreachable ();
753 }
754 }
755
756 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
757 {
758 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
759 return;
760 }
761
762 if (!CONST_INT_P (imm))
763 {
764 if (GET_CODE (imm) == HIGH)
765 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
766 else
767 {
768 rtx mem = force_const_mem (mode, imm);
769 gcc_assert (mem);
770 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
771 }
772
773 return;
774 }
775
776 if (mode == SImode)
777 {
778 /* We know we can't do this in 1 insn, and we must be able to do it
779 in two; so don't mess around looking for sequences that don't buy
780 us anything. */
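 /* Illustrative example: 0x12345678 is built by the two insns below as
      mov  w0, #0x5678
      movk w0, #0x1234, lsl #16  */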
781 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
782 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
783 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
784 return;
785 }
786
787 /* Remaining cases are all for DImode. */
788
789 val = INTVAL (imm);
790 subtargets = optimize && can_create_pseudo_p ();
791
792 one_match = 0;
793 zero_match = 0;
794 mask = 0xffff;
795
796 for (i = 0; i < 64; i += 16, mask <<= 16)
797 {
798 if ((val & mask) == 0)
799 zero_match++;
800 else if ((val & mask) == mask)
801 one_match++;
802 }
803
804 if (one_match == 2)
805 {
806 mask = 0xffff;
807 for (i = 0; i < 64; i += 16, mask <<= 16)
808 {
809 if ((val & mask) != mask)
810 {
811 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
812 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
813 GEN_INT ((val >> i) & 0xffff)));
814 return;
815 }
816 }
817 gcc_unreachable ();
818 }
819
820 if (zero_match == 2)
821 goto simple_sequence;
822
823 mask = 0x0ffff0000UL;
824 for (i = 16; i < 64; i += 16, mask <<= 16)
825 {
826 HOST_WIDE_INT comp = mask & ~(mask - 1);
827
828 if (aarch64_uimm12_shift (val - (val & mask)))
829 {
830 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
831
832 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
833 emit_insn (gen_adddi3 (dest, subtarget,
834 GEN_INT (val - (val & mask))));
835 return;
836 }
837 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
838 {
839 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
840
841 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
842 GEN_INT ((val + comp) & mask)));
843 emit_insn (gen_adddi3 (dest, subtarget,
844 GEN_INT (val - ((val + comp) & mask))));
845 return;
846 }
847 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
848 {
849 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
850
851 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
852 GEN_INT ((val - comp) | ~mask)));
853 emit_insn (gen_adddi3 (dest, subtarget,
854 GEN_INT (val - ((val - comp) | ~mask))));
855 return;
856 }
857 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
858 {
859 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
860
861 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
862 GEN_INT (val | ~mask)));
863 emit_insn (gen_adddi3 (dest, subtarget,
864 GEN_INT (val - (val | ~mask))));
865 return;
866 }
867 }
868
869 /* See if we can do it by arithmetically combining two
870 immediates. */
871 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
872 {
873 int j;
874 mask = 0xffff;
875
876 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
877 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
878 {
879 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
880 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
881 GEN_INT (aarch64_bitmasks[i])));
882 emit_insn (gen_adddi3 (dest, subtarget,
883 GEN_INT (val - aarch64_bitmasks[i])));
884 return;
885 }
886
887 for (j = 0; j < 64; j += 16, mask <<= 16)
888 {
889 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
890 {
891 emit_insn (gen_rtx_SET (VOIDmode, dest,
892 GEN_INT (aarch64_bitmasks[i])));
893 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
894 GEN_INT ((val >> j) & 0xffff)));
895 return;
896 }
897 }
898 }
899
900 /* See if we can do it by logically combining two immediates. */
901 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
902 {
903 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
904 {
905 int j;
906
907 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
908 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
909 {
910 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
911 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
912 GEN_INT (aarch64_bitmasks[i])));
913 emit_insn (gen_iordi3 (dest, subtarget,
914 GEN_INT (aarch64_bitmasks[j])));
915 return;
916 }
917 }
918 else if ((val & aarch64_bitmasks[i]) == val)
919 {
920 int j;
921
922 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
923 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
924 {
925
926 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
927 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
928 GEN_INT (aarch64_bitmasks[j])));
929 emit_insn (gen_anddi3 (dest, subtarget,
930 GEN_INT (aarch64_bitmasks[i])));
931 return;
932 }
933 }
934 }
935
936 simple_sequence:
937 first = true;
938 mask = 0xffff;
939 for (i = 0; i < 64; i += 16, mask <<= 16)
940 {
941 if ((val & mask) != 0)
942 {
943 if (first)
944 {
945 emit_insn (gen_rtx_SET (VOIDmode, dest,
946 GEN_INT (val & mask)));
947 first = false;
948 }
949 else
950 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
951 GEN_INT ((val >> i) & 0xffff)));
952 }
953 }
954}
955
956static bool
957aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
958{
959 /* Indirect calls are not currently supported. */
960 if (decl == NULL)
961 return false;
962
963 /* Cannot tail-call to long-calls, since these are outside of the
964 range of a branch instruction (we could handle this if we added
 965 support for indirect tail-calls). */
966 if (aarch64_decl_is_long_call_p (decl))
967 return false;
968
969 return true;
970}
971
972/* Implement TARGET_PASS_BY_REFERENCE. */
973
974static bool
975aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
976 enum machine_mode mode,
977 const_tree type,
978 bool named ATTRIBUTE_UNUSED)
979{
980 HOST_WIDE_INT size;
981 enum machine_mode dummymode;
982 int nregs;
983
984 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
985 size = (mode == BLKmode && type)
986 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
987
988 if (type)
989 {
 990 /* Arrays are always passed by reference. */
991 if (TREE_CODE (type) == ARRAY_TYPE)
992 return true;
993 /* Other aggregates based on their size. */
994 if (AGGREGATE_TYPE_P (type))
995 size = int_size_in_bytes (type);
996 }
997
 998 /* Variable sized arguments are always passed by reference. */
999 if (size < 0)
1000 return true;
1001
1002 /* Can this be a candidate to be passed in fp/simd register(s)? */
1003 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1004 &dummymode, &nregs,
1005 NULL))
1006 return false;
1007
1008 /* Arguments which are variable sized or larger than 2 registers are
 1009 passed by reference unless they are a homogeneous floating-point
1010 aggregate. */
1011 return size > 2 * UNITS_PER_WORD;
1012}
1013
1014/* Return TRUE if VALTYPE is padded to its least significant bits. */
1015static bool
1016aarch64_return_in_msb (const_tree valtype)
1017{
1018 enum machine_mode dummy_mode;
1019 int dummy_int;
1020
1021 /* Never happens in little-endian mode. */
1022 if (!BYTES_BIG_ENDIAN)
1023 return false;
1024
1025 /* Only composite types smaller than or equal to 16 bytes can
1026 be potentially returned in registers. */
1027 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1028 || int_size_in_bytes (valtype) <= 0
1029 || int_size_in_bytes (valtype) > 16)
1030 return false;
1031
1032 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1033 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1034 is always passed/returned in the least significant bits of fp/simd
1035 register(s). */
1036 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1037 &dummy_mode, &dummy_int, NULL))
1038 return false;
1039
1040 return true;
1041}
1042
1043/* Implement TARGET_FUNCTION_VALUE.
1044 Define how to find the value returned by a function. */
1045
1046static rtx
1047aarch64_function_value (const_tree type, const_tree func,
1048 bool outgoing ATTRIBUTE_UNUSED)
1049{
1050 enum machine_mode mode;
1051 int unsignedp;
1052 int count;
1053 enum machine_mode ag_mode;
1054
1055 mode = TYPE_MODE (type);
1056 if (INTEGRAL_TYPE_P (type))
1057 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1058
1059 if (aarch64_return_in_msb (type))
1060 {
1061 HOST_WIDE_INT size = int_size_in_bytes (type);
1062
1063 if (size % UNITS_PER_WORD != 0)
1064 {
1065 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1066 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1067 }
1068 }
1069
1070 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1071 &ag_mode, &count, NULL))
1072 {
1073 if (!aarch64_composite_type_p (type, mode))
1074 {
1075 gcc_assert (count == 1 && mode == ag_mode);
1076 return gen_rtx_REG (mode, V0_REGNUM);
1077 }
1078 else
1079 {
1080 int i;
1081 rtx par;
1082
1083 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1084 for (i = 0; i < count; i++)
1085 {
1086 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1087 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1088 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1089 XVECEXP (par, 0, i) = tmp;
1090 }
1091 return par;
1092 }
1093 }
1094 else
1095 return gen_rtx_REG (mode, R0_REGNUM);
1096}
1097
1098/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1099 Return true if REGNO is the number of a hard register in which the values
1100 of called function may come back. */
1101
1102static bool
1103aarch64_function_value_regno_p (const unsigned int regno)
1104{
1105 /* Maximum of 16 bytes can be returned in the general registers. Examples
1106 of 16-byte return values are: 128-bit integers and 16-byte small
1107 structures (excluding homogeneous floating-point aggregates). */
1108 if (regno == R0_REGNUM || regno == R1_REGNUM)
1109 return true;
1110
1111 /* Up to four fp/simd registers can return a function value, e.g. a
1112 homogeneous floating-point aggregate having four members. */
1113 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1114 return !TARGET_GENERAL_REGS_ONLY;
1115
1116 return false;
1117}
1118
1119/* Implement TARGET_RETURN_IN_MEMORY.
1120
1121 If the type T of the result of a function is such that
1122 void func (T arg)
1123 would require that arg be passed as a value in a register (or set of
1124 registers) according to the parameter passing rules, then the result
1125 is returned in the same registers as would be used for such an
1126 argument. */
1127
1128static bool
1129aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1130{
1131 HOST_WIDE_INT size;
1132 enum machine_mode ag_mode;
1133 int count;
1134
1135 if (!AGGREGATE_TYPE_P (type)
1136 && TREE_CODE (type) != COMPLEX_TYPE
1137 && TREE_CODE (type) != VECTOR_TYPE)
1138 /* Simple scalar types always returned in registers. */
1139 return false;
1140
1141 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1142 type,
1143 &ag_mode,
1144 &count,
1145 NULL))
1146 return false;
1147
1148 /* Types larger than 2 registers returned in memory. */
1149 size = int_size_in_bytes (type);
1150 return (size < 0 || size > 2 * UNITS_PER_WORD);
1151}
1152
1153static bool
1154aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1155 const_tree type, int *nregs)
1156{
1157 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1158 return aarch64_vfp_is_call_or_return_candidate (mode,
1159 type,
1160 &pcum->aapcs_vfp_rmode,
1161 nregs,
1162 NULL);
1163}
1164
1165/* Given MODE and TYPE of a function argument, return the alignment in
1166 bits. The idea is to suppress any stronger alignment requested by
1167 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1168 This is a helper function for local use only. */
1169
1170static unsigned int
1171aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1172{
1173 unsigned int alignment;
1174
1175 if (type)
1176 {
1177 if (!integer_zerop (TYPE_SIZE (type)))
1178 {
1179 if (TYPE_MODE (type) == mode)
1180 alignment = TYPE_ALIGN (type);
1181 else
1182 alignment = GET_MODE_ALIGNMENT (mode);
1183 }
1184 else
1185 alignment = 0;
1186 }
1187 else
1188 alignment = GET_MODE_ALIGNMENT (mode);
1189
1190 return alignment;
1191}
1192
1193/* Layout a function argument according to the AAPCS64 rules. The rule
1194 numbers refer to the rule numbers in the AAPCS64. */
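/* Two illustrative cases (assuming the usual LP64 register sizes): an HFA
   of four floats is allocated to s0-s3 under rules C.1-C.5, while a
   16-byte-aligned two-word struct that arrives with an odd next general
   register number first has that number rounded up to even (C.8) and is
   then passed in an x-register pair.  */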
1195
1196static void
1197aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1198 const_tree type,
1199 bool named ATTRIBUTE_UNUSED)
1200{
1201 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1202 int ncrn, nvrn, nregs;
1203 bool allocate_ncrn, allocate_nvrn;
1204
1205 /* We need to do this once per argument. */
1206 if (pcum->aapcs_arg_processed)
1207 return;
1208
1209 pcum->aapcs_arg_processed = true;
1210
1211 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1212 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1213 mode,
1214 type,
1215 &nregs);
1216
1217 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1218 The following code thus handles passing by SIMD/FP registers first. */
1219
1220 nvrn = pcum->aapcs_nvrn;
1221
 1222 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
 1223 and homogeneous short-vector aggregates (HVA). */
1224 if (allocate_nvrn)
1225 {
1226 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1227 {
1228 pcum->aapcs_nextnvrn = nvrn + nregs;
1229 if (!aarch64_composite_type_p (type, mode))
1230 {
1231 gcc_assert (nregs == 1);
1232 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1233 }
1234 else
1235 {
1236 rtx par;
1237 int i;
1238 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1239 for (i = 0; i < nregs; i++)
1240 {
1241 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1242 V0_REGNUM + nvrn + i);
1243 tmp = gen_rtx_EXPR_LIST
1244 (VOIDmode, tmp,
1245 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1246 XVECEXP (par, 0, i) = tmp;
1247 }
1248 pcum->aapcs_reg = par;
1249 }
1250 return;
1251 }
1252 else
1253 {
1254 /* C.3 NSRN is set to 8. */
1255 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1256 goto on_stack;
1257 }
1258 }
1259
1260 ncrn = pcum->aapcs_ncrn;
1261 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1262 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1263
1264
 1265 /* C6 - C9, though the sign and zero extension semantics are
 1266 handled elsewhere. This is the case where the argument fits
 1267 entirely in general registers. */
1268 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1269 {
1270 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1271
1272 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1273
1274 /* C.8 if the argument has an alignment of 16 then the NGRN is
1275 rounded up to the next even number. */
1276 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1277 {
1278 ++ncrn;
1279 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1280 }
1281 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1282 A reg is still generated for it, but the caller should be smart
1283 enough not to use it. */
1284 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1285 {
1286 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1287 }
1288 else
1289 {
1290 rtx par;
1291 int i;
1292
1293 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1294 for (i = 0; i < nregs; i++)
1295 {
1296 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1297 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1298 GEN_INT (i * UNITS_PER_WORD));
1299 XVECEXP (par, 0, i) = tmp;
1300 }
1301 pcum->aapcs_reg = par;
1302 }
1303
1304 pcum->aapcs_nextncrn = ncrn + nregs;
1305 return;
1306 }
1307
1308 /* C.11 */
1309 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1310
 1311 /* The argument is passed on the stack; record the needed number of words for
1312 this argument (we can re-use NREGS) and align the total size if
1313 necessary. */
1314on_stack:
1315 pcum->aapcs_stack_words = nregs;
1316 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1317 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1318 16 / UNITS_PER_WORD) + 1;
1319 return;
1320}
1321
1322/* Implement TARGET_FUNCTION_ARG. */
1323
1324static rtx
1325aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1326 const_tree type, bool named)
1327{
1328 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1329 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1330
1331 if (mode == VOIDmode)
1332 return NULL_RTX;
1333
1334 aarch64_layout_arg (pcum_v, mode, type, named);
1335 return pcum->aapcs_reg;
1336}
1337
1338void
1339aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1340 const_tree fntype ATTRIBUTE_UNUSED,
1341 rtx libname ATTRIBUTE_UNUSED,
1342 const_tree fndecl ATTRIBUTE_UNUSED,
1343 unsigned n_named ATTRIBUTE_UNUSED)
1344{
1345 pcum->aapcs_ncrn = 0;
1346 pcum->aapcs_nvrn = 0;
1347 pcum->aapcs_nextncrn = 0;
1348 pcum->aapcs_nextnvrn = 0;
1349 pcum->pcs_variant = ARM_PCS_AAPCS64;
1350 pcum->aapcs_reg = NULL_RTX;
1351 pcum->aapcs_arg_processed = false;
1352 pcum->aapcs_stack_words = 0;
1353 pcum->aapcs_stack_size = 0;
1354
1355 return;
1356}
1357
1358static void
1359aarch64_function_arg_advance (cumulative_args_t pcum_v,
1360 enum machine_mode mode,
1361 const_tree type,
1362 bool named)
1363{
1364 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1365 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1366 {
1367 aarch64_layout_arg (pcum_v, mode, type, named);
1368 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1369 != (pcum->aapcs_stack_words != 0));
1370 pcum->aapcs_arg_processed = false;
1371 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1372 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1373 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1374 pcum->aapcs_stack_words = 0;
1375 pcum->aapcs_reg = NULL_RTX;
1376 }
1377}
1378
1379bool
1380aarch64_function_arg_regno_p (unsigned regno)
1381{
1382 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1383 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1384}
1385
1386/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1387 PARM_BOUNDARY bits of alignment, but will be given anything up
1388 to STACK_BOUNDARY bits if the type requires it. This makes sure
1389 that both before and after the layout of each argument, the Next
1390 Stacked Argument Address (NSAA) will have a minimum alignment of
1391 8 bytes. */
1392
1393static unsigned int
1394aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1395{
1396 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1397
1398 if (alignment < PARM_BOUNDARY)
1399 alignment = PARM_BOUNDARY;
1400 if (alignment > STACK_BOUNDARY)
1401 alignment = STACK_BOUNDARY;
1402 return alignment;
1403}
1404
1405/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1406
1407 Return true if an argument passed on the stack should be padded upwards,
1408 i.e. if the least-significant byte of the stack slot has useful data.
1409
1410 Small aggregate types are placed in the lowest memory address.
1411
1412 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1413
1414bool
1415aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1416{
1417 /* On little-endian targets, the least significant byte of every stack
1418 argument is passed at the lowest byte address of the stack slot. */
1419 if (!BYTES_BIG_ENDIAN)
1420 return true;
1421
1422 /* Otherwise, integral types and floating point types are padded downward:
1423 the least significant byte of a stack argument is passed at the highest
1424 byte address of the stack slot. */
1425 if (type
1426 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type))
1427 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1428 return false;
1429
1430 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1431 return true;
1432}
1433
1434/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1435
1436 It specifies padding for the last (may also be the only)
 1437 element of a block move between registers and memory.
 1438 Assuming the block is in memory, padding upward means that
 1439 the last element is padded after its most significant byte,
 1440 while in downward padding, the last element is padded on
 1441 its least significant byte side.
1442
1443 Small aggregates and small complex types are always padded
1444 upwards.
1445
1446 We don't need to worry about homogeneous floating-point or
1447 short-vector aggregates; their move is not affected by the
1448 padding direction determined here. Regardless of endianness,
1449 each element of such an aggregate is put in the least
1450 significant bits of a fp/simd register.
1451
1452 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1453 register has useful data, and return the opposite if the most
1454 significant byte does. */
1455
1456bool
1457aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1458 bool first ATTRIBUTE_UNUSED)
1459{
1460
1461 /* Small composite types are always padded upward. */
1462 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1463 {
1464 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1465 : GET_MODE_SIZE (mode));
1466 if (size < 2 * UNITS_PER_WORD)
1467 return true;
1468 }
1469
1470 /* Otherwise, use the default padding. */
1471 return !BYTES_BIG_ENDIAN;
1472}
1473
1474static enum machine_mode
1475aarch64_libgcc_cmp_return_mode (void)
1476{
1477 return SImode;
1478}
1479
1480static bool
1481aarch64_frame_pointer_required (void)
1482{
1483 /* If the function contains dynamic stack allocations, we need to
1484 use the frame pointer to access the static parts of the frame. */
1485 if (cfun->calls_alloca)
1486 return true;
1487
1488 /* We may have turned flag_omit_frame_pointer on in order to have this
1489 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1490 and we'll check it here.
1491 If we really did set flag_omit_frame_pointer normally, then we return false
1492 (no frame pointer required) in all cases. */
1493
1494 if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1495 return false;
1496 else if (flag_omit_leaf_frame_pointer)
1497 return !crtl->is_leaf;
1498 return true;
1499}
1500
1501/* Mark the registers that need to be saved by the callee and calculate
1502 the size of the callee-saved registers area and frame record (both FP
1503 and LR may be omitted). */
1504static void
1505aarch64_layout_frame (void)
1506{
1507 HOST_WIDE_INT offset = 0;
1508 int regno;
1509
1510 if (reload_completed && cfun->machine->frame.laid_out)
1511 return;
1512
1513 cfun->machine->frame.fp_lr_offset = 0;
1514
1515 /* First mark all the registers that really need to be saved... */
1516 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1517 cfun->machine->frame.reg_offset[regno] = -1;
1518
1519 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1520 cfun->machine->frame.reg_offset[regno] = -1;
1521
1522 /* ... that includes the eh data registers (if needed)... */
1523 if (crtl->calls_eh_return)
1524 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1525 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1526
1527 /* ... and any callee saved register that dataflow says is live. */
1528 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1529 if (df_regs_ever_live_p (regno)
1530 && !call_used_regs[regno])
1531 cfun->machine->frame.reg_offset[regno] = 0;
1532
1533 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1534 if (df_regs_ever_live_p (regno)
1535 && !call_used_regs[regno])
1536 cfun->machine->frame.reg_offset[regno] = 0;
1537
1538 if (frame_pointer_needed)
1539 {
1540 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1541 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1542 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1543 }
1544
1545 /* Now assign stack slots for them. */
1546 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1547 if (cfun->machine->frame.reg_offset[regno] != -1)
1548 {
1549 cfun->machine->frame.reg_offset[regno] = offset;
1550 offset += UNITS_PER_WORD;
1551 }
1552
1553 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1554 if (cfun->machine->frame.reg_offset[regno] != -1)
1555 {
1556 cfun->machine->frame.reg_offset[regno] = offset;
1557 offset += UNITS_PER_WORD;
1558 }
1559
1560 if (frame_pointer_needed)
1561 {
1562 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1563 offset += UNITS_PER_WORD;
1564 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1565 }
1566
1567 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1568 {
1569 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1570 offset += UNITS_PER_WORD;
1571 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1572 }
1573
1574 cfun->machine->frame.padding0 =
1575 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1576 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1577
1578 cfun->machine->frame.saved_regs_size = offset;
1579 cfun->machine->frame.laid_out = true;
1580}
1581
1582/* Make the last instruction frame-related and note that it performs
1583 the operation described by FRAME_PATTERN. */
1584
1585static void
1586aarch64_set_frame_expr (rtx frame_pattern)
1587{
1588 rtx insn;
1589
1590 insn = get_last_insn ();
1591 RTX_FRAME_RELATED_P (insn) = 1;
1592 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1593 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1594 frame_pattern,
1595 REG_NOTES (insn));
1596}
1597
1598static bool
1599aarch64_register_saved_on_entry (int regno)
1600{
1601 return cfun->machine->frame.reg_offset[regno] != -1;
1602}
1603
1604
1605static void
1606aarch64_save_or_restore_fprs (int start_offset, int increment,
1607 bool restore, rtx base_rtx)
1608
1609{
1610 unsigned regno;
1611 unsigned regno2;
1612 rtx insn;
1613 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1614
1615
1616 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1617 {
1618 if (aarch64_register_saved_on_entry (regno))
1619 {
1620 rtx mem;
1621 mem = gen_mem_ref (DFmode,
1622 plus_constant (Pmode,
1623 base_rtx,
1624 start_offset));
1625
1626 for (regno2 = regno + 1;
1627 regno2 <= V31_REGNUM
1628 && !aarch64_register_saved_on_entry (regno2);
1629 regno2++)
1630 {
1631 /* Empty loop. */
1632 }
1633 if (regno2 <= V31_REGNUM &&
1634 aarch64_register_saved_on_entry (regno2))
1635 {
1636 rtx mem2;
1637 /* Next highest register to be saved. */
1638 mem2 = gen_mem_ref (DFmode,
1639 plus_constant
1640 (Pmode,
1641 base_rtx,
1642 start_offset + increment));
1643 if (restore == false)
1644 {
1645 insn = emit_insn
1646 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1647 mem2, gen_rtx_REG (DFmode, regno2)));
1648
1649 }
1650 else
1651 {
1652 insn = emit_insn
1653 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1654 gen_rtx_REG (DFmode, regno2), mem2));
1655
1656 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno));
1657 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno2));
1658 }
1659
1660 /* The first part of a frame-related parallel insn
1661 is always assumed to be relevant to the frame
 1662 calculations; subsequent parts are only
1663 frame-related if explicitly marked. */
1664 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1665 1)) = 1;
1666 regno = regno2;
1667 start_offset += increment * 2;
1668 }
1669 else
1670 {
1671 if (restore == false)
1672 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1673 else
1674 {
1675 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1676 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1677 }
1678 start_offset += increment;
1679 }
1680 RTX_FRAME_RELATED_P (insn) = 1;
1681 }
1682 }
1683
1684}
1685
1686
 1687/* Offset from the stack pointer at which the saves and
 1688 restores have to happen. */
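/* Where two live callee-saved registers are adjacent, they are handled
   with a single store/load pair; e.g. saving x19 and x20 becomes one
     stp x19, x20, [base, #offset]
   rather than two separate stores (illustrative).  */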
1689static void
1690aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1691 bool restore)
1692{
1693 rtx insn;
1694 rtx base_rtx = stack_pointer_rtx;
1695 HOST_WIDE_INT start_offset = offset;
1696 HOST_WIDE_INT increment = UNITS_PER_WORD;
1697 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1698 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1699 unsigned regno;
1700 unsigned regno2;
1701
1702 for (regno = R0_REGNUM; regno <= limit; regno++)
1703 {
1704 if (aarch64_register_saved_on_entry (regno))
1705 {
1706 rtx mem;
1707 mem = gen_mem_ref (Pmode,
1708 plus_constant (Pmode,
1709 base_rtx,
1710 start_offset));
1711
1712 for (regno2 = regno + 1;
1713 regno2 <= limit
1714 && !aarch64_register_saved_on_entry (regno2);
1715 regno2++)
1716 {
1717 /* Empty loop. */
1718 }
1719 if (regno2 <= limit &&
1720 aarch64_register_saved_on_entry (regno2))
1721 {
1722 rtx mem2;
1723 /* Next highest register to be saved. */
1724 mem2 = gen_mem_ref (Pmode,
1725 plus_constant
1726 (Pmode,
1727 base_rtx,
1728 start_offset + increment));
1729 if (restore == false)
1730 {
1731 insn = emit_insn
1732 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1733 mem2, gen_rtx_REG (DImode, regno2)));
1734
1735 }
1736 else
1737 {
1738 insn = emit_insn
1739 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1740 gen_rtx_REG (DImode, regno2), mem2));
1741
1742 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1743 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1744 }
1745
1746 /* The first part of a frame-related parallel insn
1747 is always assumed to be relevant to the frame
 1748 calculations; subsequent parts are only
1749 frame-related if explicitly marked. */
1750 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1751 1)) = 1;
1752 regno = regno2;
1753 start_offset += increment * 2;
1754 }
1755 else
1756 {
1757 if (restore == false)
1758 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1759 else
1760 {
1761 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1762 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1763 }
1764 start_offset += increment;
1765 }
1766 RTX_FRAME_RELATED_P (insn) = 1;
1767 }
1768 }
1769
1770 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1771
1772}
1773
1774/* AArch64 stack frames generated by this compiler look like:
1775
1776 +-------------------------------+
1777 | |
1778 | incoming stack arguments |
1779 | |
1780 +-------------------------------+ <-- arg_pointer_rtx
1781 | |
1782 | callee-allocated save area |
1783 | for register varargs |
1784 | |
1785 +-------------------------------+
1786 | |
1787 | local variables |
1788 | |
1789 +-------------------------------+ <-- frame_pointer_rtx
1790 | |
1791 | callee-saved registers |
1792 | |
1793 +-------------------------------+
1794 | LR' |
1795 +-------------------------------+
1796 | FP' |
1797 P +-------------------------------+ <-- hard_frame_pointer_rtx
1798 | dynamic allocation |
1799 +-------------------------------+
1800 | |
1801 | outgoing stack arguments |
1802 | |
1803 +-------------------------------+ <-- stack_pointer_rtx
1804
1805 Dynamic stack allocations such as alloca insert data at point P.
1806 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
1807 hard_frame_pointer_rtx unchanged. */
1808
1809/* Generate the prologue instructions for entry into a function.
1810 Establish the stack frame by decreasing the stack pointer with a
1811 properly calculated size and, if necessary, create a frame record
1812 filled with the values of LR and previous frame pointer. The
 1813 current FP is also set up if it is in use. */
1814
1815void
1816aarch64_expand_prologue (void)
1817{
1818 /* sub sp, sp, #<frame_size>
1819 stp {fp, lr}, [sp, #<frame_size> - 16]
1820 add fp, sp, #<frame_size> - hardfp_offset
1821 stp {cs_reg}, [fp, #-16] etc.
1822
1823 sub sp, sp, <final_adjustment_if_any>
1824 */
1825 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
1826 HOST_WIDE_INT frame_size, offset;
1827 HOST_WIDE_INT fp_offset; /* FP offset from SP */
1828 rtx insn;
1829
1830 aarch64_layout_frame ();
1831 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
1832 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
1833 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
1834 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
1835 + crtl->outgoing_args_size);
1836 offset = frame_size = AARCH64_ROUND_UP (frame_size,
1837 STACK_BOUNDARY / BITS_PER_UNIT);
1838
1839 if (flag_stack_usage_info)
1840 current_function_static_stack_size = frame_size;
1841
1842 fp_offset = (offset
1843 - original_frame_size
1844 - cfun->machine->frame.saved_regs_size);
1845
 1846 /* Store pairs and load pairs have a range of only -512 to 504. */
1847 if (offset >= 512)
1848 {
1849 /* When the frame has a large size, an initial decrease is done on
1850 the stack pointer to jump over the callee-allocated save area for
1851 register varargs, the local variable area and/or the callee-saved
1852 register area. This will allow the pre-index write-back
1853 store pair instructions to be used for setting up the stack frame
1854 efficiently. */
1855 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
1856 if (offset >= 512)
1857 offset = cfun->machine->frame.saved_regs_size;
1858
1859 frame_size -= (offset + crtl->outgoing_args_size);
1860 fp_offset = 0;
1861
1862 if (frame_size >= 0x1000000)
1863 {
1864 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
1865 emit_move_insn (op0, GEN_INT (-frame_size));
1866 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
1867 aarch64_set_frame_expr (gen_rtx_SET
1868 (Pmode, stack_pointer_rtx,
1869 gen_rtx_PLUS (Pmode,
1870 stack_pointer_rtx,
1871 GEN_INT (-frame_size))));
1872 }
1873 else if (frame_size > 0)
1874 {
1875 if ((frame_size & 0xfff) != frame_size)
1876 {
1877 insn = emit_insn (gen_add2_insn
1878 (stack_pointer_rtx,
1879 GEN_INT (-(frame_size
1880 & ~(HOST_WIDE_INT)0xfff))));
1881 RTX_FRAME_RELATED_P (insn) = 1;
1882 }
1883 if ((frame_size & 0xfff) != 0)
1884 {
1885 insn = emit_insn (gen_add2_insn
1886 (stack_pointer_rtx,
1887 GEN_INT (-(frame_size
1888 & (HOST_WIDE_INT)0xfff))));
1889 RTX_FRAME_RELATED_P (insn) = 1;
1890 }
1891 }
1892 }
1893 else
1894 frame_size = -1;
1895
1896 if (offset > 0)
1897 {
1898 /* Save the frame pointer and lr if the frame pointer is needed
1899 first. Make the frame pointer point to the location of the
1900 old frame pointer on the stack. */
1901 if (frame_pointer_needed)
1902 {
1903 rtx mem_fp, mem_lr;
1904
1905 if (fp_offset)
1906 {
1907 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
1908 GEN_INT (-offset)));
1909 RTX_FRAME_RELATED_P (insn) = 1;
1910 aarch64_set_frame_expr (gen_rtx_SET
1911 (Pmode, stack_pointer_rtx,
1912 gen_rtx_MINUS (Pmode,
1913 stack_pointer_rtx,
1914 GEN_INT (offset))));
1915 mem_fp = gen_frame_mem (DImode,
1916 plus_constant (Pmode,
1917 stack_pointer_rtx,
1918 fp_offset));
1919 mem_lr = gen_frame_mem (DImode,
1920 plus_constant (Pmode,
1921 stack_pointer_rtx,
1922 fp_offset
1923 + UNITS_PER_WORD));
1924 insn = emit_insn (gen_store_pairdi (mem_fp,
1925 hard_frame_pointer_rtx,
1926 mem_lr,
1927 gen_rtx_REG (DImode,
1928 LR_REGNUM)));
1929 }
1930 else
1931 {
1932 insn = emit_insn (gen_storewb_pairdi_di
1933 (stack_pointer_rtx, stack_pointer_rtx,
1934 hard_frame_pointer_rtx,
1935 gen_rtx_REG (DImode, LR_REGNUM),
1936 GEN_INT (-offset),
1937 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
1938 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
1939 }
1940
1941 /* The first part of a frame-related parallel insn is always
1942 assumed to be relevant to the frame calculations;
1943 subsequent parts are only frame-related if explicitly
1944 marked. */
1945 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1946 RTX_FRAME_RELATED_P (insn) = 1;
1947
1948 /* Set up frame pointer to point to the location of the
1949 previous frame pointer on the stack. */
1950 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
1951 stack_pointer_rtx,
1952 GEN_INT (fp_offset)));
1953 aarch64_set_frame_expr (gen_rtx_SET
1954 (Pmode, hard_frame_pointer_rtx,
1955 gen_rtx_PLUS (Pmode,
1956 stack_pointer_rtx,
1957 GEN_INT (fp_offset))));
1958 RTX_FRAME_RELATED_P (insn) = 1;
1959 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
1960 hard_frame_pointer_rtx));
1961 }
1962 else
1963 {
1964 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
1965 GEN_INT (-offset)));
1966 RTX_FRAME_RELATED_P (insn) = 1;
1967 }
1968
1969 aarch64_save_or_restore_callee_save_registers
1970 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
1971 }
1972
1973 /* when offset >= 512,
1974 sub sp, sp, #<outgoing_args_size> */
1975 if (frame_size > -1)
1976 {
1977 if (crtl->outgoing_args_size > 0)
1978 {
1979 insn = emit_insn (gen_add2_insn
1980 (stack_pointer_rtx,
1981 GEN_INT (- crtl->outgoing_args_size)));
1982 RTX_FRAME_RELATED_P (insn) = 1;
1983 }
1984 }
1985}
1986
1987/* Generate the epilogue instructions for returning from a function. */
1988void
1989aarch64_expand_epilogue (bool for_sibcall)
1990{
1991 HOST_WIDE_INT original_frame_size, frame_size, offset;
1992 HOST_WIDE_INT fp_offset;
1993 rtx insn;
44c0e7b9 1994 rtx cfa_reg;
1995
1996 aarch64_layout_frame ();
1997 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
1998 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
1999 + crtl->outgoing_args_size);
2000 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2001 STACK_BOUNDARY / BITS_PER_UNIT);
2002
2003 fp_offset = (offset
2004 - original_frame_size
2005 - cfun->machine->frame.saved_regs_size);
2006
2007 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2008
2009 /* Store pairs and load pairs have a range only -512 to 504. */
2010 if (offset >= 512)
2011 {
2012 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2013 if (offset >= 512)
2014 offset = cfun->machine->frame.saved_regs_size;
2015
2016 frame_size -= (offset + crtl->outgoing_args_size);
2017 fp_offset = 0;
2018 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2019 {
2020 insn = emit_insn (gen_add2_insn
2021 (stack_pointer_rtx,
2022 GEN_INT (crtl->outgoing_args_size)));
2023 RTX_FRAME_RELATED_P (insn) = 1;
2024 }
2025 }
2026 else
2027 frame_size = -1;
2028
2029 /* If there were outgoing arguments or we've done dynamic stack
2030 allocation, then restore the stack pointer from the frame
2031 pointer. This is at most one insn and more efficient than using
2032 GCC's internal mechanism. */
2033 if (frame_pointer_needed
2034 && (crtl->outgoing_args_size || cfun->calls_alloca))
2035 {
2036 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2037 hard_frame_pointer_rtx,
2038 GEN_INT (- fp_offset)));
2039 RTX_FRAME_RELATED_P (insn) = 1;
2040 /* As SP is set to (FP - fp_offset), according to the rules in
2041 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2042 from the value of SP from now on. */
2043 cfa_reg = stack_pointer_rtx;
2044 }
2045
2046 aarch64_save_or_restore_callee_save_registers
2047 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2048
2049 /* Restore the frame pointer and lr if the frame pointer is needed. */
2050 if (offset > 0)
2051 {
2052 if (frame_pointer_needed)
2053 {
2054 rtx mem_fp, mem_lr;
2055
2056 if (fp_offset)
2057 {
2058 mem_fp = gen_frame_mem (DImode,
2059 plus_constant (Pmode,
2060 stack_pointer_rtx,
2061 fp_offset));
2062 mem_lr = gen_frame_mem (DImode,
2063 plus_constant (Pmode,
2064 stack_pointer_rtx,
2065 fp_offset
2066 + UNITS_PER_WORD));
2067 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2068 mem_fp,
2069 gen_rtx_REG (DImode,
2070 LR_REGNUM),
2071 mem_lr));
2072 }
2073 else
2074 {
2075 insn = emit_insn (gen_loadwb_pairdi_di
2076 (stack_pointer_rtx,
2077 stack_pointer_rtx,
2078 hard_frame_pointer_rtx,
2079 gen_rtx_REG (DImode, LR_REGNUM),
2080 GEN_INT (offset),
2081 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2082 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2083 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2084 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2085 plus_constant (Pmode, cfa_reg,
2086 offset))));
2087 }
2088
2089 /* The first part of a frame-related parallel insn
2090 is always assumed to be relevant to the frame
2091 calculations; subsequent parts are only
2092 frame-related if explicitly marked. */
2093 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2094 RTX_FRAME_RELATED_P (insn) = 1;
2095 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2096 add_reg_note (insn, REG_CFA_RESTORE,
2097 gen_rtx_REG (DImode, LR_REGNUM));
2098
2099 if (fp_offset)
2100 {
2101 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2102 GEN_INT (offset)));
2103 RTX_FRAME_RELATED_P (insn) = 1;
2104 }
2105 }
2106 else
2107 {
2108 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2109 GEN_INT (offset)));
2110 RTX_FRAME_RELATED_P (insn) = 1;
2111 }
2112 }
2113
2114 /* Stack adjustment for exception handler. */
2115 if (crtl->calls_eh_return)
2116 {
2117 /* We need to unwind the stack by the offset computed by
2118 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2119 based on SP. Ideally we would update the SP and define the
2120 CFA along the lines of:
2121
2122 SP = SP + EH_RETURN_STACKADJ_RTX
2123 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2124
2125 However the dwarf emitter only understands a constant
2126 register offset.
2127
2128 The solution chosen here is to use the otherwise unused IP0
2129 as a temporary register to hold the current SP value. The
2130 CFA is described using IP0 then SP is modified. */
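      /* Expository sketch (not part of the original sources) of the
	 sequence emitted below:
	     mov  ip0, sp
	     (CFA is now described in terms of ip0)
	     add  sp, sp, <EH_RETURN_STACKADJ_RTX register>
	 followed by a use of ip0 so the copy is not deleted.  */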
2131
2132 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2133
2134 insn = emit_move_insn (ip0, stack_pointer_rtx);
2135 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2136 RTX_FRAME_RELATED_P (insn) = 1;
2137
2138 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2139
2140 /* Ensure the assignment to IP0 does not get optimized away. */
2141 emit_use (ip0);
2142 }
2143
2144 if (frame_size > -1)
2145 {
2146 if (frame_size >= 0x1000000)
2147 {
2148 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2149 emit_move_insn (op0, GEN_INT (frame_size));
2150 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2151 aarch64_set_frame_expr (gen_rtx_SET
2152 (Pmode, stack_pointer_rtx,
2153 gen_rtx_PLUS (Pmode,
2154 stack_pointer_rtx,
2155 GEN_INT (frame_size))));
2156 }
2157 else if (frame_size > 0)
2158 {
2159 if ((frame_size & 0xfff) != 0)
2160 {
2161 insn = emit_insn (gen_add2_insn
2162 (stack_pointer_rtx,
2163 GEN_INT ((frame_size
2164 & (HOST_WIDE_INT) 0xfff))));
2165 RTX_FRAME_RELATED_P (insn) = 1;
2166 }
2167 if ((frame_size & 0xfff) != frame_size)
2168 {
2169 insn = emit_insn (gen_add2_insn
2170 (stack_pointer_rtx,
2171 GEN_INT ((frame_size
2172 & ~ (HOST_WIDE_INT) 0xfff))));
2173 RTX_FRAME_RELATED_P (insn) = 1;
2174 }
2175 }
2176
2177 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2178 gen_rtx_PLUS (Pmode,
2179 stack_pointer_rtx,
2180 GEN_INT (offset))));
2181 }
2182
2183 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2184 if (!for_sibcall)
2185 emit_jump_insn (ret_rtx);
2186}
2187
2188/* Return the place to copy the exception unwinding return address to.
2189 This will probably be a stack slot, but could (in theory) be the
2190 return register. */
2191rtx
2192aarch64_final_eh_return_addr (void)
2193{
2194 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2195 aarch64_layout_frame ();
2196 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2197 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2198 + crtl->outgoing_args_size);
2199 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2200 STACK_BOUNDARY / BITS_PER_UNIT);
2201 fp_offset = offset
2202 - original_frame_size
2203 - cfun->machine->frame.saved_regs_size;
2204
2205 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2206 return gen_rtx_REG (DImode, LR_REGNUM);
2207
2208 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2209 result in a store to save LR introduced by builtin_eh_return () being
2210 incorrectly deleted because the alias is not detected.
2211 So in the calculation of the address to copy the exception unwinding
2212 return address to, we note 2 cases.
2213 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2214 we return a SP-relative location since all the addresses are SP-relative
2215 in this case. This prevents the store from being optimized away.
2216 If the fp_offset is not 0, then the addresses will be FP-relative and
2217 therefore we return a FP-relative location. */
2218
2219 if (frame_pointer_needed)
2220 {
2221 if (fp_offset)
2222 return gen_frame_mem (DImode,
2223 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2224 else
2225 return gen_frame_mem (DImode,
2226 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2227 }
2228
2229 /* If FP is not needed, we calculate the location of LR, which would be
2230 at the top of the saved registers block. */
2231
2232 return gen_frame_mem (DImode,
2233 plus_constant (Pmode,
2234 stack_pointer_rtx,
2235 fp_offset
2236 + cfun->machine->frame.saved_regs_size
2237 - 2 * UNITS_PER_WORD));
2238}
2239
2240/* Output code to build up a constant in a register. */
2241static void
d9600ae5 2242aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2243{
2244 if (aarch64_bitmask_imm (val, DImode))
d9600ae5 2245 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2246 else
2247 {
2248 int i;
2249 int ncount = 0;
2250 int zcount = 0;
2251 HOST_WIDE_INT valp = val >> 16;
2252 HOST_WIDE_INT valm;
2253 HOST_WIDE_INT tval;
2254
2255 for (i = 16; i < 64; i += 16)
2256 {
2257 valm = (valp & 0xffff);
2258
2259 if (valm != 0)
2260 ++ zcount;
2261
2262 if (valm != 0xffff)
2263 ++ ncount;
2264
2265 valp >>= 16;
2266 }
2267
2268 /* zcount contains the number of additional MOVK instructions
2269 required if the constant is built up with an initial MOVZ instruction,
2270 while ncount is the number of MOVK instructions required if starting
2271 with a MOVN instruction. Choose whichever sequence needs fewer
2272 instructions, preferring MOVZ when the two counts are equal. */
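      /* Worked example (expository; not part of the original sources):
	 for val = 0x1234000000005678 the three upper 16-bit chunks give
	 zcount = 1 and ncount = 3, so the MOVZ sequence is chosen:
	     movz  xN, #0x5678
	     movk  xN, #0x1234, lsl #48
	 where xN stands for the destination register REGNUM.  */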
2274 if (ncount < zcount)
2275 {
2276 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2277 GEN_INT ((~val) & 0xffff));
2278 tval = 0xffff;
2279 }
2280 else
2281 {
2282 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2283 GEN_INT (val & 0xffff));
2284 tval = 0;
2285 }
2286
2287 val >>= 16;
2288
2289 for (i = 16; i < 64; i += 16)
2290 {
2291 if ((val & 0xffff) != tval)
2292 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2293 GEN_INT (i), GEN_INT (val & 0xffff)));
2294 val >>= 16;
2295 }
2296 }
2297}
2298
2299static void
d9600ae5 2300aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2301{
2302 HOST_WIDE_INT mdelta = delta;
2303 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2304 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2305
2306 if (mdelta < 0)
2307 mdelta = -mdelta;
2308
2309 if (mdelta >= 4096 * 4096)
2310 {
2311 aarch64_build_constant (scratchreg, delta);
2312 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2313 }
2314 else if (mdelta > 0)
2315 {
43e9d192 2316 if (mdelta >= 4096)
2317 {
2318 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2319 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2320 if (delta < 0)
2321 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2322 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2323 else
2324 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2325 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2326 }
43e9d192 2327 if (mdelta % 4096 != 0)
2328 {
2329 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2330 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2331 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2332 }
2333 }
2334}
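/* Expository note (not part of the original sources): a call such as
   aarch64_add_constant (reg, scratch, 0x13008) goes through the
   mdelta >= 4096 path above and emits roughly
       mov  <scratch>, #0x13
       add  <reg>, <reg>, <scratch>, lsl #12
       add  <reg>, <reg>, #8
   splitting the delta into a shifted 12-bit part and a low part.  */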
2335
2336/* Output code to add DELTA to the first argument, and then jump
2337 to FUNCTION. Used for C++ multiple inheritance. */
2338static void
2339aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2340 HOST_WIDE_INT delta,
2341 HOST_WIDE_INT vcall_offset,
2342 tree function)
2343{
2344 /* The this pointer is always in x0. Note that this differs from
2345 Arm where the this pointer may be bumped to r1 if r0 is required
2346 to return a pointer to an aggregate. On AArch64 a result value
2347 pointer will be in x8. */
2348 int this_regno = R0_REGNUM;
75f1d6fc 2349 rtx this_rtx, temp0, temp1, addr, insn, funexp;
43e9d192 2350
2351 reload_completed = 1;
2352 emit_note (NOTE_INSN_PROLOGUE_END);
2353
2354 if (vcall_offset == 0)
d9600ae5 2355 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2356 else
2357 {
2358 gcc_assert ((vcall_offset & 0x7) == 0);
2359
2360 this_rtx = gen_rtx_REG (Pmode, this_regno);
2361 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2362 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
43e9d192 2363
2364 addr = this_rtx;
2365 if (delta != 0)
2366 {
2367 if (delta >= -256 && delta < 256)
2368 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2369 plus_constant (Pmode, this_rtx, delta));
2370 else
d9600ae5 2371 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2372 }
2373
2374 aarch64_emit_move (temp0, gen_rtx_MEM (Pmode, addr));
2375
43e9d192 2376 if (vcall_offset >= -256 && vcall_offset < 32768)
75f1d6fc 2377 addr = plus_constant (Pmode, temp0, vcall_offset);
2378 else
2379 {
d9600ae5 2380 aarch64_build_constant (IP1_REGNUM, vcall_offset);
75f1d6fc 2381 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2382 }
2383
2384 aarch64_emit_move (temp1, gen_rtx_MEM (Pmode, addr));
2385 emit_insn (gen_add2_insn (this_rtx, temp1));
2386 }
2387
2388 /* Generate a tail call to the target function. */
2389 if (!TREE_USED (function))
2390 {
2391 assemble_external (function);
2392 TREE_USED (function) = 1;
2393 }
2394 funexp = XEXP (DECL_RTL (function), 0);
2395 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2396 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2397 SIBLING_CALL_P (insn) = 1;
2398
2399 insn = get_insns ();
2400 shorten_branches (insn);
2401 final_start_function (insn, file, 1);
2402 final (insn, file, 1);
43e9d192 2403 final_end_function ();
2404
2405 /* Stop pretending to be a post-reload pass. */
2406 reload_completed = 0;
2407}
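/* Expository note (not part of the original sources): for a thunk with a
   small positive DELTA and a zero VCALL_OFFSET, the code above reduces to
       add  x0, x0, #<delta>
       b    <function>
   adjusting the this pointer in place before the tail call.  */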
2408
2409static int
2410aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2411{
2412 if (GET_CODE (*x) == SYMBOL_REF)
2413 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2414
2415 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2416 TLS offsets, not real symbol references. */
2417 if (GET_CODE (*x) == UNSPEC
2418 && XINT (*x, 1) == UNSPEC_TLS)
2419 return -1;
2420
2421 return 0;
2422}
2423
2424static bool
2425aarch64_tls_referenced_p (rtx x)
2426{
2427 if (!TARGET_HAVE_TLS)
2428 return false;
2429
2430 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2431}
2432
2433
2434static int
2435aarch64_bitmasks_cmp (const void *i1, const void *i2)
2436{
2437 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2438 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2439
2440 if (*imm1 < *imm2)
2441 return -1;
2442 if (*imm1 > *imm2)
2443 return +1;
2444 return 0;
2445}
2446
2447
2448static void
2449aarch64_build_bitmask_table (void)
2450{
2451 unsigned HOST_WIDE_INT mask, imm;
2452 unsigned int log_e, e, s, r;
2453 unsigned int nimms = 0;
2454
2455 for (log_e = 1; log_e <= 6; log_e++)
2456 {
2457 e = 1 << log_e;
2458 if (e == 64)
2459 mask = ~(HOST_WIDE_INT) 0;
2460 else
2461 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2462 for (s = 1; s < e; s++)
2463 {
2464 for (r = 0; r < e; r++)
2465 {
2466 /* set s consecutive bits to 1 (s < 64) */
2467 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2468 /* rotate right by r */
2469 if (r != 0)
2470 imm = ((imm >> r) | (imm << (e - r))) & mask;
2471 /* replicate the constant depending on SIMD size */
2472 switch (log_e) {
2473 case 1: imm |= (imm << 2);
2474 case 2: imm |= (imm << 4);
2475 case 3: imm |= (imm << 8);
2476 case 4: imm |= (imm << 16);
2477 case 5: imm |= (imm << 32);
2478 case 6:
2479 break;
2480 default:
2481 gcc_unreachable ();
2482 }
2483 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2484 aarch64_bitmasks[nimms++] = imm;
2485 }
2486 }
2487 }
2488
2489 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2490 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2491 aarch64_bitmasks_cmp);
2492}
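/* Expository example (not part of the original sources): the entry built
   for log_e = 3, s = 4, r = 0 starts as imm = 0xf within an 8-bit
   element; the fall-through switch above then replicates it to the full
   64-bit pattern 0x0f0f0f0f0f0f0f0f before it is recorded.  */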
2493
2494
2495/* Return true if val can be encoded as a 12-bit unsigned immediate with
2496 a left shift of 0 or 12 bits. */
2497bool
2498aarch64_uimm12_shift (HOST_WIDE_INT val)
2499{
2500 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2501 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2502 );
2503}
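/* Expository examples (not part of the original sources):
   aarch64_uimm12_shift accepts 0xabc (shift 0) and 0xabc000 (shift 12)
   but rejects 0xabcd, which straddles both immediate positions.  */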
2504
2505
2506/* Return true if val is an immediate that can be loaded into a
2507 register by a MOVZ instruction. */
2508static bool
2509aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2510{
2511 if (GET_MODE_SIZE (mode) > 4)
2512 {
2513 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2514 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2515 return 1;
2516 }
2517 else
2518 {
2519 /* Ignore sign extension. */
2520 val &= (HOST_WIDE_INT) 0xffffffff;
2521 }
2522 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2523 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2524}
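/* Expository examples (not part of the original sources): in DImode,
   0xffff0000 and 0x1234000000000000 each occupy a single 16-bit chunk
   and are accepted above, whereas 0x12345678 spans two chunks and is
   rejected; aarch64_move_imm below additionally tries the complement
   (for MOVN) and the bitmask-immediate encoding.  */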
2525
2526
2527/* Return true if val is a valid bitmask immediate. */
2528bool
2529aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2530{
2531 if (GET_MODE_SIZE (mode) < 8)
2532 {
2533 /* Replicate bit pattern. */
2534 val &= (HOST_WIDE_INT) 0xffffffff;
2535 val |= val << 32;
2536 }
2537 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2538 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2539}
2540
2541
2542/* Return true if val is an immediate that can be loaded into a
2543 register in a single instruction. */
2544bool
2545aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2546{
2547 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2548 return 1;
2549 return aarch64_bitmask_imm (val, mode);
2550}
2551
2552static bool
2553aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2554{
2555 rtx base, offset;
2556 if (GET_CODE (x) == HIGH)
2557 return true;
2558
2559 split_const (x, &base, &offset);
2560 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2561 return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR) != SYMBOL_FORCE_TO_MEM);
2562
2563 return aarch64_tls_referenced_p (x);
2564}
2565
2566/* Return true if register REGNO is a valid index register.
2567 STRICT_P is true if REG_OK_STRICT is in effect. */
2568
2569bool
2570aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2571{
2572 if (!HARD_REGISTER_NUM_P (regno))
2573 {
2574 if (!strict_p)
2575 return true;
2576
2577 if (!reg_renumber)
2578 return false;
2579
2580 regno = reg_renumber[regno];
2581 }
2582 return GP_REGNUM_P (regno);
2583}
2584
2585/* Return true if register REGNO is a valid base register.
2586 STRICT_P is true if REG_OK_STRICT is in effect. */
2587
2588bool
2589aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2590{
2591 if (!HARD_REGISTER_NUM_P (regno))
2592 {
2593 if (!strict_p)
2594 return true;
2595
2596 if (!reg_renumber)
2597 return false;
2598
2599 regno = reg_renumber[regno];
2600 }
2601
2602 /* The fake registers will be eliminated to either the stack or
2603 hard frame pointer, both of which are usually valid base registers.
2604 Reload deals with the cases where the eliminated form isn't valid. */
2605 return (GP_REGNUM_P (regno)
2606 || regno == SP_REGNUM
2607 || regno == FRAME_POINTER_REGNUM
2608 || regno == ARG_POINTER_REGNUM);
2609}
2610
2611/* Return true if X is a valid base register.
2612 STRICT_P is true if REG_OK_STRICT is in effect. */
2613
2614static bool
2615aarch64_base_register_rtx_p (rtx x, bool strict_p)
2616{
2617 if (!strict_p && GET_CODE (x) == SUBREG)
2618 x = SUBREG_REG (x);
2619
2620 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2621}
2622
2623/* Return true if address offset is a valid index. If it is, fill in INFO
2624 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2625
2626static bool
2627aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2628 enum machine_mode mode, bool strict_p)
2629{
2630 enum aarch64_address_type type;
2631 rtx index;
2632 int shift;
2633
2634 /* (reg:P) */
2635 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2636 && GET_MODE (x) == Pmode)
2637 {
2638 type = ADDRESS_REG_REG;
2639 index = x;
2640 shift = 0;
2641 }
2642 /* (sign_extend:DI (reg:SI)) */
2643 else if ((GET_CODE (x) == SIGN_EXTEND
2644 || GET_CODE (x) == ZERO_EXTEND)
2645 && GET_MODE (x) == DImode
2646 && GET_MODE (XEXP (x, 0)) == SImode)
2647 {
2648 type = (GET_CODE (x) == SIGN_EXTEND)
2649 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2650 index = XEXP (x, 0);
2651 shift = 0;
2652 }
2653 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2654 else if (GET_CODE (x) == MULT
2655 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2656 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2657 && GET_MODE (XEXP (x, 0)) == DImode
2658 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2659 && CONST_INT_P (XEXP (x, 1)))
2660 {
2661 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2662 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2663 index = XEXP (XEXP (x, 0), 0);
2664 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2665 }
2666 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2667 else if (GET_CODE (x) == ASHIFT
2668 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2669 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2670 && GET_MODE (XEXP (x, 0)) == DImode
2671 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2672 && CONST_INT_P (XEXP (x, 1)))
2673 {
2674 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2675 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2676 index = XEXP (XEXP (x, 0), 0);
2677 shift = INTVAL (XEXP (x, 1));
2678 }
2679 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2680 else if ((GET_CODE (x) == SIGN_EXTRACT
2681 || GET_CODE (x) == ZERO_EXTRACT)
2682 && GET_MODE (x) == DImode
2683 && GET_CODE (XEXP (x, 0)) == MULT
2684 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2685 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2686 {
2687 type = (GET_CODE (x) == SIGN_EXTRACT)
2688 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2689 index = XEXP (XEXP (x, 0), 0);
2690 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2691 if (INTVAL (XEXP (x, 1)) != 32 + shift
2692 || INTVAL (XEXP (x, 2)) != 0)
2693 shift = -1;
2694 }
2695 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2696 (const_int 0xffffffff<<shift)) */
2697 else if (GET_CODE (x) == AND
2698 && GET_MODE (x) == DImode
2699 && GET_CODE (XEXP (x, 0)) == MULT
2700 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2701 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2702 && CONST_INT_P (XEXP (x, 1)))
2703 {
2704 type = ADDRESS_REG_UXTW;
2705 index = XEXP (XEXP (x, 0), 0);
2706 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2707 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2708 shift = -1;
2709 }
2710 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2711 else if ((GET_CODE (x) == SIGN_EXTRACT
2712 || GET_CODE (x) == ZERO_EXTRACT)
2713 && GET_MODE (x) == DImode
2714 && GET_CODE (XEXP (x, 0)) == ASHIFT
2715 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2716 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2717 {
2718 type = (GET_CODE (x) == SIGN_EXTRACT)
2719 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2720 index = XEXP (XEXP (x, 0), 0);
2721 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2722 if (INTVAL (XEXP (x, 1)) != 32 + shift
2723 || INTVAL (XEXP (x, 2)) != 0)
2724 shift = -1;
2725 }
2726 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2727 (const_int 0xffffffff<<shift)) */
2728 else if (GET_CODE (x) == AND
2729 && GET_MODE (x) == DImode
2730 && GET_CODE (XEXP (x, 0)) == ASHIFT
2731 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2732 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2733 && CONST_INT_P (XEXP (x, 1)))
2734 {
2735 type = ADDRESS_REG_UXTW;
2736 index = XEXP (XEXP (x, 0), 0);
2737 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2738 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2739 shift = -1;
2740 }
2741 /* (mult:P (reg:P) (const_int scale)) */
2742 else if (GET_CODE (x) == MULT
2743 && GET_MODE (x) == Pmode
2744 && GET_MODE (XEXP (x, 0)) == Pmode
2745 && CONST_INT_P (XEXP (x, 1)))
2746 {
2747 type = ADDRESS_REG_REG;
2748 index = XEXP (x, 0);
2749 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2750 }
2751 /* (ashift:P (reg:P) (const_int shift)) */
2752 else if (GET_CODE (x) == ASHIFT
2753 && GET_MODE (x) == Pmode
2754 && GET_MODE (XEXP (x, 0)) == Pmode
2755 && CONST_INT_P (XEXP (x, 1)))
2756 {
2757 type = ADDRESS_REG_REG;
2758 index = XEXP (x, 0);
2759 shift = INTVAL (XEXP (x, 1));
2760 }
2761 else
2762 return false;
2763
2764 if (GET_CODE (index) == SUBREG)
2765 index = SUBREG_REG (index);
2766
2767 if ((shift == 0 ||
2768 (shift > 0 && shift <= 3
2769 && (1 << shift) == GET_MODE_SIZE (mode)))
2770 && REG_P (index)
2771 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2772 {
2773 info->type = type;
2774 info->offset = index;
2775 info->shift = shift;
2776 return true;
2777 }
2778
2779 return false;
2780}
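/* Expository example (not part of the original sources): for an SImode
   access, the index expression
       (mult:DI (sign_extend:DI (reg:SI)) (const_int 4))
   is classified above as ADDRESS_REG_SXTW with shift 2, corresponding
   to the addressing form [base, wN, sxtw 2].  */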
2781
2782static inline bool
2783offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2784{
2785 return (offset >= -64 * GET_MODE_SIZE (mode)
2786 && offset < 64 * GET_MODE_SIZE (mode)
2787 && offset % GET_MODE_SIZE (mode) == 0);
2788}
2789
2790static inline bool
2791offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
2792 HOST_WIDE_INT offset)
2793{
2794 return offset >= -256 && offset < 256;
2795}
2796
2797static inline bool
2798offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2799{
2800 return (offset >= 0
2801 && offset < 4096 * GET_MODE_SIZE (mode)
2802 && offset % GET_MODE_SIZE (mode) == 0);
2803}
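/* Expository summary (not part of the original sources) of what the
   three predicates above accept for a DImode (8-byte) access:
     7-bit signed scaled:    -512 .. 504, multiples of 8 (load/store pair)
     9-bit signed unscaled:  -256 .. 255
     12-bit unsigned scaled: 0 .. 32760, multiples of 8  */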
2804
2805/* Return true if X is a valid address for machine mode MODE. If it is,
2806 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
2807 effect. OUTER_CODE is PARALLEL for a load/store pair. */
2808
2809static bool
2810aarch64_classify_address (struct aarch64_address_info *info,
2811 rtx x, enum machine_mode mode,
2812 RTX_CODE outer_code, bool strict_p)
2813{
2814 enum rtx_code code = GET_CODE (x);
2815 rtx op0, op1;
2816 bool allow_reg_index_p =
2817 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
2818
2819 /* Don't support anything other than POST_INC or REG addressing for
2820 AdvSIMD. */
2821 if (aarch64_vector_mode_p (mode)
2822 && (code != POST_INC && code != REG))
2823 return false;
2824
2825 switch (code)
2826 {
2827 case REG:
2828 case SUBREG:
2829 info->type = ADDRESS_REG_IMM;
2830 info->base = x;
2831 info->offset = const0_rtx;
2832 return aarch64_base_register_rtx_p (x, strict_p);
2833
2834 case PLUS:
2835 op0 = XEXP (x, 0);
2836 op1 = XEXP (x, 1);
2837 if (GET_MODE_SIZE (mode) != 0
2838 && CONST_INT_P (op1)
2839 && aarch64_base_register_rtx_p (op0, strict_p))
2840 {
2841 HOST_WIDE_INT offset = INTVAL (op1);
2842
2843 info->type = ADDRESS_REG_IMM;
2844 info->base = op0;
2845 info->offset = op1;
2846
2847 /* TImode and TFmode values are allowed in both pairs of X
2848 registers and individual Q registers. The available
2849 address modes are:
2850 X,X: 7-bit signed scaled offset
2851 Q: 9-bit signed offset
2852 We conservatively require an offset representable in either mode.
2853 */
2854 if (mode == TImode || mode == TFmode)
2855 return (offset_7bit_signed_scaled_p (mode, offset)
2856 && offset_9bit_signed_unscaled_p (mode, offset));
2857
2858 if (outer_code == PARALLEL)
2859 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2860 && offset_7bit_signed_scaled_p (mode, offset));
2861 else
2862 return (offset_9bit_signed_unscaled_p (mode, offset)
2863 || offset_12bit_unsigned_scaled_p (mode, offset));
2864 }
2865
2866 if (allow_reg_index_p)
2867 {
2868 /* Look for base + (scaled/extended) index register. */
2869 if (aarch64_base_register_rtx_p (op0, strict_p)
2870 && aarch64_classify_index (info, op1, mode, strict_p))
2871 {
2872 info->base = op0;
2873 return true;
2874 }
2875 if (aarch64_base_register_rtx_p (op1, strict_p)
2876 && aarch64_classify_index (info, op0, mode, strict_p))
2877 {
2878 info->base = op1;
2879 return true;
2880 }
2881 }
2882
2883 return false;
2884
2885 case POST_INC:
2886 case POST_DEC:
2887 case PRE_INC:
2888 case PRE_DEC:
2889 info->type = ADDRESS_REG_WB;
2890 info->base = XEXP (x, 0);
2891 info->offset = NULL_RTX;
2892 return aarch64_base_register_rtx_p (info->base, strict_p);
2893
2894 case POST_MODIFY:
2895 case PRE_MODIFY:
2896 info->type = ADDRESS_REG_WB;
2897 info->base = XEXP (x, 0);
2898 if (GET_CODE (XEXP (x, 1)) == PLUS
2899 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
2900 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
2901 && aarch64_base_register_rtx_p (info->base, strict_p))
2902 {
2903 HOST_WIDE_INT offset;
2904 info->offset = XEXP (XEXP (x, 1), 1);
2905 offset = INTVAL (info->offset);
2906
2907 /* TImode and TFmode values are allowed in both pairs of X
2908 registers and individual Q registers. The available
2909 address modes are:
2910 X,X: 7-bit signed scaled offset
2911 Q: 9-bit signed offset
2912 We conservatively require an offset representable in either mode.
2913 */
2914 if (mode == TImode || mode == TFmode)
2915 return (offset_7bit_signed_scaled_p (mode, offset)
2916 && offset_9bit_signed_unscaled_p (mode, offset));
2917
2918 if (outer_code == PARALLEL)
2919 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2920 && offset_7bit_signed_scaled_p (mode, offset));
2921 else
2922 return offset_9bit_signed_unscaled_p (mode, offset);
2923 }
2924 return false;
2925
2926 case CONST:
2927 case SYMBOL_REF:
2928 case LABEL_REF:
2929 /* load literal: pc-relative constant pool entry. Only supported
2930 for SI mode or larger. */
43e9d192 2931 info->type = ADDRESS_SYMBOLIC;
79517551 2932 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
2933 {
2934 rtx sym, addend;
2935
2936 split_const (x, &sym, &addend);
2937 return (GET_CODE (sym) == LABEL_REF
2938 || (GET_CODE (sym) == SYMBOL_REF
2939 && CONSTANT_POOL_ADDRESS_P (sym)));
2940 }
2941 return false;
2942
2943 case LO_SUM:
2944 info->type = ADDRESS_LO_SUM;
2945 info->base = XEXP (x, 0);
2946 info->offset = XEXP (x, 1);
2947 if (allow_reg_index_p
2948 && aarch64_base_register_rtx_p (info->base, strict_p))
2949 {
2950 rtx sym, offs;
2951 split_const (info->offset, &sym, &offs);
2952 if (GET_CODE (sym) == SYMBOL_REF
2953 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
2954 == SYMBOL_SMALL_ABSOLUTE))
2955 {
2956 /* The symbol and offset must be aligned to the access size. */
2957 unsigned int align;
2958 unsigned int ref_size;
2959
2960 if (CONSTANT_POOL_ADDRESS_P (sym))
2961 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
2962 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
2963 {
2964 tree exp = SYMBOL_REF_DECL (sym);
2965 align = TYPE_ALIGN (TREE_TYPE (exp));
2966 align = CONSTANT_ALIGNMENT (exp, align);
2967 }
2968 else if (SYMBOL_REF_DECL (sym))
2969 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
2970 else
2971 align = BITS_PER_UNIT;
2972
2973 ref_size = GET_MODE_SIZE (mode);
2974 if (ref_size == 0)
2975 ref_size = GET_MODE_SIZE (DImode);
2976
2977 return ((INTVAL (offs) & (ref_size - 1)) == 0
2978 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
2979 }
2980 }
2981 return false;
2982
2983 default:
2984 return false;
2985 }
2986}
2987
2988bool
2989aarch64_symbolic_address_p (rtx x)
2990{
2991 rtx offset;
2992
2993 split_const (x, &x, &offset);
2994 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
2995}
2996
2997/* Classify the base of symbolic expression X, given that X appears in
2998 context CONTEXT. */
2999static enum aarch64_symbol_type
3000aarch64_classify_symbolic_expression (rtx x, enum aarch64_symbol_context context)
3001{
3002 rtx offset;
3003 split_const (x, &x, &offset);
3004 return aarch64_classify_symbol (x, context);
3005}
3006
3007
3008/* Return TRUE if X is a legitimate address for accessing memory in
3009 mode MODE. */
3010static bool
3011aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3012{
3013 struct aarch64_address_info addr;
3014
3015 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3016}
3017
3018/* Return TRUE if X is a legitimate address for accessing memory in
3019 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3020 pair operation. */
3021bool
3022aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3023 RTX_CODE outer_code, bool strict_p)
3024{
3025 struct aarch64_address_info addr;
3026
3027 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3028}
3029
3030/* Return TRUE if rtx X is immediate constant 0.0 */
3031bool
3520f7cc 3032aarch64_float_const_zero_rtx_p (rtx x)
3033{
3034 REAL_VALUE_TYPE r;
3035
3036 if (GET_MODE (x) == VOIDmode)
3037 return false;
3038
3039 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3040 if (REAL_VALUE_MINUS_ZERO (r))
3041 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3042 return REAL_VALUES_EQUAL (r, dconst0);
3043}
3044
3045/* Return the fixed registers used for condition codes. */
3046
3047static bool
3048aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3049{
3050 *p1 = CC_REGNUM;
3051 *p2 = INVALID_REGNUM;
3052 return true;
3053}
3054
3055enum machine_mode
3056aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3057{
3058 /* All floating point compares return CCFP if it is an equality
3059 comparison, and CCFPE otherwise. */
3060 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3061 {
3062 switch (code)
3063 {
3064 case EQ:
3065 case NE:
3066 case UNORDERED:
3067 case ORDERED:
3068 case UNLT:
3069 case UNLE:
3070 case UNGT:
3071 case UNGE:
3072 case UNEQ:
3073 case LTGT:
3074 return CCFPmode;
3075
3076 case LT:
3077 case LE:
3078 case GT:
3079 case GE:
3080 return CCFPEmode;
3081
3082 default:
3083 gcc_unreachable ();
3084 }
3085 }
3086
3087 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3088 && y == const0_rtx
3089 && (code == EQ || code == NE || code == LT || code == GE)
a8504f22 3090 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND))
3091 return CC_NZmode;
3092
3093 /* A compare with a shifted operand. Because of canonicalization,
3094 the comparison will have to be swapped when we emit the assembly
3095 code. */
3096 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3097 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3098 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3099 || GET_CODE (x) == LSHIFTRT
3100 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3101 return CC_SWPmode;
3102
3103 /* A compare of a mode narrower than SI mode against zero can be done
3104 by extending the value in the comparison. */
3105 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3106 && y == const0_rtx)
3107 /* Only use sign-extension if we really need it. */
3108 return ((code == GT || code == GE || code == LE || code == LT)
3109 ? CC_SESWPmode : CC_ZESWPmode);
3110
3111 /* For everything else, return CCmode. */
3112 return CCmode;
3113}
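/* Expository example (not part of the original sources): for a compare
   such as (compare (ashift:DI (reg) (const_int 3)) (reg)), the function
   above returns CC_SWPmode; the comparison is emitted with its operands
   swapped, and the CC_SWPmode entries in aarch64_get_condition_code
   below compensate by mapping GE to LE, GT to LT, and so on.  */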
3114
3115static unsigned
3116aarch64_get_condition_code (rtx x)
3117{
3118 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3119 enum rtx_code comp_code = GET_CODE (x);
3120
3121 if (GET_MODE_CLASS (mode) != MODE_CC)
3122 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3123
3124 switch (mode)
3125 {
3126 case CCFPmode:
3127 case CCFPEmode:
3128 switch (comp_code)
3129 {
3130 case GE: return AARCH64_GE;
3131 case GT: return AARCH64_GT;
3132 case LE: return AARCH64_LS;
3133 case LT: return AARCH64_MI;
3134 case NE: return AARCH64_NE;
3135 case EQ: return AARCH64_EQ;
3136 case ORDERED: return AARCH64_VC;
3137 case UNORDERED: return AARCH64_VS;
3138 case UNLT: return AARCH64_LT;
3139 case UNLE: return AARCH64_LE;
3140 case UNGT: return AARCH64_HI;
3141 case UNGE: return AARCH64_PL;
3142 default: gcc_unreachable ();
3143 }
3144 break;
3145
3146 case CCmode:
3147 switch (comp_code)
3148 {
3149 case NE: return AARCH64_NE;
3150 case EQ: return AARCH64_EQ;
3151 case GE: return AARCH64_GE;
3152 case GT: return AARCH64_GT;
3153 case LE: return AARCH64_LE;
3154 case LT: return AARCH64_LT;
3155 case GEU: return AARCH64_CS;
3156 case GTU: return AARCH64_HI;
3157 case LEU: return AARCH64_LS;
3158 case LTU: return AARCH64_CC;
3159 default: gcc_unreachable ();
3160 }
3161 break;
3162
3163 case CC_SWPmode:
3164 case CC_ZESWPmode:
3165 case CC_SESWPmode:
3166 switch (comp_code)
3167 {
3168 case NE: return AARCH64_NE;
3169 case EQ: return AARCH64_EQ;
3170 case GE: return AARCH64_LE;
3171 case GT: return AARCH64_LT;
3172 case LE: return AARCH64_GE;
3173 case LT: return AARCH64_GT;
3174 case GEU: return AARCH64_LS;
3175 case GTU: return AARCH64_CC;
3176 case LEU: return AARCH64_CS;
3177 case LTU: return AARCH64_HI;
3178 default: gcc_unreachable ();
3179 }
3180 break;
3181
3182 case CC_NZmode:
3183 switch (comp_code)
3184 {
3185 case NE: return AARCH64_NE;
3186 case EQ: return AARCH64_EQ;
3187 case GE: return AARCH64_PL;
3188 case LT: return AARCH64_MI;
3189 default: gcc_unreachable ();
3190 }
3191 break;
3192
3193 default:
3194 gcc_unreachable ();
3195 break;
3196 }
3197}
3198
3199static unsigned
3200bit_count (unsigned HOST_WIDE_INT value)
3201{
3202 unsigned count = 0;
3203
3204 while (value)
3205 {
3206 count++;
3207 value &= value - 1;
3208 }
3209
3210 return count;
3211}
3212
3213void
3214aarch64_print_operand (FILE *f, rtx x, char code)
3215{
3216 switch (code)
3217 {
3218 case 'e':
3219 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3220 {
3221 int n;
3222
3223 if (GET_CODE (x) != CONST_INT
3224 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3225 {
3226 output_operand_lossage ("invalid operand for '%%%c'", code);
3227 return;
3228 }
3229
3230 switch (n)
3231 {
3232 case 3:
3233 fputc ('b', f);
3234 break;
3235 case 4:
3236 fputc ('h', f);
3237 break;
3238 case 5:
3239 fputc ('w', f);
3240 break;
3241 default:
3242 output_operand_lossage ("invalid operand for '%%%c'", code);
3243 return;
3244 }
3245 }
3246 break;
3247
3248 case 'p':
3249 {
3250 int n;
3251
3252 /* Print N such that 2^N == X. */
3253 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3254 {
3255 output_operand_lossage ("invalid operand for '%%%c'", code);
3256 return;
3257 }
3258
3259 asm_fprintf (f, "%d", n);
3260 }
3261 break;
3262
3263 case 'P':
3264 /* Print the number of non-zero bits in X (a const_int). */
3265 if (GET_CODE (x) != CONST_INT)
3266 {
3267 output_operand_lossage ("invalid operand for '%%%c'", code);
3268 return;
3269 }
3270
3271 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3272 break;
3273
3274 case 'H':
3275 /* Print the higher numbered register of a pair (TImode) of regs. */
3276 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3277 {
3278 output_operand_lossage ("invalid operand for '%%%c'", code);
3279 return;
3280 }
3281
01a3a324 3282 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3283 break;
3284
3285 case 'Q':
3286 /* Print the least significant register of a pair (TImode) of regs. */
3287 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3288 {
3289 output_operand_lossage ("invalid operand for '%%%c'", code);
3290 return;
3291 }
01a3a324 3292 asm_fprintf (f, "%s", reg_names [REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0)]);
3293 break;
3294
3295 case 'R':
3296 /* Print the most significant register of a pair (TImode) of regs. */
3297 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3298 {
3299 output_operand_lossage ("invalid operand for '%%%c'", code);
3300 return;
3301 }
01a3a324 3302 asm_fprintf (f, "%s", reg_names [REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1)]);
3303 break;
3304
3305 case 'm':
3306 /* Print a condition (eq, ne, etc). */
3307
3308 /* CONST_TRUE_RTX means always -- that's the default. */
3309 if (x == const_true_rtx)
3310 return;
3311
3312 if (!COMPARISON_P (x))
3313 {
3314 output_operand_lossage ("invalid operand for '%%%c'", code);
3315 return;
3316 }
3317
3318 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3319 break;
3320
3321 case 'M':
3322 /* Print the inverse of a condition (eq <-> ne, etc). */
3323
3324 /* CONST_TRUE_RTX means never -- that's the default. */
3325 if (x == const_true_rtx)
3326 {
3327 fputs ("nv", f);
3328 return;
3329 }
3330
3331 if (!COMPARISON_P (x))
3332 {
3333 output_operand_lossage ("invalid operand for '%%%c'", code);
3334 return;
3335 }
3336
3337 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3338 (aarch64_get_condition_code (x))], f);
3339 break;
3340
3341 case 'b':
3342 case 'h':
3343 case 's':
3344 case 'd':
3345 case 'q':
3346 /* Print a scalar FP/SIMD register name. */
3347 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3348 {
3349 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3350 return;
3351 }
50ce6f88 3352 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3353 break;
3354
3355 case 'S':
3356 case 'T':
3357 case 'U':
3358 case 'V':
3359 /* Print the first FP/SIMD register name in a list. */
3360 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3361 {
3362 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3363 return;
3364 }
50ce6f88 3365 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3366 break;
3367
3368 case 'X':
3369 /* Print integer constant in hex. */
3370 if (GET_CODE (x) != CONST_INT)
3371 {
3372 output_operand_lossage ("invalid operand for '%%%c'", code);
3373 return;
3374 }
3375 asm_fprintf (f, "0x%wx", UINTVAL (x));
3376 break;
3377
3378 case 'w':
3379 case 'x':
3380 /* Print a general register name or the zero register (32-bit or
3381 64-bit). */
3382 if (x == const0_rtx
3383 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 3384 {
50ce6f88 3385 asm_fprintf (f, "%czr", code);
3386 break;
3387 }
3388
3389 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3390 {
50ce6f88 3391 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3392 break;
3393 }
3394
3395 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3396 {
50ce6f88 3397 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3398 break;
3399 }
3400
3401 /* Fall through */
3402
3403 case 0:
3404 /* Print a normal operand, if it's a general register, then we
3405 assume DImode. */
3406 if (x == NULL)
3407 {
3408 output_operand_lossage ("missing operand");
3409 return;
3410 }
3411
3412 switch (GET_CODE (x))
3413 {
3414 case REG:
01a3a324 3415 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3416 break;
3417
3418 case MEM:
3419 aarch64_memory_reference_mode = GET_MODE (x);
3420 output_address (XEXP (x, 0));
3421 break;
3422
3423 case LABEL_REF:
3424 case SYMBOL_REF:
3425 output_addr_const (asm_out_file, x);
3426 break;
3427
3428 case CONST_INT:
3429 asm_fprintf (f, "%wd", INTVAL (x));
3430 break;
3431
3432 case CONST_VECTOR:
3433 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3434 {
3435 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3436 HOST_WIDE_INT_MIN,
3437 HOST_WIDE_INT_MAX));
3438 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3439 }
3440 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3441 {
3442 fputc ('0', f);
3443 }
3444 else
3445 gcc_unreachable ();
3446 break;
3447
3448 case CONST_DOUBLE:
3449 /* CONST_DOUBLE can represent a double-width integer.
3450 In this case, the mode of x is VOIDmode. */
3451 if (GET_MODE (x) == VOIDmode)
3452 ; /* Do Nothing. */
3453 else if (aarch64_float_const_zero_rtx_p (x))
3454 {
3455 fputc ('0', f);
3456 break;
3457 }
3458 else if (aarch64_float_const_representable_p (x))
3459 {
3460#define buf_size 20
3461 char float_buf[buf_size] = {'\0'};
3462 REAL_VALUE_TYPE r;
3463 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3464 real_to_decimal_for_mode (float_buf, &r,
3465 buf_size, buf_size,
3466 1, GET_MODE (x));
3467 asm_fprintf (asm_out_file, "%s", float_buf);
3468 break;
3469#undef buf_size
3470 }
3471 output_operand_lossage ("invalid constant");
3472 return;
3473 default:
3474 output_operand_lossage ("invalid operand");
3475 return;
3476 }
3477 break;
3478
3479 case 'A':
3480 if (GET_CODE (x) == HIGH)
3481 x = XEXP (x, 0);
3482
3483 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3484 {
3485 case SYMBOL_SMALL_GOT:
3486 asm_fprintf (asm_out_file, ":got:");
3487 break;
3488
3489 case SYMBOL_SMALL_TLSGD:
3490 asm_fprintf (asm_out_file, ":tlsgd:");
3491 break;
3492
3493 case SYMBOL_SMALL_TLSDESC:
3494 asm_fprintf (asm_out_file, ":tlsdesc:");
3495 break;
3496
3497 case SYMBOL_SMALL_GOTTPREL:
3498 asm_fprintf (asm_out_file, ":gottprel:");
3499 break;
3500
3501 case SYMBOL_SMALL_TPREL:
3502 asm_fprintf (asm_out_file, ":tprel:");
3503 break;
3504
3505 default:
3506 break;
3507 }
3508 output_addr_const (asm_out_file, x);
3509 break;
3510
3511 case 'L':
3512 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3513 {
3514 case SYMBOL_SMALL_GOT:
3515 asm_fprintf (asm_out_file, ":lo12:");
3516 break;
3517
3518 case SYMBOL_SMALL_TLSGD:
3519 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3520 break;
3521
3522 case SYMBOL_SMALL_TLSDESC:
3523 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3524 break;
3525
3526 case SYMBOL_SMALL_GOTTPREL:
3527 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3528 break;
3529
3530 case SYMBOL_SMALL_TPREL:
3531 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3532 break;
3533
3534 default:
3535 break;
3536 }
3537 output_addr_const (asm_out_file, x);
3538 break;
3539
3540 case 'G':
3541
3542 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3543 {
3544 case SYMBOL_SMALL_TPREL:
3545 asm_fprintf (asm_out_file, ":tprel_hi12:");
3546 break;
3547 default:
3548 break;
3549 }
3550 output_addr_const (asm_out_file, x);
3551 break;
3552
3553 default:
3554 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3555 return;
3556 }
3557}
3558
3559void
3560aarch64_print_operand_address (FILE *f, rtx x)
3561{
3562 struct aarch64_address_info addr;
3563
3564 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3565 MEM, true))
3566 switch (addr.type)
3567 {
3568 case ADDRESS_REG_IMM:
3569 if (addr.offset == const0_rtx)
01a3a324 3570 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43e9d192 3571 else
01a3a324 3572 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3573 INTVAL (addr.offset));
3574 return;
3575
3576 case ADDRESS_REG_REG:
3577 if (addr.shift == 0)
3578 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3579 reg_names [REGNO (addr.offset)]);
43e9d192 3580 else
3581 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3582 reg_names [REGNO (addr.offset)], addr.shift);
3583 return;
3584
3585 case ADDRESS_REG_UXTW:
3586 if (addr.shift == 0)
01a3a324 3587 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3588 REGNO (addr.offset) - R0_REGNUM);
3589 else
01a3a324 3590 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3591 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3592 return;
3593
3594 case ADDRESS_REG_SXTW:
3595 if (addr.shift == 0)
01a3a324 3596 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3597 REGNO (addr.offset) - R0_REGNUM);
3598 else
01a3a324 3599 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3600 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3601 return;
3602
3603 case ADDRESS_REG_WB:
3604 switch (GET_CODE (x))
3605 {
3606 case PRE_INC:
01a3a324 3607 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3608 GET_MODE_SIZE (aarch64_memory_reference_mode));
3609 return;
3610 case POST_INC:
01a3a324 3611 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3612 GET_MODE_SIZE (aarch64_memory_reference_mode));
3613 return;
3614 case PRE_DEC:
01a3a324 3615 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3616 GET_MODE_SIZE (aarch64_memory_reference_mode));
3617 return;
3618 case POST_DEC:
01a3a324 3619 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3620 GET_MODE_SIZE (aarch64_memory_reference_mode));
3621 return;
3622 case PRE_MODIFY:
01a3a324 3623 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3624 INTVAL (addr.offset));
3625 return;
3626 case POST_MODIFY:
01a3a324 3627 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3628 INTVAL (addr.offset));
3629 return;
3630 default:
3631 break;
3632 }
3633 break;
3634
3635 case ADDRESS_LO_SUM:
01a3a324 3636 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3637 output_addr_const (f, addr.offset);
3638 asm_fprintf (f, "]");
3639 return;
3640
3641 case ADDRESS_SYMBOLIC:
3642 break;
3643 }
3644
3645 output_addr_const (f, x);
3646}
3647
3648void
3649aarch64_function_profiler (FILE *f ATTRIBUTE_UNUSED,
3650 int labelno ATTRIBUTE_UNUSED)
3651{
3652 sorry ("function profiling");
3653}
3654
3655bool
3656aarch64_label_mentioned_p (rtx x)
3657{
3658 const char *fmt;
3659 int i;
3660
3661 if (GET_CODE (x) == LABEL_REF)
3662 return true;
3663
3664 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3665 referencing instruction, but they are constant offsets, not
3666 symbols. */
3667 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3668 return false;
3669
3670 fmt = GET_RTX_FORMAT (GET_CODE (x));
3671 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3672 {
3673 if (fmt[i] == 'E')
3674 {
3675 int j;
3676
3677 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3678 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3679 return 1;
3680 }
3681 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3682 return 1;
3683 }
3684
3685 return 0;
3686}
3687
3688/* Implement REGNO_REG_CLASS. */
3689
3690enum reg_class
3691aarch64_regno_regclass (unsigned regno)
3692{
3693 if (GP_REGNUM_P (regno))
3694 return CORE_REGS;
3695
3696 if (regno == SP_REGNUM)
3697 return STACK_REG;
3698
3699 if (regno == FRAME_POINTER_REGNUM
3700 || regno == ARG_POINTER_REGNUM)
3701 return CORE_REGS;
3702
3703 if (FP_REGNUM_P (regno))
3704 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3705
3706 return NO_REGS;
3707}
3708
3709/* Try a machine-dependent way of reloading an illegitimate address
3710 operand. If we find one, push the reload and return the new rtx. */
3711
3712rtx
3713aarch64_legitimize_reload_address (rtx *x_p,
3714 enum machine_mode mode,
3715 int opnum, int type,
3716 int ind_levels ATTRIBUTE_UNUSED)
3717{
3718 rtx x = *x_p;
3719
3720 /* Do not allow mem (plus (reg, const)) if vector mode. */
3721 if (aarch64_vector_mode_p (mode)
3722 && GET_CODE (x) == PLUS
3723 && REG_P (XEXP (x, 0))
3724 && CONST_INT_P (XEXP (x, 1)))
3725 {
3726 rtx orig_rtx = x;
3727 x = copy_rtx (x);
3728 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3729 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3730 opnum, (enum reload_type) type);
3731 return x;
3732 }
3733
3734 /* We must recognize output that we have already generated ourselves. */
3735 if (GET_CODE (x) == PLUS
3736 && GET_CODE (XEXP (x, 0)) == PLUS
3737 && REG_P (XEXP (XEXP (x, 0), 0))
3738 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3739 && CONST_INT_P (XEXP (x, 1)))
3740 {
3741 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3742 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3743 opnum, (enum reload_type) type);
3744 return x;
3745 }
3746
3747 /* We wish to handle large displacements off a base register by splitting
3748 the addend across an add and the mem insn. This can cut the number of
3749 extra insns needed from 3 to 1. It is only useful for load/store of a
3750 single register with 12 bit offset field. */
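  /* Expository example (not part of the original sources): for a DImode
     access at (plus (reg) (const_int 0x13008)), the offset is too large
     for a 12-bit scaled field, so the code below splits it into
     high = 0x13000 (reloaded into the base register) and low = 8 (kept
     in the memory reference), roughly
	 add  xN, xB, #0x13000
	 ldr  xD, [xN, #8]
     where xN, xB and xD are hypothetical register names.  */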
3751 if (GET_CODE (x) == PLUS
3752 && REG_P (XEXP (x, 0))
3753 && CONST_INT_P (XEXP (x, 1))
3754 && HARD_REGISTER_P (XEXP (x, 0))
3755 && mode != TImode
3756 && mode != TFmode
3757 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
3758 {
3759 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3760 HOST_WIDE_INT low = val & 0xfff;
3761 HOST_WIDE_INT high = val - low;
3762 HOST_WIDE_INT offs;
3763 rtx cst;
3764
3765 /* Punt on BLKmode (zero-size) offsets here, since we cannot ascertain
3766 BLKmode alignment; let the generic reload machinery handle them. */
3767 if (GET_MODE_SIZE (mode) == 0)
3768 return NULL_RTX;
3769
3770 offs = low % GET_MODE_SIZE (mode);
3771
3772 /* Align misaligned offset by adjusting high part to compensate. */
3773 if (offs != 0)
3774 {
3775 if (aarch64_uimm12_shift (high + offs))
3776 {
3777 /* Align down. */
3778 low = low - offs;
3779 high = high + offs;
3780 }
3781 else
3782 {
3783 /* Align up. */
3784 offs = GET_MODE_SIZE (mode) - offs;
3785 low = low + offs;
3786 high = high + (low & 0x1000) - offs;
3787 low &= 0xfff;
3788 }
3789 }
3790
3791 /* Check for overflow. */
3792 if (high + low != val)
3793 return NULL_RTX;
3794
3795 cst = GEN_INT (high);
3796 if (!aarch64_uimm12_shift (high))
3797 cst = force_const_mem (Pmode, cst);
3798
3799 /* Reload high part into base reg, leaving the low part
3800 in the mem instruction. */
3801 x = gen_rtx_PLUS (Pmode,
3802 gen_rtx_PLUS (Pmode, XEXP (x, 0), cst),
3803 GEN_INT (low));
3804
3805 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3806 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
3807 opnum, (enum reload_type) type);
3808 return x;
3809 }
3810
3811 return NULL_RTX;
3812}
3813
3814
3815static reg_class_t
3816aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
3817 reg_class_t rclass,
3818 enum machine_mode mode,
3819 secondary_reload_info *sri)
3820{
3821 /* Address expressions of the form PLUS (SP, large_offset) need two
3822 scratch registers, one for the constant, and one for holding a
3823 copy of SP, since SP cannot be used on the RHS of an add-reg
3824 instruction. */
3825 if (mode == DImode
3826 && GET_CODE (x) == PLUS
3827 && XEXP (x, 0) == stack_pointer_rtx
3828 && CONST_INT_P (XEXP (x, 1))
3829 && !aarch64_uimm12_shift (INTVAL (XEXP (x, 1))))
3830 {
3831 sri->icode = CODE_FOR_reload_sp_immediate;
3832 return NO_REGS;
3833 }
3834
3835 /* Without the TARGET_SIMD instructions we cannot move a Q register
3836 to a Q register directly. We need a scratch. */
3837 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
3838 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
3839 && reg_class_subset_p (rclass, FP_REGS))
3840 {
3841 if (mode == TFmode)
3842 sri->icode = CODE_FOR_aarch64_reload_movtf;
3843 else if (mode == TImode)
3844 sri->icode = CODE_FOR_aarch64_reload_movti;
3845 return NO_REGS;
3846 }
3847
3848 /* A TFmode or TImode memory access should be handled via an FP_REGS
3849 because AArch64 has richer addressing modes for LDR/STR instructions
3850 than LDP/STP instructions. */
3851 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
3852 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
3853 return FP_REGS;
3854
3855 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
3856 return CORE_REGS;
3857
3858 return NO_REGS;
3859}
3860
3861static bool
3862aarch64_can_eliminate (const int from, const int to)
3863{
3864 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
3865 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
3866
3867 if (frame_pointer_needed)
3868 {
3869 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3870 return true;
3871 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3872 return false;
3873 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3874 && !cfun->calls_alloca)
3875 return true;
3876 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3877 return true;
3878 return false;
3879 }
3880 else
3881 {
3882 /* If we decided that we didn't need a leaf frame pointer but then used
3883 LR in the function, then we'll want a frame pointer after all, so
3884 prevent this elimination to ensure a frame pointer is used.
3885
3886 NOTE: the original value of flag_omit_frame_pointer gets trashed
3887 IFF flag_omit_leaf_frame_pointer is true, so we check the value
3888 of faked_omit_frame_pointer here (which is true when we always
3889 wish to keep non-leaf frame pointers but only wish to keep leaf frame
3890 pointers when LR is clobbered). */
 3891 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3892 && df_regs_ever_live_p (LR_REGNUM)
3893 && faked_omit_frame_pointer)
3894 return false;
3895 }
 3896
3897 return true;
3898}
3899
3900HOST_WIDE_INT
3901aarch64_initial_elimination_offset (unsigned from, unsigned to)
3902{
3903 HOST_WIDE_INT frame_size;
3904 HOST_WIDE_INT offset;
3905
3906 aarch64_layout_frame ();
3907 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
3908 + crtl->outgoing_args_size
3909 + cfun->machine->saved_varargs_size);
3910
3911 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
3912 offset = frame_size;
3913
3914 if (to == HARD_FRAME_POINTER_REGNUM)
3915 {
3916 if (from == ARG_POINTER_REGNUM)
3917 return offset - crtl->outgoing_args_size;
3918
3919 if (from == FRAME_POINTER_REGNUM)
3920 return cfun->machine->frame.saved_regs_size;
3921 }
3922
3923 if (to == STACK_POINTER_REGNUM)
3924 {
3925 if (from == FRAME_POINTER_REGNUM)
3926 {
3927 HOST_WIDE_INT elim = crtl->outgoing_args_size
3928 + cfun->machine->frame.saved_regs_size
3929 - cfun->machine->frame.fp_lr_offset;
3930 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
3931 return elim;
3932 }
3933 }
3934
3935 return offset;
3936}
3937
3938
3939/* Implement RETURN_ADDR_RTX. We do not support moving back to a
3940 previous frame. */
3941
3942rtx
3943aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
3944{
3945 if (count != 0)
3946 return const0_rtx;
3947 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
3948}
3949
3950
3951static void
3952aarch64_asm_trampoline_template (FILE *f)
3953{
3954 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
3955 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
3956 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
3957 assemble_aligned_integer (4, const0_rtx);
3958 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3959 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3960}
3961
3962unsigned
3963aarch64_trampoline_size (void)
3964{
3965 return 32; /* 3 insns + padding + 2 dwords. */
3966}
3967
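/* Added note (commentary, not in the original): the 32 bytes returned by
   aarch64_trampoline_size cover the three instructions emitted by
   aarch64_asm_trampoline_template (12 bytes), 4 bytes of padding, and the
   two 64-bit words that aarch64_trampoline_init stores below at offsets 16
   (the target function address) and 24 (the static chain value).  */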
3968static void
3969aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3970{
3971 rtx fnaddr, mem, a_tramp;
3972
3973 /* Don't need to copy the trailing D-words, we fill those in below. */
3974 emit_block_move (m_tramp, assemble_trampoline_template (),
3975 GEN_INT (TRAMPOLINE_SIZE - 16), BLOCK_OP_NORMAL);
3976 mem = adjust_address (m_tramp, DImode, 16);
3977 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3978 emit_move_insn (mem, fnaddr);
3979
3980 mem = adjust_address (m_tramp, DImode, 24);
3981 emit_move_insn (mem, chain_value);
3982
3983 /* XXX We should really define a "clear_cache" pattern and use
3984 gen_clear_cache(). */
3985 a_tramp = XEXP (m_tramp, 0);
3986 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3987 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3988 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3989}
3990
3991static unsigned char
3992aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
3993{
3994 switch (regclass)
3995 {
3996 case CORE_REGS:
3997 case POINTER_REGS:
3998 case GENERAL_REGS:
3999 case ALL_REGS:
4000 case FP_REGS:
4001 case FP_LO_REGS:
4002 return
4003 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4004 (GET_MODE_SIZE (mode) + 7) / 8;
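      /* Added note: the expression above gives the number of 128-bit
	 registers for vector modes and of 64-bit registers otherwise;
	 e.g. V4SImode needs one register while TImode needs two.  */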
4005 case STACK_REG:
4006 return 1;
4007
4008 case NO_REGS:
4009 return 0;
4010
4011 default:
4012 break;
4013 }
4014 gcc_unreachable ();
4015}
4016
4017static reg_class_t
4018aarch64_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t regclass)
4019{
4020 return ((regclass == POINTER_REGS || regclass == STACK_REG)
4021 ? GENERAL_REGS : regclass);
4022}
4023
4024void
4025aarch64_asm_output_labelref (FILE* f, const char *name)
4026{
4027 asm_fprintf (f, "%U%s", name);
4028}
4029
4030static void
4031aarch64_elf_asm_constructor (rtx symbol, int priority)
4032{
4033 if (priority == DEFAULT_INIT_PRIORITY)
4034 default_ctor_section_asm_out_constructor (symbol, priority);
4035 else
4036 {
4037 section *s;
4038 char buf[18];
4039 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4040 s = get_section (buf, SECTION_WRITE, NULL);
4041 switch_to_section (s);
4042 assemble_align (POINTER_SIZE);
4043 fputs ("\t.dword\t", asm_out_file);
4044 output_addr_const (asm_out_file, symbol);
4045 fputc ('\n', asm_out_file);
4046 }
4047}
4048
4049static void
4050aarch64_elf_asm_destructor (rtx symbol, int priority)
4051{
4052 if (priority == DEFAULT_INIT_PRIORITY)
4053 default_dtor_section_asm_out_destructor (symbol, priority);
4054 else
4055 {
4056 section *s;
4057 char buf[18];
4058 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4059 s = get_section (buf, SECTION_WRITE, NULL);
4060 switch_to_section (s);
4061 assemble_align (POINTER_SIZE);
4062 fputs ("\t.dword\t", asm_out_file);
4063 output_addr_const (asm_out_file, symbol);
4064 fputc ('\n', asm_out_file);
4065 }
4066}
4067
4068const char*
4069aarch64_output_casesi (rtx *operands)
4070{
4071 char buf[100];
4072 char label[100];
 4073 rtx diff_vec = PATTERN (next_active_insn (operands[2]));
4074 int index;
4075 static const char *const patterns[4][2] =
4076 {
4077 {
4078 "ldrb\t%w3, [%0,%w1,uxtw]",
4079 "add\t%3, %4, %w3, sxtb #2"
4080 },
4081 {
4082 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4083 "add\t%3, %4, %w3, sxth #2"
4084 },
4085 {
4086 "ldr\t%w3, [%0,%w1,uxtw #2]",
4087 "add\t%3, %4, %w3, sxtw #2"
4088 },
4089 /* We assume that DImode is only generated when not optimizing and
4090 that we don't really need 64-bit address offsets. That would
4091 imply an object file with 8GB of code in a single function! */
4092 {
4093 "ldr\t%w3, [%0,%w1,uxtw #2]",
4094 "add\t%3, %4, %w3, sxtw #2"
4095 }
4096 };
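  /* Added commentary: each pattern pair loads one dispatch-table entry,
     scaled by its element size, and then adds it (shifted left by 2) to
     the table base materialised into operand 4 by the ADR below, so the
     stored entries are (target - table base) / 4.  */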
4097
4098 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4099
4100 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4101
4102 gcc_assert (index >= 0 && index <= 3);
4103
 4104 /* Need to implement table size reduction, by changing the code below. */
4105 output_asm_insn (patterns[index][0], operands);
4106 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4107 snprintf (buf, sizeof (buf),
4108 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4109 output_asm_insn (buf, operands);
4110 output_asm_insn (patterns[index][1], operands);
4111 output_asm_insn ("br\t%3", operands);
4112 assemble_label (asm_out_file, label);
4113 return "";
4114}
4115
4116
4117/* Return size in bits of an arithmetic operand which is shifted/scaled and
4118 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4119 operator. */
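/* For example (added illustration): aarch64_uxt_size (1, 0x1fe) returns 8,
   matching an operand that is zero-extended from 8 bits and shifted left
   by one.  */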
4120
4121int
4122aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4123{
4124 if (shift >= 0 && shift <= 3)
4125 {
4126 int size;
4127 for (size = 8; size <= 32; size *= 2)
4128 {
4129 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4130 if (mask == bits << shift)
4131 return size;
4132 }
4133 }
4134 return 0;
4135}
4136
4137static bool
4138aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4139 const_rtx x ATTRIBUTE_UNUSED)
4140{
4141 /* We can't use blocks for constants when we're using a per-function
4142 constant pool. */
4143 return false;
4144}
4145
4146static section *
4147aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4148 rtx x ATTRIBUTE_UNUSED,
4149 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4150{
4151 /* Force all constant pool entries into the current function section. */
4152 return function_section (current_function_decl);
4153}
4154
4155
4156/* Costs. */
4157
4158/* Helper function for rtx cost calculation. Strip a shift expression
4159 from X. Returns the inner operand if successful, or the original
4160 expression on failure. */
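/* For instance (added illustration): both (ashift (reg X) (const_int 2))
   and (mult (reg X) (const_int 4)) strip down to (reg X) here.  */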
4161static rtx
4162aarch64_strip_shift (rtx x)
4163{
4164 rtx op = x;
4165
4166 if ((GET_CODE (op) == ASHIFT
4167 || GET_CODE (op) == ASHIFTRT
4168 || GET_CODE (op) == LSHIFTRT)
4169 && CONST_INT_P (XEXP (op, 1)))
4170 return XEXP (op, 0);
4171
4172 if (GET_CODE (op) == MULT
4173 && CONST_INT_P (XEXP (op, 1))
4174 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4175 return XEXP (op, 0);
4176
4177 return x;
4178}
4179
4180/* Helper function for rtx cost calculation. Strip a shift or extend
4181 expression from X. Returns the inner operand if successful, or the
4182 original expression on failure. We deal with a number of possible
4183 canonicalization variations here. */
4184static rtx
4185aarch64_strip_shift_or_extend (rtx x)
4186{
4187 rtx op = x;
4188
4189 /* Zero and sign extraction of a widened value. */
4190 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4191 && XEXP (op, 2) == const0_rtx
4192 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4193 XEXP (op, 1)))
4194 return XEXP (XEXP (op, 0), 0);
4195
4196 /* It can also be represented (for zero-extend) as an AND with an
4197 immediate. */
4198 if (GET_CODE (op) == AND
4199 && GET_CODE (XEXP (op, 0)) == MULT
4200 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4201 && CONST_INT_P (XEXP (op, 1))
4202 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4203 INTVAL (XEXP (op, 1))) != 0)
4204 return XEXP (XEXP (op, 0), 0);
4205
4206 /* Now handle extended register, as this may also have an optional
4207 left shift by 1..4. */
4208 if (GET_CODE (op) == ASHIFT
4209 && CONST_INT_P (XEXP (op, 1))
4210 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4211 op = XEXP (op, 0);
4212
4213 if (GET_CODE (op) == ZERO_EXTEND
4214 || GET_CODE (op) == SIGN_EXTEND)
4215 op = XEXP (op, 0);
4216
4217 if (op != x)
4218 return op;
4219
4220 return aarch64_strip_shift (x);
4221}
4222
4223/* Calculate the cost of calculating X, storing it in *COST. Result
4224 is true if the total cost of the operation has now been calculated. */
4225static bool
4226aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4227 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4228{
4229 rtx op0, op1;
4230 const struct cpu_rtx_cost_table *extra_cost
4231 = aarch64_tune_params->insn_extra_cost;
4232
4233 switch (code)
4234 {
4235 case SET:
4236 op0 = SET_DEST (x);
4237 op1 = SET_SRC (x);
4238
4239 switch (GET_CODE (op0))
4240 {
4241 case MEM:
4242 if (speed)
4243 *cost += extra_cost->memory_store;
4244
4245 if (op1 != const0_rtx)
4246 *cost += rtx_cost (op1, SET, 1, speed);
4247 return true;
4248
4249 case SUBREG:
4250 if (! REG_P (SUBREG_REG (op0)))
4251 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4252 /* Fall through. */
4253 case REG:
4254 /* Cost is just the cost of the RHS of the set. */
4255 *cost += rtx_cost (op1, SET, 1, true);
4256 return true;
4257
4258 case ZERO_EXTRACT: /* Bit-field insertion. */
4259 case SIGN_EXTRACT:
4260 /* Strip any redundant widening of the RHS to meet the width of
4261 the target. */
4262 if (GET_CODE (op1) == SUBREG)
4263 op1 = SUBREG_REG (op1);
4264 if ((GET_CODE (op1) == ZERO_EXTEND
4265 || GET_CODE (op1) == SIGN_EXTEND)
4266 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4267 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4268 >= INTVAL (XEXP (op0, 1))))
4269 op1 = XEXP (op1, 0);
4270 *cost += rtx_cost (op1, SET, 1, speed);
4271 return true;
4272
4273 default:
4274 break;
4275 }
4276 return false;
4277
4278 case MEM:
4279 if (speed)
4280 *cost += extra_cost->memory_load;
4281
4282 return true;
4283
4284 case NEG:
4285 op0 = CONST0_RTX (GET_MODE (x));
4286 op1 = XEXP (x, 0);
4287 goto cost_minus;
4288
4289 case COMPARE:
4290 op0 = XEXP (x, 0);
4291 op1 = XEXP (x, 1);
4292
4293 if (op1 == const0_rtx
4294 && GET_CODE (op0) == AND)
4295 {
4296 x = op0;
4297 goto cost_logic;
4298 }
4299
4300 /* Comparisons can work if the order is swapped.
4301 Canonicalization puts the more complex operation first, but
4302 we want it in op1. */
4303 if (! (REG_P (op0)
4304 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4305 {
4306 op0 = XEXP (x, 1);
4307 op1 = XEXP (x, 0);
4308 }
4309 goto cost_minus;
4310
4311 case MINUS:
4312 op0 = XEXP (x, 0);
4313 op1 = XEXP (x, 1);
4314
4315 cost_minus:
4316 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4317 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4318 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4319 {
4320 if (op0 != const0_rtx)
4321 *cost += rtx_cost (op0, MINUS, 0, speed);
4322
4323 if (CONST_INT_P (op1))
4324 {
4325 if (!aarch64_uimm12_shift (INTVAL (op1)))
4326 *cost += rtx_cost (op1, MINUS, 1, speed);
4327 }
4328 else
4329 {
4330 op1 = aarch64_strip_shift_or_extend (op1);
4331 *cost += rtx_cost (op1, MINUS, 1, speed);
4332 }
4333 return true;
4334 }
4335
4336 return false;
4337
4338 case PLUS:
4339 op0 = XEXP (x, 0);
4340 op1 = XEXP (x, 1);
4341
4342 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4343 {
4344 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4345 {
4346 *cost += rtx_cost (op0, PLUS, 0, speed);
4347 }
4348 else
4349 {
4350 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4351
4352 if (new_op0 == op0
4353 && GET_CODE (op0) == MULT)
4354 {
4355 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4356 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4357 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4358 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4359 {
4360 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4361 speed)
4362 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4363 speed)
4364 + rtx_cost (op1, PLUS, 1, speed));
4365 if (speed)
4366 *cost += extra_cost->int_multiply_extend_add;
4367 return true;
4368 }
4369 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4370 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4371 + rtx_cost (op1, PLUS, 1, speed));
4372
4373 if (speed)
4374 *cost += extra_cost->int_multiply_add;
4375 }
4376
4377 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4378 + rtx_cost (op1, PLUS, 1, speed));
4379 }
4380 return true;
4381 }
4382
4383 return false;
4384
4385 case IOR:
4386 case XOR:
4387 case AND:
4388 cost_logic:
4389 op0 = XEXP (x, 0);
4390 op1 = XEXP (x, 1);
4391
4392 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4393 {
4394 if (CONST_INT_P (op1)
4395 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4396 {
4397 *cost += rtx_cost (op0, AND, 0, speed);
4398 }
4399 else
4400 {
4401 if (GET_CODE (op0) == NOT)
4402 op0 = XEXP (op0, 0);
4403 op0 = aarch64_strip_shift (op0);
4404 *cost += (rtx_cost (op0, AND, 0, speed)
4405 + rtx_cost (op1, AND, 1, speed));
4406 }
4407 return true;
4408 }
4409 return false;
4410
4411 case ZERO_EXTEND:
4412 if ((GET_MODE (x) == DImode
4413 && GET_MODE (XEXP (x, 0)) == SImode)
4414 || GET_CODE (XEXP (x, 0)) == MEM)
4415 {
4416 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4417 return true;
4418 }
4419 return false;
4420
4421 case SIGN_EXTEND:
4422 if (GET_CODE (XEXP (x, 0)) == MEM)
4423 {
4424 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4425 return true;
4426 }
4427 return false;
4428
4429 case ROTATE:
4430 if (!CONST_INT_P (XEXP (x, 1)))
4431 *cost += COSTS_N_INSNS (2);
4432 /* Fall through. */
4433 case ROTATERT:
4434 case LSHIFTRT:
4435 case ASHIFT:
4436 case ASHIFTRT:
4437
4438 /* Shifting by a register often takes an extra cycle. */
4439 if (speed && !CONST_INT_P (XEXP (x, 1)))
4440 *cost += extra_cost->register_shift;
4441
4442 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4443 return true;
4444
4445 case HIGH:
4446 if (!CONSTANT_P (XEXP (x, 0)))
4447 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4448 return true;
4449
4450 case LO_SUM:
4451 if (!CONSTANT_P (XEXP (x, 1)))
4452 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4453 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4454 return true;
4455
4456 case ZERO_EXTRACT:
4457 case SIGN_EXTRACT:
4458 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4459 return true;
4460
4461 case MULT:
4462 op0 = XEXP (x, 0);
4463 op1 = XEXP (x, 1);
4464
4465 *cost = COSTS_N_INSNS (1);
4466 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4467 {
4468 if (CONST_INT_P (op1)
4469 && exact_log2 (INTVAL (op1)) > 0)
4470 {
4471 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4472 return true;
4473 }
4474
4475 if ((GET_CODE (op0) == ZERO_EXTEND
4476 && GET_CODE (op1) == ZERO_EXTEND)
4477 || (GET_CODE (op0) == SIGN_EXTEND
4478 && GET_CODE (op1) == SIGN_EXTEND))
4479 {
4480 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4481 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4482 if (speed)
4483 *cost += extra_cost->int_multiply_extend;
4484 return true;
4485 }
4486
4487 if (speed)
4488 *cost += extra_cost->int_multiply;
4489 }
4490 else if (speed)
4491 {
4492 if (GET_MODE (x) == DFmode)
4493 *cost += extra_cost->double_multiply;
4494 else if (GET_MODE (x) == SFmode)
4495 *cost += extra_cost->float_multiply;
4496 }
4497
4498 return false; /* All arguments need to be in registers. */
4499
4500 case MOD:
4501 case UMOD:
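      /* Added note: AArch64 has no integer remainder instruction, so MOD
	 is implemented as a divide followed by a multiply-subtract, hence
	 the divide-plus-multiply-add cost below.  */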
4502 *cost = COSTS_N_INSNS (2);
4503 if (speed)
4504 {
4505 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4506 *cost += (extra_cost->int_multiply_add
4507 + extra_cost->int_divide);
4508 else if (GET_MODE (x) == DFmode)
4509 *cost += (extra_cost->double_multiply
4510 + extra_cost->double_divide);
4511 else if (GET_MODE (x) == SFmode)
4512 *cost += (extra_cost->float_multiply
4513 + extra_cost->float_divide);
4514 }
4515 return false; /* All arguments need to be in registers. */
4516
4517 case DIV:
4518 case UDIV:
4519 *cost = COSTS_N_INSNS (1);
4520 if (speed)
4521 {
4522 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4523 *cost += extra_cost->int_divide;
4524 else if (GET_MODE (x) == DFmode)
4525 *cost += extra_cost->double_divide;
4526 else if (GET_MODE (x) == SFmode)
4527 *cost += extra_cost->float_divide;
4528 }
4529 return false; /* All arguments need to be in registers. */
4530
4531 default:
4532 break;
4533 }
4534 return false;
4535}
4536
4537static int
4538aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4539 enum machine_mode mode ATTRIBUTE_UNUSED,
4540 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4541{
4542 enum rtx_code c = GET_CODE (x);
4543 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4544
4545 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4546 return addr_cost->pre_modify;
4547
4548 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4549 return addr_cost->post_modify;
4550
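  /* Added examples: (plus reg (const_int 16)) is costed as imm_offset,
     (plus reg reg) as register_offset, and (plus (mult reg (const_int 8))
     reg) as register_extend.  */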
4551 if (c == PLUS)
4552 {
4553 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4554 return addr_cost->imm_offset;
4555 else if (GET_CODE (XEXP (x, 0)) == MULT
4556 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4557 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4558 return addr_cost->register_extend;
4559
4560 return addr_cost->register_offset;
4561 }
4562 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4563 return addr_cost->imm_offset;
4564
4565 return 0;
4566}
4567
4568static int
 4569aarch64_register_move_cost (enum machine_mode mode,
4570 reg_class_t from, reg_class_t to)
4571{
4572 const struct cpu_regmove_cost *regmove_cost
4573 = aarch64_tune_params->regmove_cost;
4574
4575 if (from == GENERAL_REGS && to == GENERAL_REGS)
4576 return regmove_cost->GP2GP;
4577 else if (from == GENERAL_REGS)
4578 return regmove_cost->GP2FP;
4579 else if (to == GENERAL_REGS)
4580 return regmove_cost->FP2GP;
4581
4582 /* When AdvSIMD instructions are disabled it is not possible to move
4583 a 128-bit value directly between Q registers. This is handled in
4584 secondary reload. A general register is used as a scratch to move
4585 the upper DI value and the lower DI value is moved directly,
4586 hence the cost is the sum of three moves. */
4587
 4588 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
4589 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4590
4591 return regmove_cost->FP2FP;
4592}
4593
4594static int
4595aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4596 reg_class_t rclass ATTRIBUTE_UNUSED,
4597 bool in ATTRIBUTE_UNUSED)
4598{
4599 return aarch64_tune_params->memmov_cost;
4600}
4601
4602static void initialize_aarch64_code_model (void);
4603
4604/* Parse the architecture extension string. */
4605
4606static void
4607aarch64_parse_extension (char *str)
4608{
4609 /* The extension string is parsed left to right. */
4610 const struct aarch64_option_extension *opt = NULL;
4611
4612 /* Flag to say whether we are adding or removing an extension. */
4613 int adding_ext = -1;
4614
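  /* Added example: for "+simd+nofp" the first iteration enables the
     "simd" extension and the second, after stripping the leading "no",
     disables "fp".  */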
4615 while (str != NULL && *str != 0)
4616 {
4617 char *ext;
4618 size_t len;
4619
4620 str++;
4621 ext = strchr (str, '+');
4622
4623 if (ext != NULL)
4624 len = ext - str;
4625 else
4626 len = strlen (str);
4627
4628 if (len >= 2 && strncmp (str, "no", 2) == 0)
4629 {
4630 adding_ext = 0;
4631 len -= 2;
4632 str += 2;
4633 }
4634 else if (len > 0)
4635 adding_ext = 1;
4636
4637 if (len == 0)
4638 {
4639 error ("missing feature modifier after %qs", "+no");
4640 return;
4641 }
4642
4643 /* Scan over the extensions table trying to find an exact match. */
4644 for (opt = all_extensions; opt->name != NULL; opt++)
4645 {
4646 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
4647 {
4648 /* Add or remove the extension. */
4649 if (adding_ext)
4650 aarch64_isa_flags |= opt->flags_on;
4651 else
4652 aarch64_isa_flags &= ~(opt->flags_off);
4653 break;
4654 }
4655 }
4656
4657 if (opt->name == NULL)
4658 {
4659 /* Extension not found in list. */
4660 error ("unknown feature modifier %qs", str);
4661 return;
4662 }
4663
4664 str = ext;
 4665 }
4666
4667 return;
4668}
4669
4670/* Parse the ARCH string. */
4671
4672static void
4673aarch64_parse_arch (void)
4674{
4675 char *ext;
4676 const struct processor *arch;
4677 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
4678 size_t len;
4679
4680 strcpy (str, aarch64_arch_string);
4681
4682 ext = strchr (str, '+');
4683
4684 if (ext != NULL)
4685 len = ext - str;
4686 else
4687 len = strlen (str);
4688
4689 if (len == 0)
4690 {
4691 error ("missing arch name in -march=%qs", str);
4692 return;
4693 }
4694
4695 /* Loop through the list of supported ARCHs to find a match. */
4696 for (arch = all_architectures; arch->name != NULL; arch++)
4697 {
4698 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
4699 {
4700 selected_arch = arch;
4701 aarch64_isa_flags = selected_arch->flags;
4702 selected_cpu = &all_cores[selected_arch->core];
4703
4704 if (ext != NULL)
4705 {
4706 /* ARCH string contains at least one extension. */
4707 aarch64_parse_extension (ext);
4708 }
4709
4710 return;
4711 }
4712 }
4713
4714 /* ARCH name not found in list. */
4715 error ("unknown value %qs for -march", str);
4716 return;
4717}
4718
4719/* Parse the CPU string. */
4720
4721static void
4722aarch64_parse_cpu (void)
4723{
4724 char *ext;
4725 const struct processor *cpu;
4726 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
4727 size_t len;
4728
4729 strcpy (str, aarch64_cpu_string);
4730
4731 ext = strchr (str, '+');
4732
4733 if (ext != NULL)
4734 len = ext - str;
4735 else
4736 len = strlen (str);
4737
4738 if (len == 0)
4739 {
4740 error ("missing cpu name in -mcpu=%qs", str);
4741 return;
4742 }
4743
4744 /* Loop through the list of supported CPUs to find a match. */
4745 for (cpu = all_cores; cpu->name != NULL; cpu++)
4746 {
4747 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
4748 {
4749 selected_cpu = cpu;
4750 aarch64_isa_flags = selected_cpu->flags;
4751
4752 if (ext != NULL)
4753 {
4754 /* CPU string contains at least one extension. */
4755 aarch64_parse_extension (ext);
4756 }
4757
4758 return;
4759 }
4760 }
4761
4762 /* CPU name not found in list. */
4763 error ("unknown value %qs for -mcpu", str);
4764 return;
4765}
4766
4767/* Parse the TUNE string. */
4768
4769static void
4770aarch64_parse_tune (void)
4771{
4772 const struct processor *cpu;
4773 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
4774 strcpy (str, aarch64_tune_string);
4775
4776 /* Loop through the list of supported CPUs to find a match. */
4777 for (cpu = all_cores; cpu->name != NULL; cpu++)
4778 {
4779 if (strcmp (cpu->name, str) == 0)
4780 {
4781 selected_tune = cpu;
4782 return;
4783 }
4784 }
4785
4786 /* CPU name not found in list. */
4787 error ("unknown value %qs for -mtune", str);
4788 return;
4789}
4790
4791
4792/* Implement TARGET_OPTION_OVERRIDE. */
4793
4794static void
4795aarch64_override_options (void)
4796{
 4797 /* -march wins over -mcpu: when -march is given, the CPU is derived from
 4798 the architecture and any -mcpu value is ignored; otherwise -march remains
 4799 undefined. -mtune can be used with either -march or -mcpu. */
4800
4801 if (aarch64_arch_string)
4802 {
4803 aarch64_parse_arch ();
4804 aarch64_cpu_string = NULL;
4805 }
4806
4807 if (aarch64_cpu_string)
4808 {
4809 aarch64_parse_cpu ();
4810 selected_arch = NULL;
4811 }
4812
4813 if (aarch64_tune_string)
4814 {
4815 aarch64_parse_tune ();
4816 }
4817
4818 initialize_aarch64_code_model ();
4819
4820 aarch64_build_bitmask_table ();
4821
4822 /* This target defaults to strict volatile bitfields. */
4823 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
4824 flag_strict_volatile_bitfields = 1;
4825
4826 /* If the user did not specify a processor, choose the default
4827 one for them. This will be the CPU set during configuration using
4828 --with-cpu, otherwise it is "generic". */
4829 if (!selected_cpu)
4830 {
4831 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
4832 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
4833 }
4834
4835 gcc_assert (selected_cpu);
4836
 4837 /* The selected cpu may be an architecture, so look up tuning by core ID. */
4838 if (!selected_tune)
4839 selected_tune = &all_cores[selected_cpu->core];
4840
4841 aarch64_tune_flags = selected_tune->flags;
4842 aarch64_tune = selected_tune->core;
4843 aarch64_tune_params = selected_tune->tune;
4844
4845 aarch64_override_options_after_change ();
4846}
4847
4848/* Implement targetm.override_options_after_change. */
4849
4850static void
4851aarch64_override_options_after_change (void)
4852{
4853 faked_omit_frame_pointer = false;
4854
4855 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
4856 that aarch64_frame_pointer_required will be called. We need to remember
4857 whether flag_omit_frame_pointer was turned on normally or just faked. */
4858
4859 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
4860 {
4861 flag_omit_frame_pointer = true;
4862 faked_omit_frame_pointer = true;
4863 }
4864}
4865
4866static struct machine_function *
4867aarch64_init_machine_status (void)
4868{
4869 struct machine_function *machine;
4870 machine = ggc_alloc_cleared_machine_function ();
4871 return machine;
4872}
4873
4874void
4875aarch64_init_expanders (void)
4876{
4877 init_machine_status = aarch64_init_machine_status;
4878}
4879
 4880/* Map the requested code model and flag_pic onto the code model actually
 used. */
4881static void
4882initialize_aarch64_code_model (void)
4883{
4884 if (flag_pic)
4885 {
4886 switch (aarch64_cmodel_var)
4887 {
4888 case AARCH64_CMODEL_TINY:
4889 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
4890 break;
4891 case AARCH64_CMODEL_SMALL:
4892 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
4893 break;
4894 case AARCH64_CMODEL_LARGE:
4895 sorry ("code model %qs with -f%s", "large",
4896 flag_pic > 1 ? "PIC" : "pic");
4897 default:
4898 gcc_unreachable ();
4899 }
4900 }
4901 else
4902 aarch64_cmodel = aarch64_cmodel_var;
4903}
4904
4905/* Return true if SYMBOL_REF X binds locally. */
4906
4907static bool
4908aarch64_symbol_binds_local_p (const_rtx x)
4909{
4910 return (SYMBOL_REF_DECL (x)
4911 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
4912 : SYMBOL_REF_LOCAL_P (x));
4913}
4914
4915/* Return true if SYMBOL_REF X is thread local */
4916static bool
4917aarch64_tls_symbol_p (rtx x)
4918{
4919 if (! TARGET_HAVE_TLS)
4920 return false;
4921
4922 if (GET_CODE (x) != SYMBOL_REF)
4923 return false;
4924
4925 return SYMBOL_REF_TLS_MODEL (x) != 0;
4926}
4927
4928/* Classify a TLS symbol into one of the TLS kinds. */
4929enum aarch64_symbol_type
4930aarch64_classify_tls_symbol (rtx x)
4931{
4932 enum tls_model tls_kind = tls_symbolic_operand_type (x);
4933
4934 switch (tls_kind)
4935 {
4936 case TLS_MODEL_GLOBAL_DYNAMIC:
4937 case TLS_MODEL_LOCAL_DYNAMIC:
4938 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
4939
4940 case TLS_MODEL_INITIAL_EXEC:
4941 return SYMBOL_SMALL_GOTTPREL;
4942
4943 case TLS_MODEL_LOCAL_EXEC:
4944 return SYMBOL_SMALL_TPREL;
4945
4946 case TLS_MODEL_EMULATED:
4947 case TLS_MODEL_NONE:
4948 return SYMBOL_FORCE_TO_MEM;
4949
4950 default:
4951 gcc_unreachable ();
4952 }
4953}
4954
4955/* Return the method that should be used to access SYMBOL_REF or
4956 LABEL_REF X in context CONTEXT. */
4957enum aarch64_symbol_type
4958aarch64_classify_symbol (rtx x,
4959 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
4960{
4961 if (GET_CODE (x) == LABEL_REF)
4962 {
4963 switch (aarch64_cmodel)
4964 {
4965 case AARCH64_CMODEL_LARGE:
4966 return SYMBOL_FORCE_TO_MEM;
4967
4968 case AARCH64_CMODEL_TINY_PIC:
4969 case AARCH64_CMODEL_TINY:
4970 case AARCH64_CMODEL_SMALL_PIC:
4971 case AARCH64_CMODEL_SMALL:
4972 return SYMBOL_SMALL_ABSOLUTE;
4973
4974 default:
4975 gcc_unreachable ();
4976 }
4977 }
4978
4979 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4980
4981 switch (aarch64_cmodel)
4982 {
4983 case AARCH64_CMODEL_LARGE:
4984 return SYMBOL_FORCE_TO_MEM;
4985
4986 case AARCH64_CMODEL_TINY:
4987 case AARCH64_CMODEL_SMALL:
4988
 4989 /* This is needed to get DFmode and TImode constants to be loaded off
 4990 the constant pool. It is necessary to dump TImode values into
 4991 the constant pool because we don't handle TImode constant loads
 4992 properly yet and hence need to use the constant pool. */
4993 if (CONSTANT_POOL_ADDRESS_P (x))
4994 return SYMBOL_FORCE_TO_MEM;
4995
4996 if (aarch64_tls_symbol_p (x))
4997 return aarch64_classify_tls_symbol (x);
4998
4999 if (SYMBOL_REF_WEAK (x))
5000 return SYMBOL_FORCE_TO_MEM;
5001
5002 return SYMBOL_SMALL_ABSOLUTE;
5003
5004 case AARCH64_CMODEL_TINY_PIC:
5005 case AARCH64_CMODEL_SMALL_PIC:
5006
5007 if (CONSTANT_POOL_ADDRESS_P (x))
5008 return SYMBOL_FORCE_TO_MEM;
5009
5010 if (aarch64_tls_symbol_p (x))
5011 return aarch64_classify_tls_symbol (x);
5012
5013 if (!aarch64_symbol_binds_local_p (x))
5014 return SYMBOL_SMALL_GOT;
5015
5016 return SYMBOL_SMALL_ABSOLUTE;
5017
5018 default:
5019 gcc_unreachable ();
5020 }
5021 /* By default push everything into the constant pool. */
5022 return SYMBOL_FORCE_TO_MEM;
5023}
5024
5025/* Return true if X is a symbolic constant that can be used in context
5026 CONTEXT. If it is, store the type of the symbol in *SYMBOL_TYPE. */
5027
5028bool
5029aarch64_symbolic_constant_p (rtx x, enum aarch64_symbol_context context,
5030 enum aarch64_symbol_type *symbol_type)
5031{
5032 rtx offset;
5033 split_const (x, &x, &offset);
5034 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
5035 *symbol_type = aarch64_classify_symbol (x, context);
5036 else
5037 return false;
5038
5039 /* No checking of offset at this point. */
5040 return true;
5041}
5042
5043bool
5044aarch64_constant_address_p (rtx x)
5045{
5046 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5047}
5048
5049bool
5050aarch64_legitimate_pic_operand_p (rtx x)
5051{
5052 if (GET_CODE (x) == SYMBOL_REF
5053 || (GET_CODE (x) == CONST
5054 && GET_CODE (XEXP (x, 0)) == PLUS
5055 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5056 return false;
5057
5058 return true;
5059}
5060
5061/* Return true if X holds either a quarter-precision or
5062 floating-point +0.0 constant. */
5063static bool
5064aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5065{
5066 if (!CONST_DOUBLE_P (x))
5067 return false;
5068
5069 /* TODO: We could handle moving 0.0 to a TFmode register,
5070 but first we would like to refactor the movtf_aarch64
5071 to be more amicable to split moves properly and
5072 correctly gate on TARGET_SIMD. For now - reject all
5073 constants which are not to SFmode or DFmode registers. */
5074 if (!(mode == SFmode || mode == DFmode))
5075 return false;
5076
5077 if (aarch64_float_const_zero_rtx_p (x))
5078 return true;
5079 return aarch64_float_const_representable_p (x);
5080}
5081
5082static bool
5083aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5084{
5085 /* Do not allow vector struct mode constants. We could support
5086 0 and -1 easily, but they need support in aarch64-simd.md. */
5087 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5088 return false;
5089
5090 /* This could probably go away because
5091 we now decompose CONST_INTs according to expand_mov_immediate. */
5092 if ((GET_CODE (x) == CONST_VECTOR
5093 && aarch64_simd_valid_immediate (x, mode, false,
5094 NULL, NULL, NULL, NULL, NULL) != -1)
5095 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5096 return !targetm.cannot_force_const_mem (mode, x);
5097
5098 if (GET_CODE (x) == HIGH
5099 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5100 return true;
5101
5102 return aarch64_constant_address_p (x);
5103}
5104
 5105rtx
5106aarch64_load_tp (rtx target)
5107{
5108 if (!target
5109 || GET_MODE (target) != Pmode
5110 || !register_operand (target, Pmode))
5111 target = gen_reg_rtx (Pmode);
5112
5113 /* Can return in any reg. */
5114 emit_insn (gen_aarch64_load_tp_hard (target));
5115 return target;
5116}
5117
5118/* On AAPCS systems, this is the "struct __va_list". */
5119static GTY(()) tree va_list_type;
5120
5121/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5122 Return the type to use as __builtin_va_list.
5123
5124 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5125
5126 struct __va_list
5127 {
5128 void *__stack;
5129 void *__gr_top;
5130 void *__vr_top;
5131 int __gr_offs;
5132 int __vr_offs;
5133 }; */
5134
5135static tree
5136aarch64_build_builtin_va_list (void)
5137{
5138 tree va_list_name;
5139 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5140
5141 /* Create the type. */
5142 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5143 /* Give it the required name. */
5144 va_list_name = build_decl (BUILTINS_LOCATION,
5145 TYPE_DECL,
5146 get_identifier ("__va_list"),
5147 va_list_type);
5148 DECL_ARTIFICIAL (va_list_name) = 1;
5149 TYPE_NAME (va_list_type) = va_list_name;
 5150 TYPE_STUB_DECL (va_list_type) = va_list_name;
5151
5152 /* Create the fields. */
5153 f_stack = build_decl (BUILTINS_LOCATION,
5154 FIELD_DECL, get_identifier ("__stack"),
5155 ptr_type_node);
5156 f_grtop = build_decl (BUILTINS_LOCATION,
5157 FIELD_DECL, get_identifier ("__gr_top"),
5158 ptr_type_node);
5159 f_vrtop = build_decl (BUILTINS_LOCATION,
5160 FIELD_DECL, get_identifier ("__vr_top"),
5161 ptr_type_node);
5162 f_groff = build_decl (BUILTINS_LOCATION,
5163 FIELD_DECL, get_identifier ("__gr_offs"),
5164 integer_type_node);
5165 f_vroff = build_decl (BUILTINS_LOCATION,
5166 FIELD_DECL, get_identifier ("__vr_offs"),
5167 integer_type_node);
5168
5169 DECL_ARTIFICIAL (f_stack) = 1;
5170 DECL_ARTIFICIAL (f_grtop) = 1;
5171 DECL_ARTIFICIAL (f_vrtop) = 1;
5172 DECL_ARTIFICIAL (f_groff) = 1;
5173 DECL_ARTIFICIAL (f_vroff) = 1;
5174
5175 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5176 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5177 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5178 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5179 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5180
5181 TYPE_FIELDS (va_list_type) = f_stack;
5182 DECL_CHAIN (f_stack) = f_grtop;
5183 DECL_CHAIN (f_grtop) = f_vrtop;
5184 DECL_CHAIN (f_vrtop) = f_groff;
5185 DECL_CHAIN (f_groff) = f_vroff;
5186
5187 /* Compute its layout. */
5188 layout_type (va_list_type);
5189
5190 return va_list_type;
5191}
5192
5193/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5194static void
5195aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5196{
5197 const CUMULATIVE_ARGS *cum;
5198 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5199 tree stack, grtop, vrtop, groff, vroff;
5200 tree t;
5201 int gr_save_area_size;
5202 int vr_save_area_size;
5203 int vr_offset;
5204
5205 cum = &crtl->args.info;
5206 gr_save_area_size
5207 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5208 vr_save_area_size
5209 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5210
5211 if (TARGET_GENERAL_REGS_ONLY)
5212 {
5213 if (cum->aapcs_nvrn > 0)
5214 sorry ("%qs and floating point or vector arguments",
5215 "-mgeneral-regs-only");
5216 vr_save_area_size = 0;
5217 }
5218
5219 f_stack = TYPE_FIELDS (va_list_type_node);
5220 f_grtop = DECL_CHAIN (f_stack);
5221 f_vrtop = DECL_CHAIN (f_grtop);
5222 f_groff = DECL_CHAIN (f_vrtop);
5223 f_vroff = DECL_CHAIN (f_groff);
5224
5225 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5226 NULL_TREE);
5227 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5228 NULL_TREE);
5229 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5230 NULL_TREE);
5231 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5232 NULL_TREE);
5233 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5234 NULL_TREE);
5235
5236 /* Emit code to initialize STACK, which points to the next varargs stack
5237 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5238 by named arguments. STACK is 8-byte aligned. */
5239 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5240 if (cum->aapcs_stack_size > 0)
5241 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5242 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5243 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5244
5245 /* Emit code to initialize GRTOP, the top of the GR save area.
5246 virtual_incoming_args_rtx should have been 16 byte aligned. */
5247 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5248 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5249 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5250
5251 /* Emit code to initialize VRTOP, the top of the VR save area.
5252 This address is gr_save_area_bytes below GRTOP, rounded
5253 down to the next 16-byte boundary. */
5254 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5255 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5256 STACK_BOUNDARY / BITS_PER_UNIT);
5257
5258 if (vr_offset)
5259 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5260 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5261 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5262
5263 /* Emit code to initialize GROFF, the offset from GRTOP of the
5264 next GPR argument. */
5265 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5266 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5267 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5268
5269 /* Likewise emit code to initialize VROFF, the offset from FTOP
5270 of the next VR argument. */
5271 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5272 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5273 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5274}
5275
5276/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5277
5278static tree
5279aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5280 gimple_seq *post_p ATTRIBUTE_UNUSED)
5281{
5282 tree addr;
5283 bool indirect_p;
5284 bool is_ha; /* is HFA or HVA. */
5285 bool dw_align; /* double-word align. */
5286 enum machine_mode ag_mode = VOIDmode;
5287 int nregs;
5288 enum machine_mode mode;
5289
5290 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5291 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5292 HOST_WIDE_INT size, rsize, adjust, align;
5293 tree t, u, cond1, cond2;
5294
5295 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5296 if (indirect_p)
5297 type = build_pointer_type (type);
5298
5299 mode = TYPE_MODE (type);
5300
5301 f_stack = TYPE_FIELDS (va_list_type_node);
5302 f_grtop = DECL_CHAIN (f_stack);
5303 f_vrtop = DECL_CHAIN (f_grtop);
5304 f_groff = DECL_CHAIN (f_vrtop);
5305 f_vroff = DECL_CHAIN (f_groff);
5306
5307 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5308 f_stack, NULL_TREE);
5309 size = int_size_in_bytes (type);
5310 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5311
5312 dw_align = false;
5313 adjust = 0;
5314 if (aarch64_vfp_is_call_or_return_candidate (mode,
5315 type,
5316 &ag_mode,
5317 &nregs,
5318 &is_ha))
5319 {
5320 /* TYPE passed in fp/simd registers. */
5321 if (TARGET_GENERAL_REGS_ONLY)
5322 sorry ("%qs and floating point or vector arguments",
5323 "-mgeneral-regs-only");
5324
5325 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5326 unshare_expr (valist), f_vrtop, NULL_TREE);
5327 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5328 unshare_expr (valist), f_vroff, NULL_TREE);
5329
5330 rsize = nregs * UNITS_PER_VREG;
5331
5332 if (is_ha)
5333 {
5334 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5335 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5336 }
5337 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5338 && size < UNITS_PER_VREG)
5339 {
5340 adjust = UNITS_PER_VREG - size;
5341 }
5342 }
5343 else
5344 {
5345 /* TYPE passed in general registers. */
5346 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5347 unshare_expr (valist), f_grtop, NULL_TREE);
5348 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5349 unshare_expr (valist), f_groff, NULL_TREE);
5350 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5351 nregs = rsize / UNITS_PER_WORD;
5352
5353 if (align > 8)
5354 dw_align = true;
5355
5356 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5357 && size < UNITS_PER_WORD)
5358 {
5359 adjust = UNITS_PER_WORD - size;
5360 }
5361 }
5362
5363 /* Get a local temporary for the field value. */
5364 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5365
5366 /* Emit code to branch if off >= 0. */
5367 t = build2 (GE_EXPR, boolean_type_node, off,
5368 build_int_cst (TREE_TYPE (off), 0));
5369 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5370
5371 if (dw_align)
5372 {
5373 /* Emit: offs = (offs + 15) & -16. */
5374 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5375 build_int_cst (TREE_TYPE (off), 15));
5376 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5377 build_int_cst (TREE_TYPE (off), -16));
5378 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5379 }
5380 else
5381 roundup = NULL;
5382
5383 /* Update ap.__[g|v]r_offs */
5384 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5385 build_int_cst (TREE_TYPE (off), rsize));
5386 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5387
5388 /* String up. */
5389 if (roundup)
5390 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5391
5392 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5393 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5394 build_int_cst (TREE_TYPE (f_off), 0));
5395 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5396
5397 /* String up: make sure the assignment happens before the use. */
5398 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5399 COND_EXPR_ELSE (cond1) = t;
5400
 5401 /* Prepare the trees handling the argument that is passed on the stack;
 5402 the top-level node will be stored in ON_STACK. */
5403 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5404 if (align > 8)
5405 {
5406 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5407 t = fold_convert (intDI_type_node, arg);
5408 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5409 build_int_cst (TREE_TYPE (t), 15));
5410 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5411 build_int_cst (TREE_TYPE (t), -16));
5412 t = fold_convert (TREE_TYPE (arg), t);
5413 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5414 }
5415 else
5416 roundup = NULL;
5417 /* Advance ap.__stack */
5418 t = fold_convert (intDI_type_node, arg);
5419 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5420 build_int_cst (TREE_TYPE (t), size + 7));
5421 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5422 build_int_cst (TREE_TYPE (t), -8));
5423 t = fold_convert (TREE_TYPE (arg), t);
5424 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5425 /* String up roundup and advance. */
5426 if (roundup)
5427 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5428 /* String up with arg */
5429 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5430 /* Big-endianness related address adjustment. */
5431 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5432 && size < UNITS_PER_WORD)
5433 {
5434 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5435 size_int (UNITS_PER_WORD - size));
5436 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5437 }
5438
5439 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5440 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5441
5442 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5443 t = off;
5444 if (adjust)
5445 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5446 build_int_cst (TREE_TYPE (off), adjust));
5447
5448 t = fold_convert (sizetype, t);
5449 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5450
5451 if (is_ha)
5452 {
5453 /* type ha; // treat as "struct {ftype field[n];}"
5454 ... [computing offs]
5455 for (i = 0; i <nregs; ++i, offs += 16)
5456 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5457 return ha; */
5458 int i;
5459 tree tmp_ha, field_t, field_ptr_t;
5460
5461 /* Declare a local variable. */
5462 tmp_ha = create_tmp_var_raw (type, "ha");
5463 gimple_add_tmp_var (tmp_ha);
5464
5465 /* Establish the base type. */
5466 switch (ag_mode)
5467 {
5468 case SFmode:
5469 field_t = float_type_node;
5470 field_ptr_t = float_ptr_type_node;
5471 break;
5472 case DFmode:
5473 field_t = double_type_node;
5474 field_ptr_t = double_ptr_type_node;
5475 break;
5476 case TFmode:
5477 field_t = long_double_type_node;
5478 field_ptr_t = long_double_ptr_type_node;
5479 break;
 5480/* Half precision and quad precision are not fully supported yet. Enable
 5481 the following code once the support is complete; we still need to find
 5482 the correct type node for __fp16 *. */
5483#if 0
5484 case HFmode:
5485 field_t = float_type_node;
5486 field_ptr_t = float_ptr_type_node;
5487 break;
5488#endif
5489 case V2SImode:
5490 case V4SImode:
5491 {
5492 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5493 field_t = build_vector_type_for_mode (innertype, ag_mode);
5494 field_ptr_t = build_pointer_type (field_t);
5495 }
5496 break;
5497 default:
5498 gcc_assert (0);
5499 }
5500
 5501 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
5502 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5503 addr = t;
5504 t = fold_convert (field_ptr_t, addr);
5505 t = build2 (MODIFY_EXPR, field_t,
5506 build1 (INDIRECT_REF, field_t, tmp_ha),
5507 build1 (INDIRECT_REF, field_t, t));
5508
5509 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5510 for (i = 1; i < nregs; ++i)
5511 {
5512 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5513 u = fold_convert (field_ptr_t, addr);
5514 u = build2 (MODIFY_EXPR, field_t,
5515 build2 (MEM_REF, field_t, tmp_ha,
5516 build_int_cst (field_ptr_t,
5517 (i *
5518 int_size_in_bytes (field_t)))),
5519 build1 (INDIRECT_REF, field_t, u));
5520 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5521 }
5522
5523 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5524 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5525 }
5526
5527 COND_EXPR_ELSE (cond2) = t;
5528 addr = fold_convert (build_pointer_type (type), cond1);
5529 addr = build_va_arg_indirect_ref (addr);
5530
5531 if (indirect_p)
5532 addr = build_va_arg_indirect_ref (addr);
5533
5534 return addr;
5535}
5536
5537/* Implement TARGET_SETUP_INCOMING_VARARGS. */
5538
5539static void
5540aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5541 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5542 int no_rtl)
5543{
5544 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5545 CUMULATIVE_ARGS local_cum;
5546 int gr_saved, vr_saved;
5547
5548 /* The caller has advanced CUM up to, but not beyond, the last named
5549 argument. Advance a local copy of CUM past the last "real" named
5550 argument, to find out how many registers are left over. */
5551 local_cum = *cum;
5552 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
5553
 5554 /* Find out how many registers we need to save. */
5555 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5556 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5557
5558 if (TARGET_GENERAL_REGS_ONLY)
5559 {
5560 if (local_cum.aapcs_nvrn > 0)
5561 sorry ("%qs and floating point or vector arguments",
5562 "-mgeneral-regs-only");
5563 vr_saved = 0;
5564 }
5565
5566 if (!no_rtl)
5567 {
5568 if (gr_saved > 0)
5569 {
5570 rtx ptr, mem;
5571
5572 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5573 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5574 - gr_saved * UNITS_PER_WORD);
5575 mem = gen_frame_mem (BLKmode, ptr);
5576 set_mem_alias_set (mem, get_varargs_alias_set ());
5577
5578 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5579 mem, gr_saved);
5580 }
5581 if (vr_saved > 0)
5582 {
5583 /* We can't use move_block_from_reg, because it will use
5584 the wrong mode, storing D regs only. */
5585 enum machine_mode mode = TImode;
5586 int off, i;
5587
5588 /* Set OFF to the offset from virtual_incoming_args_rtx of
5589 the first vector register. The VR save area lies below
5590 the GR one, and is aligned to 16 bytes. */
5591 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5592 STACK_BOUNDARY / BITS_PER_UNIT);
5593 off -= vr_saved * UNITS_PER_VREG;
5594
5595 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5596 {
5597 rtx ptr, mem;
5598
5599 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5600 mem = gen_frame_mem (mode, ptr);
5601 set_mem_alias_set (mem, get_varargs_alias_set ());
5602 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5603 off += UNITS_PER_VREG;
5604 }
5605 }
5606 }
5607
5608 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5609 any complication of having crtl->args.pretend_args_size changed. */
5610 cfun->machine->saved_varargs_size
5611 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5612 STACK_BOUNDARY / BITS_PER_UNIT)
5613 + vr_saved * UNITS_PER_VREG);
5614}
5615
5616static void
5617aarch64_conditional_register_usage (void)
5618{
5619 int i;
5620 if (!TARGET_FLOAT)
5621 {
5622 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5623 {
5624 fixed_regs[i] = 1;
5625 call_used_regs[i] = 1;
5626 }
5627 }
5628}
5629
5630/* Walk down the type tree of TYPE counting consecutive base elements.
5631 If *MODEP is VOIDmode, then set it to the first valid floating point
5632 type. If a non-floating point type is found, or if a floating point
5633 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5634 otherwise return the count in the sub-tree. */
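/* Example (added illustration): for "struct { double x; double y; }" the
   walk below returns 2 with *MODEP set to DFmode, i.e. a candidate pair
   of double-precision fields.  */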
5635static int
5636aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5637{
5638 enum machine_mode mode;
5639 HOST_WIDE_INT size;
5640
5641 switch (TREE_CODE (type))
5642 {
5643 case REAL_TYPE:
5644 mode = TYPE_MODE (type);
5645 if (mode != DFmode && mode != SFmode && mode != TFmode)
5646 return -1;
5647
5648 if (*modep == VOIDmode)
5649 *modep = mode;
5650
5651 if (*modep == mode)
5652 return 1;
5653
5654 break;
5655
5656 case COMPLEX_TYPE:
5657 mode = TYPE_MODE (TREE_TYPE (type));
5658 if (mode != DFmode && mode != SFmode && mode != TFmode)
5659 return -1;
5660
5661 if (*modep == VOIDmode)
5662 *modep = mode;
5663
5664 if (*modep == mode)
5665 return 2;
5666
5667 break;
5668
5669 case VECTOR_TYPE:
5670 /* Use V2SImode and V4SImode as representatives of all 64-bit
5671 and 128-bit vector types. */
5672 size = int_size_in_bytes (type);
5673 switch (size)
5674 {
5675 case 8:
5676 mode = V2SImode;
5677 break;
5678 case 16:
5679 mode = V4SImode;
5680 break;
5681 default:
5682 return -1;
5683 }
5684
5685 if (*modep == VOIDmode)
5686 *modep = mode;
5687
5688 /* Vector modes are considered to be opaque: two vectors are
5689 equivalent for the purposes of being homogeneous aggregates
5690 if they are the same size. */
5691 if (*modep == mode)
5692 return 1;
5693
5694 break;
5695
5696 case ARRAY_TYPE:
5697 {
5698 int count;
5699 tree index = TYPE_DOMAIN (type);
5700
5701 /* Can't handle incomplete types. */
5702 if (!COMPLETE_TYPE_P (type))
5703 return -1;
5704
5705 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5706 if (count == -1
5707 || !index
5708 || !TYPE_MAX_VALUE (index)
5709 || !host_integerp (TYPE_MAX_VALUE (index), 1)
5710 || !TYPE_MIN_VALUE (index)
5711 || !host_integerp (TYPE_MIN_VALUE (index), 1)
5712 || count < 0)
5713 return -1;
5714
5715 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
5716 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
5717
5718 /* There must be no padding. */
5719 if (!host_integerp (TYPE_SIZE (type), 1)
5720 || (tree_low_cst (TYPE_SIZE (type), 1)
5721 != count * GET_MODE_BITSIZE (*modep)))
5722 return -1;
5723
5724 return count;
5725 }
5726
5727 case RECORD_TYPE:
5728 {
5729 int count = 0;
5730 int sub_count;
5731 tree field;
5732
5733 /* Can't handle incomplete types. */
5734 if (!COMPLETE_TYPE_P (type))
5735 return -1;
5736
5737 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5738 {
5739 if (TREE_CODE (field) != FIELD_DECL)
5740 continue;
5741
5742 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5743 if (sub_count < 0)
5744 return -1;
5745 count += sub_count;
5746 }
5747
5748 /* There must be no padding. */
5749 if (!host_integerp (TYPE_SIZE (type), 1)
5750 || (tree_low_cst (TYPE_SIZE (type), 1)
5751 != count * GET_MODE_BITSIZE (*modep)))
5752 return -1;
5753
5754 return count;
5755 }
5756
5757 case UNION_TYPE:
5758 case QUAL_UNION_TYPE:
5759 {
5760 /* These aren't very interesting except in a degenerate case. */
5761 int count = 0;
5762 int sub_count;
5763 tree field;
5764
5765 /* Can't handle incomplete types. */
5766 if (!COMPLETE_TYPE_P (type))
5767 return -1;
5768
5769 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5770 {
5771 if (TREE_CODE (field) != FIELD_DECL)
5772 continue;
5773
5774 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5775 if (sub_count < 0)
5776 return -1;
5777 count = count > sub_count ? count : sub_count;
5778 }
5779
5780 /* There must be no padding. */
5781 if (!host_integerp (TYPE_SIZE (type), 1)
5782 || (tree_low_cst (TYPE_SIZE (type), 1)
5783 != count * GET_MODE_BITSIZE (*modep)))
5784 return -1;
5785
5786 return count;
5787 }
5788
5789 default:
5790 break;
5791 }
5792
5793 return -1;
5794}
5795
5796/* Return TRUE if the type, as described by TYPE and MODE, is a composite
5797 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
5798 array types. The C99 floating-point complex types are also considered
5799 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
5800 types, which are GCC extensions and out of the scope of AAPCS64, are
5801 treated as composite types here as well.
5802
5803 Note that MODE itself is not sufficient in determining whether a type
5804 is such a composite type or not. This is because
5805 stor-layout.c:compute_record_mode may have already changed the MODE
5806 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
5807 structure with only one field may have its MODE set to the mode of the
5808 field. Also an integer mode whose size matches the size of the
5809 RECORD_TYPE type may be used to substitute the original mode
5810 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
5811 solely relied on. */
5812
5813static bool
5814aarch64_composite_type_p (const_tree type,
5815 enum machine_mode mode)
5816{
5817 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
5818 return true;
5819
5820 if (mode == BLKmode
5821 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
5822 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
5823 return true;
5824
5825 return false;
5826}
5827
5828/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
5829 type as described in AAPCS64 \S 4.1.2.
5830
5831 See the comment above aarch64_composite_type_p for the notes on MODE. */
5832
5833static bool
5834aarch64_short_vector_p (const_tree type,
5835 enum machine_mode mode)
5836{
5837 HOST_WIDE_INT size = -1;
5838
5839 if (type && TREE_CODE (type) == VECTOR_TYPE)
5840 size = int_size_in_bytes (type);
5841 else if (!aarch64_composite_type_p (type, mode)
5842 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5843 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
5844 size = GET_MODE_SIZE (mode);
5845
5846  return (size == 8 || size == 16);
5847}
5848
5849/* Return TRUE if an argument, whose type is described by TYPE and MODE,
5850 shall be passed or returned in simd/fp register(s) (providing these
5851 parameter passing registers are available).
5852
5853 Upon successful return, *COUNT returns the number of needed registers,
5854   *BASE_MODE returns the mode of the individual register and when IS_HA
5855 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
5856 floating-point aggregate or a homogeneous short-vector aggregate. */
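/* As an illustrative example (not part of the original sources): a C type
   such as  struct { float x, y, z; }  is a homogeneous floating-point
   aggregate and is classified here with *count == 3 and *base_mode ==
   SFmode, whereas a struct mixing float and integer fields is rejected.  */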
5857
5858static bool
5859aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
5860 const_tree type,
5861 enum machine_mode *base_mode,
5862 int *count,
5863 bool *is_ha)
5864{
5865 enum machine_mode new_mode = VOIDmode;
5866 bool composite_p = aarch64_composite_type_p (type, mode);
5867
5868 if (is_ha != NULL) *is_ha = false;
5869
5870 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
5871 || aarch64_short_vector_p (type, mode))
5872 {
5873 *count = 1;
5874 new_mode = mode;
5875 }
5876 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5877 {
5878 if (is_ha != NULL) *is_ha = true;
5879 *count = 2;
5880 new_mode = GET_MODE_INNER (mode);
5881 }
5882 else if (type && composite_p)
5883 {
5884 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5885
5886 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
5887 {
5888 if (is_ha != NULL) *is_ha = true;
5889 *count = ag_count;
5890 }
5891 else
5892 return false;
5893 }
5894 else
5895 return false;
5896
5897 *base_mode = new_mode;
5898 return true;
5899}
5900
5901/* Implement TARGET_STRUCT_VALUE_RTX. */
5902
5903static rtx
5904aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
5905 int incoming ATTRIBUTE_UNUSED)
5906{
5907 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
5908}
5909
5910/* Implements target hook vector_mode_supported_p. */
5911static bool
5912aarch64_vector_mode_supported_p (enum machine_mode mode)
5913{
5914 if (TARGET_SIMD
5915 && (mode == V4SImode || mode == V8HImode
5916 || mode == V16QImode || mode == V2DImode
5917 || mode == V2SImode || mode == V4HImode
5918 || mode == V8QImode || mode == V2SFmode
5919 || mode == V4SFmode || mode == V2DFmode))
5920 return true;
5921
5922 return false;
5923}
5924
5925/* Return quad mode as the preferred SIMD mode. */
5926static enum machine_mode
5927aarch64_preferred_simd_mode (enum machine_mode mode)
5928{
5929 if (TARGET_SIMD)
5930 switch (mode)
5931 {
5932 case DFmode:
5933 return V2DFmode;
5934 case SFmode:
5935 return V4SFmode;
5936 case SImode:
5937 return V4SImode;
5938 case HImode:
5939 return V8HImode;
5940 case QImode:
5941 return V16QImode;
5942 case DImode:
5943 return V2DImode;
5944 break;
5945
5946 default:;
5947 }
5948 return word_mode;
5949}
5950
5951/* Return the bitmask of possible vector sizes for the vectorizer
5952 to iterate over. */
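/* The values 16 and 8 below are vector sizes in bytes, i.e. the 128-bit
   and 64-bit Advanced SIMD vector widths.  */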
5953static unsigned int
5954aarch64_autovectorize_vector_sizes (void)
5955{
5956 return (16 | 8);
5957}
5958
5959/* A table to help perform AArch64-specific name mangling for AdvSIMD
5960 vector types in order to conform to the AAPCS64 (see "Procedure
5961 Call Standard for the ARM 64-bit Architecture", Appendix A). To
5962 qualify for emission with the mangled names defined in that document,
5963 a vector type must not only be of the correct mode but also be
5964 composed of AdvSIMD vector element types (e.g.
5965   __builtin_aarch64_simd_qi); these types are registered by
5966 aarch64_init_simd_builtins (). In other words, vector types defined
5967 in other ways e.g. via vector_size attribute will get default
5968 mangled names. */
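/* For example, a V4SImode vector built from __builtin_aarch64_simd_si
   elements (the arm_neon.h int32x4_t type) is mangled as "11__Int32x4_t",
   so a C++ function  void f (int32x4_t)  mangles roughly as
   _Z1f11__Int32x4_t.  */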
5969typedef struct
5970{
5971 enum machine_mode mode;
5972 const char *element_type_name;
5973 const char *mangled_name;
5974} aarch64_simd_mangle_map_entry;
5975
5976static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
5977 /* 64-bit containerized types. */
5978 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
5979 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
5980 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
5981 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
5982 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
5983 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
5984 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
5985 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
5986 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
5987 /* 128-bit containerized types. */
5988 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
5989 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
5990 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
5991 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
5992 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
5993 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
5994 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
5995 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
5996 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
5997 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
5998 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
5999 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6000 { VOIDmode, NULL, NULL }
6001};
6002
6003/* Implement TARGET_MANGLE_TYPE. */
6004
6f549691 6005static const char *
6006aarch64_mangle_type (const_tree type)
6007{
6008 /* The AArch64 ABI documents say that "__va_list" has to be
6009     mangled as if it is in the "std" namespace.  */
6010 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6011 return "St9__va_list";
6012
6013 /* Check the mode of the vector type, and the name of the vector
6014 element type, against the table. */
6015 if (TREE_CODE (type) == VECTOR_TYPE)
6016 {
6017 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6018
6019 while (pos->mode != VOIDmode)
6020 {
6021 tree elt_type = TREE_TYPE (type);
6022
6023 if (pos->mode == TYPE_MODE (type)
6024 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6025 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6026 pos->element_type_name))
6027 return pos->mangled_name;
6028
6029 pos++;
6030 }
6031 }
6032
6033 /* Use the default mangling. */
6034 return NULL;
6035}
6036
6037/* Return the equivalent letter for size. */
6038static unsigned char
6039sizetochar (int size)
6040{
6041 switch (size)
6042 {
6043 case 64: return 'd';
6044 case 32: return 's';
6045 case 16: return 'h';
6046 case 8 : return 'b';
6047 default: gcc_unreachable ();
6048 }
6049}
6050
6051/* Return true iff x is a uniform vector of floating-point
6052 constants, and the constant can be represented in
6053   quarter-precision form.  Note, as aarch64_float_const_representable_p
6054 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
6055static bool
6056aarch64_vect_float_const_representable_p (rtx x)
6057{
6058 int i = 0;
6059 REAL_VALUE_TYPE r0, ri;
6060 rtx x0, xi;
6061
6062 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6063 return false;
6064
6065 x0 = CONST_VECTOR_ELT (x, 0);
6066 if (!CONST_DOUBLE_P (x0))
6067 return false;
6068
6069 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6070
6071 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6072 {
6073 xi = CONST_VECTOR_ELT (x, i);
6074 if (!CONST_DOUBLE_P (xi))
6075 return false;
6076
6077 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6078 if (!REAL_VALUES_EQUAL (r0, ri))
6079 return false;
6080 }
6081
6082 return aarch64_float_const_representable_p (x0);
6083}
6084
6085/* TODO: This function returns values similar to those
6086 returned by neon_valid_immediate in gcc/config/arm/arm.c
6087 but the API here is different enough that these magic numbers
6088 are not used. It should be sufficient to return true or false. */
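/* An illustrative example (not from the original sources): a V4SImode
   constant whose every element is 0x00AB0000 is matched by the third
   CHECK below (immtype 2, element size 32, shift 16), and is later
   emitted as something like  movi v0.4s, 0xab, lsl 16.  */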
6089static int
6090aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
6091 rtx *modconst, int *elementwidth,
6092 unsigned char *elementchar,
6093 int *mvn, int *shift)
6094{
6095#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6096 matches = 1; \
6097 for (i = 0; i < idx; i += (STRIDE)) \
6098 if (!(TEST)) \
6099 matches = 0; \
6100 if (matches) \
6101 { \
6102 immtype = (CLASS); \
6103 elsize = (ELSIZE); \
6104 elchar = sizetochar (elsize); \
6105 eshift = (SHIFT); \
6106 emvn = (NEG); \
6107 break; \
6108 }
6109
6110 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6111 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6112 unsigned char bytes[16];
6113 unsigned char elchar = 0;
6114 int immtype = -1, matches;
6115 unsigned int invmask = inverse ? 0xff : 0;
6116 int eshift, emvn;
6117
6118  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6119 {
6120 bool simd_imm_zero = aarch64_simd_imm_zero_p (op, mode);
6121 int elem_width = GET_MODE_BITSIZE (GET_MODE (CONST_VECTOR_ELT (op, 0)));
6122
6123 if (!(simd_imm_zero
6124 || aarch64_vect_float_const_representable_p (op)))
6125 return -1;
6126
6127 if (modconst)
6128 *modconst = CONST_VECTOR_ELT (op, 0);
6129
6130 if (elementwidth)
6131 *elementwidth = elem_width;
6132
6133 if (elementchar)
6134 *elementchar = sizetochar (elem_width);
6135
6136 if (shift)
6137 *shift = 0;
6138
6139 if (simd_imm_zero)
6140 return 19;
6141 else
6142 return 18;
6143 }
6144
6145 /* Splat vector constant out into a byte vector. */
6146 for (i = 0; i < n_elts; i++)
6147 {
6148 rtx el = CONST_VECTOR_ELT (op, i);
6149 unsigned HOST_WIDE_INT elpart;
6150 unsigned int part, parts;
6151
6152 if (GET_CODE (el) == CONST_INT)
6153 {
6154 elpart = INTVAL (el);
6155 parts = 1;
6156 }
6157 else if (GET_CODE (el) == CONST_DOUBLE)
6158 {
6159 elpart = CONST_DOUBLE_LOW (el);
6160 parts = 2;
6161 }
6162 else
6163 gcc_unreachable ();
6164
6165 for (part = 0; part < parts; part++)
6166 {
6167 unsigned int byte;
6168 for (byte = 0; byte < innersize; byte++)
6169 {
6170 bytes[idx++] = (elpart & 0xff) ^ invmask;
6171 elpart >>= BITS_PER_UNIT;
6172 }
6173 if (GET_CODE (el) == CONST_DOUBLE)
6174 elpart = CONST_DOUBLE_HIGH (el);
6175 }
6176 }
6177
6178 /* Sanity check. */
6179 gcc_assert (idx == GET_MODE_SIZE (mode));
6180
6181 do
6182 {
6183 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6184 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6185
6186 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6187 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6188
6189 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6190 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6191
6192 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6193 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6194
6195 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6196
6197 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6198
6199 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6200 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6201
6202 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6203 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6204
6205 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6206 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6207
6208 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6209 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6210
6211 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6212
6213 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6214
6215 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6216 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6217
6218 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6219 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6220
6221 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6222 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 0, 0);
6223
6224 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6225 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 0, 1);
6226
6227 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6228
6229 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6230 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6231 }
6232 while (0);
6233
6234 /* TODO: Currently the assembler cannot handle types 12 to 15.
6235 And there is no way to specify cmode through the compiler.
6236 Disable them till there is support in the assembler. */
6237 if (immtype == -1
6238 || (immtype >= 12 && immtype <= 15)
6239 || immtype == 18)
6240 return -1;
6241
6242
6243 if (elementwidth)
6244 *elementwidth = elsize;
6245
6246 if (elementchar)
6247 *elementchar = elchar;
6248
6249 if (mvn)
6250 *mvn = emvn;
6251
6252 if (shift)
6253 *shift = eshift;
6254
6255 if (modconst)
6256 {
6257 unsigned HOST_WIDE_INT imm = 0;
6258
6259 /* Un-invert bytes of recognized vector, if necessary. */
6260 if (invmask != 0)
6261 for (i = 0; i < idx; i++)
6262 bytes[i] ^= invmask;
6263
6264 if (immtype == 17)
6265 {
6266 /* FIXME: Broken on 32-bit H_W_I hosts. */
6267 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6268
6269 for (i = 0; i < 8; i++)
6270 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6271 << (i * BITS_PER_UNIT);
6272
6273 *modconst = GEN_INT (imm);
6274 }
6275 else
6276 {
6277 unsigned HOST_WIDE_INT imm = 0;
6278
6279 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6280 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6281
6282 /* Construct 'abcdefgh' because the assembler cannot handle
6283 generic constants. */
6284 gcc_assert (shift != NULL && mvn != NULL);
6285 if (*mvn)
6286 imm = ~imm;
6287 imm = (imm >> *shift) & 0xff;
6288 *modconst = GEN_INT (imm);
6289 }
6290 }
6291
6292 return immtype;
6293#undef CHECK
6294}
6295
6296/* Return TRUE if rtx X is legal for use as either an AdvSIMD MOVI instruction
6297 (or, implicitly, MVNI) immediate. Write back width per element
6298 to *ELEMENTWIDTH, and a modified constant (whatever should be output
6299 for a MOVI instruction) in *MODCONST. */
6300int
6301aarch64_simd_immediate_valid_for_move (rtx op, enum machine_mode mode,
6302 rtx *modconst, int *elementwidth,
6303 unsigned char *elementchar,
6304 int *mvn, int *shift)
6305{
6306 rtx tmpconst;
6307 int tmpwidth;
6308 unsigned char tmpwidthc;
6309 int tmpmvn = 0, tmpshift = 0;
6310 int retval = aarch64_simd_valid_immediate (op, mode, 0, &tmpconst,
6311 &tmpwidth, &tmpwidthc,
6312 &tmpmvn, &tmpshift);
6313
6314 if (retval == -1)
6315 return 0;
6316
6317 if (modconst)
6318 *modconst = tmpconst;
6319
6320 if (elementwidth)
6321 *elementwidth = tmpwidth;
6322
6323 if (elementchar)
6324 *elementchar = tmpwidthc;
6325
6326 if (mvn)
6327 *mvn = tmpmvn;
6328
6329 if (shift)
6330 *shift = tmpshift;
6331
6332 return 1;
6333}
6334
6335static bool
6336aarch64_const_vec_all_same_int_p (rtx x,
6337 HOST_WIDE_INT minval,
6338 HOST_WIDE_INT maxval)
6339{
6340 HOST_WIDE_INT firstval;
6341 int count, i;
6342
6343 if (GET_CODE (x) != CONST_VECTOR
6344 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6345 return false;
6346
6347 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6348 if (firstval < minval || firstval > maxval)
6349 return false;
6350
6351 count = CONST_VECTOR_NUNITS (x);
6352 for (i = 1; i < count; i++)
6353 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6354 return false;
6355
6356 return true;
6357}
6358
6359/* Check whether immediate shift constants are within range.  */
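/* For example, for a V4HImode operand the element width is 16, so an
   immediate left-shift count must lie in [0, 15] while an immediate
   right-shift count (as used by SSHR/USHR) must lie in [1, 16].  */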
6360bool
6361aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6362{
6363 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6364 if (left)
6365 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6366 else
6367 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6368}
6369
6370/* Return true if X is a uniform vector where all elements
6371 are either the floating-point constant 0.0 or the
6372 integer constant 0. */
6373bool
6374aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6375{
6376  return x == CONST0_RTX (mode);
6377}
6378
6379bool
6380aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6381{
6382 HOST_WIDE_INT imm = INTVAL (x);
6383 int i;
6384
6385 for (i = 0; i < 8; i++)
6386 {
6387 unsigned int byte = imm & 0xff;
6388 if (byte != 0xff && byte != 0)
6389 return false;
6390 imm >>= 8;
6391 }
6392
6393 return true;
6394}
6395
6396/* Return a const_int vector of VAL. */
6397rtx
6398aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6399{
6400 int nunits = GET_MODE_NUNITS (mode);
6401 rtvec v = rtvec_alloc (nunits);
6402 int i;
6403
6404  for (i = 0; i < nunits; i++)
6405 RTVEC_ELT (v, i) = GEN_INT (val);
6406
6407 return gen_rtx_CONST_VECTOR (mode, v);
6408}
6409
6410/* Check OP is a legal scalar immediate for the MOVI instruction. */
6411
6412bool
6413aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
6414{
6415 enum machine_mode vmode;
6416
6417 gcc_assert (!VECTOR_MODE_P (mode));
6418 vmode = aarch64_preferred_simd_mode (mode);
6419 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
6420 int retval = aarch64_simd_immediate_valid_for_move (op_v, vmode, 0,
6421 NULL, NULL, NULL, NULL);
6422 return retval;
6423}
6424
6425/* Construct and return a PARALLEL RTX vector. */
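/* For example, for V4SImode with HIGH set this returns
   (parallel [(const_int 2) (const_int 3)]), i.e. the indices of the
   upper half of the vector.  */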
6426rtx
6427aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6428{
6429 int nunits = GET_MODE_NUNITS (mode);
6430 rtvec v = rtvec_alloc (nunits / 2);
6431 int base = high ? nunits / 2 : 0;
6432 rtx t1;
6433 int i;
6434
6435  for (i = 0; i < nunits / 2; i++)
6436 RTVEC_ELT (v, i) = GEN_INT (base + i);
6437
6438 t1 = gen_rtx_PARALLEL (mode, v);
6439 return t1;
6440}
6441
6442/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6443 HIGH (exclusive). */
6444void
6445aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6446{
6447 HOST_WIDE_INT lane;
6448 gcc_assert (GET_CODE (operand) == CONST_INT);
6449 lane = INTVAL (operand);
6450
6451 if (lane < low || lane >= high)
6452 error ("lane out of range");
6453}
6454
6455void
6456aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6457{
6458 gcc_assert (GET_CODE (operand) == CONST_INT);
6459 HOST_WIDE_INT lane = INTVAL (operand);
6460
6461 if (lane < low || lane >= high)
6462 error ("constant out of range");
6463}
6464
6465/* Emit code to reinterpret one AdvSIMD type as another,
6466 without altering bits. */
6467void
6468aarch64_simd_reinterpret (rtx dest, rtx src)
6469{
6470 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6471}
6472
6473/* Emit code to place an AdvSIMD pair result in memory locations (with equal
6474 registers). */
6475void
6476aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6477 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6478 rtx op1)
6479{
6480 rtx mem = gen_rtx_MEM (mode, destaddr);
6481 rtx tmp1 = gen_reg_rtx (mode);
6482 rtx tmp2 = gen_reg_rtx (mode);
6483
6484 emit_insn (intfn (tmp1, op1, tmp2));
6485
6486 emit_move_insn (mem, tmp1);
6487 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6488 emit_move_insn (mem, tmp2);
6489}
6490
6491/* Return TRUE if OP is a valid vector addressing mode. */
6492bool
6493aarch64_simd_mem_operand_p (rtx op)
6494{
6495 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6496 || GET_CODE (XEXP (op, 0)) == REG);
6497}
6498
6499/* Set up OPERANDS for a register copy from SRC to DEST, taking care
6500 not to early-clobber SRC registers in the process.
6501
6502 We assume that the operands described by SRC and DEST represent a
6503 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6504 number of components into which the copy has been decomposed. */
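/* For example, when copying {v1, v2} into {v2, v3} the v3 := v2 move must
   be emitted before v2 := v1, otherwise v2 would be clobbered before it is
   read; the reversed loop below handles that case.  */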
6505void
6506aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6507 rtx *src, unsigned int count)
6508{
6509 unsigned int i;
6510
6511 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6512 || REGNO (operands[0]) < REGNO (operands[1]))
6513 {
6514 for (i = 0; i < count; i++)
6515 {
6516 operands[2 * i] = dest[i];
6517 operands[2 * i + 1] = src[i];
6518 }
6519 }
6520 else
6521 {
6522 for (i = 0; i < count; i++)
6523 {
6524 operands[2 * i] = dest[count - i - 1];
6525 operands[2 * i + 1] = src[count - i - 1];
6526 }
6527 }
6528}
6529
6530/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6531 one of VSTRUCT modes: OI, CI or XI. */
6532int
6533aarch64_simd_attr_length_move (rtx insn)
6534{
6535 enum machine_mode mode;
6536
6537 extract_insn_cached (insn);
6538
6539 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6540 {
6541 mode = GET_MODE (recog_data.operand[0]);
6542 switch (mode)
6543 {
6544 case OImode:
6545 return 8;
6546 case CImode:
6547 return 12;
6548 case XImode:
6549 return 16;
6550 default:
6551 gcc_unreachable ();
6552 }
6553 }
6554 return 4;
6555}
6556
6557/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6558 alignment of a vector to 128 bits. */
6559static HOST_WIDE_INT
6560aarch64_simd_vector_alignment (const_tree type)
6561{
6562 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
6563 return MIN (align, 128);
6564}
6565
6566/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6567static bool
6568aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6569{
6570 if (is_packed)
6571 return false;
6572
6573 /* We guarantee alignment for vectors up to 128-bits. */
6574 if (tree_int_cst_compare (TYPE_SIZE (type),
6575 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6576 return false;
6577
6578 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6579 return true;
6580}
6581
6582/* If VALS is a vector constant that can be loaded into a register
6583 using DUP, generate instructions to do so and return an RTX to
6584 assign to the register. Otherwise return NULL_RTX. */
6585static rtx
6586aarch64_simd_dup_constant (rtx vals)
6587{
6588 enum machine_mode mode = GET_MODE (vals);
6589 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6590 int n_elts = GET_MODE_NUNITS (mode);
6591 bool all_same = true;
6592 rtx x;
6593 int i;
6594
6595 if (GET_CODE (vals) != CONST_VECTOR)
6596 return NULL_RTX;
6597
6598 for (i = 1; i < n_elts; ++i)
6599 {
6600 x = CONST_VECTOR_ELT (vals, i);
6601 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6602 all_same = false;
6603 }
6604
6605 if (!all_same)
6606 return NULL_RTX;
6607
6608 /* We can load this constant by using DUP and a constant in a
6609 single ARM register. This will be cheaper than a vector
6610 load. */
6611 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6612 return gen_rtx_VEC_DUPLICATE (mode, x);
6613}
6614
6615
6616/* Generate code to load VALS, which is a PARALLEL containing only
6617 constants (for vec_init) or CONST_VECTOR, efficiently into a
6618 register. Returns an RTX to copy into the register, or NULL_RTX
6619 for a PARALLEL that can not be converted into a CONST_VECTOR. */
1df3f464 6620static rtx
6621aarch64_simd_make_constant (rtx vals)
6622{
6623 enum machine_mode mode = GET_MODE (vals);
6624 rtx const_dup;
6625 rtx const_vec = NULL_RTX;
6626 int n_elts = GET_MODE_NUNITS (mode);
6627 int n_const = 0;
6628 int i;
6629
6630 if (GET_CODE (vals) == CONST_VECTOR)
6631 const_vec = vals;
6632 else if (GET_CODE (vals) == PARALLEL)
6633 {
6634 /* A CONST_VECTOR must contain only CONST_INTs and
6635 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6636 Only store valid constants in a CONST_VECTOR. */
6637 for (i = 0; i < n_elts; ++i)
6638 {
6639 rtx x = XVECEXP (vals, 0, i);
6640 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
6641 n_const++;
6642 }
6643 if (n_const == n_elts)
6644 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6645 }
6646 else
6647 gcc_unreachable ();
6648
6649 if (const_vec != NULL_RTX
6650 && aarch64_simd_immediate_valid_for_move (const_vec, mode, NULL, NULL,
6651 NULL, NULL, NULL))
6652 /* Load using MOVI/MVNI. */
6653 return const_vec;
6654 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
6655 /* Loaded using DUP. */
6656 return const_dup;
6657 else if (const_vec != NULL_RTX)
6658 /* Load from constant pool. We can not take advantage of single-cycle
6659 LD1 because we need a PC-relative addressing mode. */
6660 return const_vec;
6661 else
6662 /* A PARALLEL containing something not valid inside CONST_VECTOR.
6663 We can not construct an initializer. */
6664 return NULL_RTX;
6665}
6666
6667void
6668aarch64_expand_vector_init (rtx target, rtx vals)
6669{
6670 enum machine_mode mode = GET_MODE (target);
6671 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6672 int n_elts = GET_MODE_NUNITS (mode);
6673 int n_var = 0, one_var = -1;
6674 bool all_same = true;
6675 rtx x, mem;
6676 int i;
6677
6678 x = XVECEXP (vals, 0, 0);
6679 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6680 n_var = 1, one_var = 0;
6681
6682 for (i = 1; i < n_elts; ++i)
6683 {
6684 x = XVECEXP (vals, 0, i);
6685 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6686 ++n_var, one_var = i;
6687
6688 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6689 all_same = false;
6690 }
6691
6692 if (n_var == 0)
6693 {
6694 rtx constant = aarch64_simd_make_constant (vals);
6695 if (constant != NULL_RTX)
6696 {
6697 emit_move_insn (target, constant);
6698 return;
6699 }
6700 }
6701
6702 /* Splat a single non-constant element if we can. */
6703 if (all_same)
6704 {
6705 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
6706 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
6707 return;
6708 }
6709
6710 /* One field is non-constant. Load constant then overwrite varying
6711 field. This is more efficient than using the stack. */
6712 if (n_var == 1)
6713 {
6714 rtx copy = copy_rtx (vals);
6715 rtx index = GEN_INT (one_var);
6716 enum insn_code icode;
6717
6718 /* Load constant part of vector, substitute neighboring value for
6719 varying element. */
6720 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
6721 aarch64_expand_vector_init (target, copy);
6722
6723 /* Insert variable. */
6724 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
6725 icode = optab_handler (vec_set_optab, mode);
6726 gcc_assert (icode != CODE_FOR_nothing);
6727 emit_insn (GEN_FCN (icode) (target, x, index));
6728 return;
6729 }
6730
6731 /* Construct the vector in memory one field at a time
6732 and load the whole vector. */
6733 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6734 for (i = 0; i < n_elts; i++)
6735 emit_move_insn (adjust_address_nv (mem, inner_mode,
6736 i * GET_MODE_SIZE (inner_mode)),
6737 XVECEXP (vals, 0, i));
6738 emit_move_insn (target, mem);
6739
6740}
6741
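/* Return the mask applied to shift counts: scalar shift counts are
   truncated to the register width (mask = bitsize - 1), while Advanced
   SIMD shifts are not truncated, so vector and vector-struct modes get a
   mask of 0.  */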
6742static unsigned HOST_WIDE_INT
6743aarch64_shift_truncation_mask (enum machine_mode mode)
6744{
6745 return
6746 (aarch64_vector_mode_supported_p (mode)
6747 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
6748}
6749
6750#ifndef TLS_SECTION_ASM_FLAG
6751#define TLS_SECTION_ASM_FLAG 'T'
6752#endif
6753
6754void
6755aarch64_elf_asm_named_section (const char *name, unsigned int flags,
6756 tree decl ATTRIBUTE_UNUSED)
6757{
6758 char flagchars[10], *f = flagchars;
6759
6760 /* If we have already declared this section, we can use an
6761 abbreviated form to switch back to it -- unless this section is
6762     part of a COMDAT group, in which case GAS requires the full
6763 declaration every time. */
6764 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6765 && (flags & SECTION_DECLARED))
6766 {
6767 fprintf (asm_out_file, "\t.section\t%s\n", name);
6768 return;
6769 }
6770
6771 if (!(flags & SECTION_DEBUG))
6772 *f++ = 'a';
6773 if (flags & SECTION_WRITE)
6774 *f++ = 'w';
6775 if (flags & SECTION_CODE)
6776 *f++ = 'x';
6777 if (flags & SECTION_SMALL)
6778 *f++ = 's';
6779 if (flags & SECTION_MERGE)
6780 *f++ = 'M';
6781 if (flags & SECTION_STRINGS)
6782 *f++ = 'S';
6783 if (flags & SECTION_TLS)
6784 *f++ = TLS_SECTION_ASM_FLAG;
6785 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6786 *f++ = 'G';
6787 *f = '\0';
6788
6789 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
6790
6791 if (!(flags & SECTION_NOTYPE))
6792 {
6793 const char *type;
6794 const char *format;
6795
6796 if (flags & SECTION_BSS)
6797 type = "nobits";
6798 else
6799 type = "progbits";
6800
6801#ifdef TYPE_OPERAND_FMT
6802 format = "," TYPE_OPERAND_FMT;
6803#else
6804 format = ",@%s";
6805#endif
6806
6807 fprintf (asm_out_file, format, type);
6808
6809 if (flags & SECTION_ENTSIZE)
6810 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
6811 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6812 {
6813 if (TREE_CODE (decl) == IDENTIFIER_NODE)
6814 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
6815 else
6816 fprintf (asm_out_file, ",%s,comdat",
6817 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
6818 }
6819 }
6820
6821 putc ('\n', asm_out_file);
6822}
6823
6824/* Select a format to encode pointers in exception handling data. */
6825int
6826aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
6827{
6828 int type;
6829 switch (aarch64_cmodel)
6830 {
6831 case AARCH64_CMODEL_TINY:
6832 case AARCH64_CMODEL_TINY_PIC:
6833 case AARCH64_CMODEL_SMALL:
6834 case AARCH64_CMODEL_SMALL_PIC:
6835 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
6836 for everything. */
6837 type = DW_EH_PE_sdata4;
6838 break;
6839 default:
6840 /* No assumptions here. 8-byte relocs required. */
6841 type = DW_EH_PE_sdata8;
6842 break;
6843 }
6844 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
6845}
6846
6847/* Emit load exclusive. */
6848
6849static void
6850aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
6851 rtx mem, rtx model_rtx)
6852{
6853 rtx (*gen) (rtx, rtx, rtx);
6854
6855 switch (mode)
6856 {
6857 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
6858 case HImode: gen = gen_aarch64_load_exclusivehi; break;
6859 case SImode: gen = gen_aarch64_load_exclusivesi; break;
6860 case DImode: gen = gen_aarch64_load_exclusivedi; break;
6861 default:
6862 gcc_unreachable ();
6863 }
6864
6865 emit_insn (gen (rval, mem, model_rtx));
6866}
6867
6868/* Emit store exclusive. */
6869
6870static void
6871aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
6872 rtx rval, rtx mem, rtx model_rtx)
6873{
6874 rtx (*gen) (rtx, rtx, rtx, rtx);
6875
6876 switch (mode)
6877 {
6878 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
6879 case HImode: gen = gen_aarch64_store_exclusivehi; break;
6880 case SImode: gen = gen_aarch64_store_exclusivesi; break;
6881 case DImode: gen = gen_aarch64_store_exclusivedi; break;
6882 default:
6883 gcc_unreachable ();
6884 }
6885
6886 emit_insn (gen (bval, rval, mem, model_rtx));
6887}
6888
6889/* Mark the previous jump instruction as unlikely. */
6890
6891static void
6892aarch64_emit_unlikely_jump (rtx insn)
6893{
6894 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
6895
6896 insn = emit_jump_insn (insn);
6897 add_reg_note (insn, REG_BR_PROB, very_unlikely);
6898}
6899
6900/* Expand a compare and swap pattern. */
6901
6902void
6903aarch64_expand_compare_and_swap (rtx operands[])
6904{
6905 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
6906 enum machine_mode mode, cmp_mode;
6907 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
6908
6909 bval = operands[0];
6910 rval = operands[1];
6911 mem = operands[2];
6912 oldval = operands[3];
6913 newval = operands[4];
6914 is_weak = operands[5];
6915 mod_s = operands[6];
6916 mod_f = operands[7];
6917 mode = GET_MODE (mem);
6918 cmp_mode = mode;
6919
6920 /* Normally the succ memory model must be stronger than fail, but in the
6921 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
6922 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
6923
6924 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
6925 && INTVAL (mod_s) == MEMMODEL_RELEASE)
6926 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
6927
6928 switch (mode)
6929 {
6930 case QImode:
6931 case HImode:
6932 /* For short modes, we're going to perform the comparison in SImode,
6933 so do the zero-extension now. */
6934 cmp_mode = SImode;
6935 rval = gen_reg_rtx (SImode);
6936 oldval = convert_modes (SImode, mode, oldval, true);
6937 /* Fall through. */
6938
6939 case SImode:
6940 case DImode:
6941 /* Force the value into a register if needed. */
6942 if (!aarch64_plus_operand (oldval, mode))
6943 oldval = force_reg (cmp_mode, oldval);
6944 break;
6945
6946 default:
6947 gcc_unreachable ();
6948 }
6949
6950 switch (mode)
6951 {
6952 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
6953 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
6954 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
6955 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
6956 default:
6957 gcc_unreachable ();
6958 }
6959
6960 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
6961
6962 if (mode == QImode || mode == HImode)
6963 emit_move_insn (operands[1], gen_lowpart (mode, rval));
6964
6965 x = gen_rtx_REG (CCmode, CC_REGNUM);
6966 x = gen_rtx_EQ (SImode, x, const0_rtx);
6967 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
6968}
6969
6970/* Split a compare and swap pattern. */
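/* The generated sequence is, schematically (the exact acquire/release
   variants of the exclusive accesses depend on the memory model):

     1: ldxr  rval, [mem]
        cmp   rval, oldval
        b.ne  2f
        stxr  scratch, newval, [mem]
        cbnz  scratch, 1b         ; only for a strong compare-and-swap
     2:                                                                 */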
6971
6972void
6973aarch64_split_compare_and_swap (rtx operands[])
6974{
6975 rtx rval, mem, oldval, newval, scratch;
6976 enum machine_mode mode;
6977 bool is_weak;
6978 rtx label1, label2, x, cond;
6979
6980 rval = operands[0];
6981 mem = operands[1];
6982 oldval = operands[2];
6983 newval = operands[3];
6984 is_weak = (operands[4] != const0_rtx);
6985 scratch = operands[7];
6986 mode = GET_MODE (mem);
6987
6988 label1 = NULL_RTX;
6989 if (!is_weak)
6990 {
6991 label1 = gen_label_rtx ();
6992 emit_label (label1);
6993 }
6994 label2 = gen_label_rtx ();
6995
6996 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
6997
6998 cond = aarch64_gen_compare_reg (NE, rval, oldval);
6999 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7000 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7001 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7002 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7003
7004 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7005
7006 if (!is_weak)
7007 {
7008 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7009 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7010 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7011 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7012 }
7013 else
7014 {
7015 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7016 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7017 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7018 }
7019
7020 emit_label (label2);
7021}
7022
7023/* Split an atomic operation. */
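/* The expansion is a load-exclusive/store-exclusive retry loop, roughly:

     1: ldxr  old, [mem]
        <new = old CODE value>
        stxr  cond, new, [mem]
        cbnz  cond, 1b

   with acquire/release variants of the exclusive accesses chosen
   according to MODEL_RTX.  */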
7024
7025void
7026aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7027 rtx value, rtx model_rtx, rtx cond)
7028{
7029 enum machine_mode mode = GET_MODE (mem);
7030 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7031 rtx label, x;
7032
7033 label = gen_label_rtx ();
7034 emit_label (label);
7035
7036 if (new_out)
7037 new_out = gen_lowpart (wmode, new_out);
7038 if (old_out)
7039 old_out = gen_lowpart (wmode, old_out);
7040 else
7041 old_out = new_out;
7042 value = simplify_gen_subreg (wmode, value, mode, 0);
7043
7044 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7045
7046 switch (code)
7047 {
7048 case SET:
7049 new_out = value;
7050 break;
7051
7052 case NOT:
7053 x = gen_rtx_AND (wmode, old_out, value);
7054 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7055 x = gen_rtx_NOT (wmode, new_out);
7056 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7057 break;
7058
7059 case MINUS:
7060 if (CONST_INT_P (value))
7061 {
7062 value = GEN_INT (-INTVAL (value));
7063 code = PLUS;
7064 }
7065 /* Fall through. */
7066
7067 default:
7068 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7069 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7070 break;
7071 }
7072
7073 aarch64_emit_store_exclusive (mode, cond, mem,
7074 gen_lowpart (mode, new_out), model_rtx);
7075
7076 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7077 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7078 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7079 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7080}
7081
7082static void
7083aarch64_start_file (void)
7084{
7085 if (selected_arch)
7086 asm_fprintf (asm_out_file, "\t.arch %s\n", selected_arch->name);
7087 else if (selected_cpu)
7088 asm_fprintf (asm_out_file, "\t.cpu %s\n", selected_cpu->name);
7089 default_file_start();
7090}
7091
7092/* Target hook for c_mode_for_suffix. */
7093static enum machine_mode
7094aarch64_c_mode_for_suffix (char suffix)
7095{
7096 if (suffix == 'q')
7097 return TFmode;
7098
7099 return VOIDmode;
7100}
7101
7102/* We can only represent floating point constants which will fit in
7103 "quarter-precision" values. These values are characterised by
7104 a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
7105 by:
7106
7107 (-1)^s * (n/16) * 2^r
7108
7109 Where:
7110 's' is the sign bit.
7111 'n' is an integer in the range 16 <= n <= 31.
7112 'r' is an integer in the range -3 <= r <= 4. */
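/* For example, 0.25 = (16/16) * 2^-2 and 31.0 = (31/16) * 2^4 are
   representable, while 0.1 is not; representable magnitudes range from
   (16/16) * 2^-3 = 0.125 up to (31/16) * 2^4 = 31.0.  */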
7113
7114/* Return true iff X can be represented by a quarter-precision
7115   floating point immediate operand.  Note, we cannot represent 0.0.  */
7116bool
7117aarch64_float_const_representable_p (rtx x)
7118{
7119 /* This represents our current view of how many bits
7120 make up the mantissa. */
7121 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7122  int exponent;
7123 unsigned HOST_WIDE_INT mantissa, mask;
7124 HOST_WIDE_INT m1, m2;
7125 REAL_VALUE_TYPE r, m;
7126
7127 if (!CONST_DOUBLE_P (x))
7128 return false;
7129
7130 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7131
7132 /* We cannot represent infinities, NaNs or +/-zero. We won't
7133 know if we have +zero until we analyse the mantissa, but we
7134 can reject the other invalid values. */
7135 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7136 || REAL_VALUE_MINUS_ZERO (r))
7137 return false;
7138
7139  /* Extract exponent.  */
7140 r = real_value_abs (&r);
7141 exponent = REAL_EXP (&r);
7142
7143 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7144 highest (sign) bit, with a fixed binary point at bit point_pos.
7145 m1 holds the low part of the mantissa, m2 the high part.
7146 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7147 bits for the mantissa, this can fail (low bits will be lost). */
7148 real_ldexp (&m, &r, point_pos - exponent);
7149 REAL_VALUE_TO_INT (&m1, &m2, m);
7150
7151 /* If the low part of the mantissa has bits set we cannot represent
7152 the value. */
7153 if (m1 != 0)
7154 return false;
7155 /* We have rejected the lower HOST_WIDE_INT, so update our
7156 understanding of how many bits lie in the mantissa and
7157 look only at the high HOST_WIDE_INT. */
7158 mantissa = m2;
7159 point_pos -= HOST_BITS_PER_WIDE_INT;
7160
7161 /* We can only represent values with a mantissa of the form 1.xxxx. */
7162 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7163 if ((mantissa & mask) != 0)
7164 return false;
7165
7166 /* Having filtered unrepresentable values, we may now remove all
7167 but the highest 5 bits. */
7168 mantissa >>= point_pos - 5;
7169
7170 /* We cannot represent the value 0.0, so reject it. This is handled
7171 elsewhere. */
7172 if (mantissa == 0)
7173 return false;
7174
7175 /* Then, as bit 4 is always set, we can mask it off, leaving
7176 the mantissa in the range [0, 15]. */
7177 mantissa &= ~(1 << 4);
7178 gcc_assert (mantissa <= 15);
7179
7180 /* GCC internally does not use IEEE754-like encoding (where normalized
7181 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
7182 Our mantissa values are shifted 4 places to the left relative to
7183 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7184 by 5 places to correct for GCC's representation. */
7185 exponent = 5 - exponent;
7186
7187 return (exponent >= 0 && exponent <= 7);
7188}
7189
7190char*
7191aarch64_output_simd_mov_immediate (rtx *const_vector,
7192 enum machine_mode mode,
7193 unsigned width)
7194{
7195 int is_valid;
7196 unsigned char widthc;
7197 int lane_width_bits;
7198 static char templ[40];
7199 int shift = 0, mvn = 0;
7200 const char *mnemonic;
7201 unsigned int lane_count = 0;
7202
7203 is_valid =
7204 aarch64_simd_immediate_valid_for_move (*const_vector, mode,
7205 const_vector, &lane_width_bits,
7206 &widthc, &mvn, &shift);
7207 gcc_assert (is_valid);
7208
7209 mode = GET_MODE_INNER (mode);
7210 if (mode == SFmode || mode == DFmode)
7211 {
7212 bool zero_p =
7213 aarch64_float_const_zero_rtx_p (*const_vector);
7214 gcc_assert (shift == 0);
7215 mnemonic = zero_p ? "movi" : "fmov";
7216 }
7217 else
7218 mnemonic = mvn ? "mvni" : "movi";
7219
7220 gcc_assert (lane_width_bits != 0);
7221 lane_count = width / lane_width_bits;
7222
7223 if (lane_count == 1)
7224 snprintf (templ, sizeof (templ), "%s\t%%d0, %%1", mnemonic);
7225 else if (shift)
7226 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1, lsl %d",
7227 mnemonic, lane_count, widthc, shift);
7228 else
7229 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1",
7230 mnemonic, lane_count, widthc);
7231 return templ;
7232}
7233
7234/* Split a move of op[1] and op[2] into op[0] into separate moves of the
    two halves.  */
7235
7236void
7237aarch64_split_combinev16qi (rtx operands[3])
7238{
7239 unsigned int dest = REGNO (operands[0]);
7240 unsigned int src1 = REGNO (operands[1]);
7241 unsigned int src2 = REGNO (operands[2]);
7242 enum machine_mode halfmode = GET_MODE (operands[1]);
7243 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7244 rtx destlo, desthi;
7245
7246 gcc_assert (halfmode == V16QImode);
7247
7248 if (src1 == dest && src2 == dest + halfregs)
7249 {
7250 /* No-op move. Can't split to nothing; emit something. */
7251 emit_note (NOTE_INSN_DELETED);
7252 return;
7253 }
7254
7255 /* Preserve register attributes for variable tracking. */
7256 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7257 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7258 GET_MODE_SIZE (halfmode));
7259
7260 /* Special case of reversed high/low parts. */
7261 if (reg_overlap_mentioned_p (operands[2], destlo)
7262 && reg_overlap_mentioned_p (operands[1], desthi))
7263 {
7264 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7265 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7266 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7267 }
7268 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7269 {
7270 /* Try to avoid unnecessary moves if part of the result
7271 is in the right place already. */
7272 if (src1 != dest)
7273 emit_move_insn (destlo, operands[1]);
7274 if (src2 != dest + halfregs)
7275 emit_move_insn (desthi, operands[2]);
7276 }
7277 else
7278 {
7279 if (src2 != dest + halfregs)
7280 emit_move_insn (desthi, operands[2]);
7281 if (src1 != dest)
7282 emit_move_insn (destlo, operands[1]);
7283 }
7284}
7285
7286/* vec_perm support. */
7287
7288#define MAX_VECT_LEN 16
7289
7290struct expand_vec_perm_d
7291{
7292 rtx target, op0, op1;
7293 unsigned char perm[MAX_VECT_LEN];
7294 enum machine_mode vmode;
7295 unsigned char nelt;
7296 bool one_vector_p;
7297 bool testing_p;
7298};
7299
7300/* Generate a variable permutation. */
7301
7302static void
7303aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7304{
7305 enum machine_mode vmode = GET_MODE (target);
7306 bool one_vector_p = rtx_equal_p (op0, op1);
7307
7308 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7309 gcc_checking_assert (GET_MODE (op0) == vmode);
7310 gcc_checking_assert (GET_MODE (op1) == vmode);
7311 gcc_checking_assert (GET_MODE (sel) == vmode);
7312 gcc_checking_assert (TARGET_SIMD);
7313
7314 if (one_vector_p)
7315 {
7316 if (vmode == V8QImode)
7317 {
7318 /* Expand the argument to a V16QI mode by duplicating it. */
7319 rtx pair = gen_reg_rtx (V16QImode);
7320 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7321 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7322 }
7323 else
7324 {
7325 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7326 }
7327 }
7328 else
7329 {
7330 rtx pair;
7331
7332 if (vmode == V8QImode)
7333 {
7334 pair = gen_reg_rtx (V16QImode);
7335 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7336 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7337 }
7338 else
7339 {
7340 pair = gen_reg_rtx (OImode);
7341 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7342 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7343 }
7344 }
7345}
7346
7347void
7348aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7349{
7350 enum machine_mode vmode = GET_MODE (target);
7351 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7352 bool one_vector_p = rtx_equal_p (op0, op1);
7353 rtx rmask[MAX_VECT_LEN], mask;
7354
7355 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7356
7357 /* The TBL instruction does not use a modulo index, so we must take care
7358 of that ourselves. */
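  /* (TBL writes zero bytes for out-of-range indices, whereas vec_perm
     requires modulo behaviour, hence the explicit AND with nelt - 1 or
     2 * nelt - 1 below.)  */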
7359 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7360 for (i = 0; i < nelt; ++i)
7361 rmask[i] = mask;
7362 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7363 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7364
7365 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7366}
7367
7368/* Recognize patterns suitable for the TRN instructions. */
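/* For example, for V4SImode, TRN1 corresponds to the permutation
   { 0, 4, 2, 6 } and TRN2 to { 1, 5, 3, 7 }.  */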
7369static bool
7370aarch64_evpc_trn (struct expand_vec_perm_d *d)
7371{
7372 unsigned int i, odd, mask, nelt = d->nelt;
7373 rtx out, in0, in1, x;
7374 rtx (*gen) (rtx, rtx, rtx);
7375 enum machine_mode vmode = d->vmode;
7376
7377 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7378 return false;
7379
7380 /* Note that these are little-endian tests.
7381 We correct for big-endian later. */
7382 if (d->perm[0] == 0)
7383 odd = 0;
7384 else if (d->perm[0] == 1)
7385 odd = 1;
7386 else
7387 return false;
7388 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7389
7390 for (i = 0; i < nelt; i += 2)
7391 {
7392 if (d->perm[i] != i + odd)
7393 return false;
7394 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7395 return false;
7396 }
7397
7398 /* Success! */
7399 if (d->testing_p)
7400 return true;
7401
7402 in0 = d->op0;
7403 in1 = d->op1;
7404 if (BYTES_BIG_ENDIAN)
7405 {
7406 x = in0, in0 = in1, in1 = x;
7407 odd = !odd;
7408 }
7409 out = d->target;
7410
7411 if (odd)
7412 {
7413 switch (vmode)
7414 {
7415 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7416 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7417 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7418 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7419 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7420 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7421 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7422 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7423 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7424 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7425 default:
7426 return false;
7427 }
7428 }
7429 else
7430 {
7431 switch (vmode)
7432 {
7433 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7434 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7435 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7436 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7437 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7438 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7439 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7440 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7441 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7442 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7443 default:
7444 return false;
7445 }
7446 }
7447
7448 emit_insn (gen (out, in0, in1));
7449 return true;
7450}
7451
7452/* Recognize patterns suitable for the UZP instructions. */
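/* For example, for V4SImode, UZP1 corresponds to the permutation
   { 0, 2, 4, 6 } and UZP2 to { 1, 3, 5, 7 }.  */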
7453static bool
7454aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7455{
7456 unsigned int i, odd, mask, nelt = d->nelt;
7457 rtx out, in0, in1, x;
7458 rtx (*gen) (rtx, rtx, rtx);
7459 enum machine_mode vmode = d->vmode;
7460
7461 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7462 return false;
7463
7464 /* Note that these are little-endian tests.
7465 We correct for big-endian later. */
7466 if (d->perm[0] == 0)
7467 odd = 0;
7468 else if (d->perm[0] == 1)
7469 odd = 1;
7470 else
7471 return false;
7472 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7473
7474 for (i = 0; i < nelt; i++)
7475 {
7476 unsigned elt = (i * 2 + odd) & mask;
7477 if (d->perm[i] != elt)
7478 return false;
7479 }
7480
7481 /* Success! */
7482 if (d->testing_p)
7483 return true;
7484
7485 in0 = d->op0;
7486 in1 = d->op1;
7487 if (BYTES_BIG_ENDIAN)
7488 {
7489 x = in0, in0 = in1, in1 = x;
7490 odd = !odd;
7491 }
7492 out = d->target;
7493
7494 if (odd)
7495 {
7496 switch (vmode)
7497 {
7498 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7499 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7500 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7501 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7502 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7503 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7504 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7505 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7506 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7507 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7508 default:
7509 return false;
7510 }
7511 }
7512 else
7513 {
7514 switch (vmode)
7515 {
7516 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7517 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7518 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7519 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7520 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7521 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7522 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7523 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7524 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7525 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7526 default:
7527 return false;
7528 }
7529 }
7530
7531 emit_insn (gen (out, in0, in1));
7532 return true;
7533}
7534
7535/* Recognize patterns suitable for the ZIP instructions. */
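/* For example, for V4SImode, ZIP1 corresponds to the permutation
   { 0, 4, 1, 5 } and ZIP2 to { 2, 6, 3, 7 }.  */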
7536static bool
7537aarch64_evpc_zip (struct expand_vec_perm_d *d)
7538{
7539 unsigned int i, high, mask, nelt = d->nelt;
7540 rtx out, in0, in1, x;
7541 rtx (*gen) (rtx, rtx, rtx);
7542 enum machine_mode vmode = d->vmode;
7543
7544 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7545 return false;
7546
7547 /* Note that these are little-endian tests.
7548 We correct for big-endian later. */
7549 high = nelt / 2;
7550 if (d->perm[0] == high)
7551 /* Do Nothing. */
7552 ;
7553 else if (d->perm[0] == 0)
7554 high = 0;
7555 else
7556 return false;
7557 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7558
7559 for (i = 0; i < nelt / 2; i++)
7560 {
7561 unsigned elt = (i + high) & mask;
7562 if (d->perm[i * 2] != elt)
7563 return false;
7564 elt = (elt + nelt) & mask;
7565 if (d->perm[i * 2 + 1] != elt)
7566 return false;
7567 }
7568
7569 /* Success! */
7570 if (d->testing_p)
7571 return true;
7572
7573 in0 = d->op0;
7574 in1 = d->op1;
7575 if (BYTES_BIG_ENDIAN)
7576 {
7577 x = in0, in0 = in1, in1 = x;
7578 high = !high;
7579 }
7580 out = d->target;
7581
7582 if (high)
7583 {
7584 switch (vmode)
7585 {
7586 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7587 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7588 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7589 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
7590 case V4SImode: gen = gen_aarch64_zip2v4si; break;
7591 case V2SImode: gen = gen_aarch64_zip2v2si; break;
7592 case V2DImode: gen = gen_aarch64_zip2v2di; break;
7593 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
7594 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
7595 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
7596 default:
7597 return false;
7598 }
7599 }
7600 else
7601 {
7602 switch (vmode)
7603 {
7604 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
7605 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
7606 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
7607 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
7608 case V4SImode: gen = gen_aarch64_zip1v4si; break;
7609 case V2SImode: gen = gen_aarch64_zip1v2si; break;
7610 case V2DImode: gen = gen_aarch64_zip1v2di; break;
7611 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
7612 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
7613 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
7614 default:
7615 return false;
7616 }
7617 }
7618
7619 emit_insn (gen (out, in0, in1));
7620 return true;
7621}
7622
7623static bool
7624aarch64_evpc_tbl (struct expand_vec_perm_d *d)
7625{
7626 rtx rperm[MAX_VECT_LEN], sel;
7627 enum machine_mode vmode = d->vmode;
7628 unsigned int i, nelt = d->nelt;
7629
7630 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
7631 numbering of elements for big-endian, we must reverse the order. */
7632 if (BYTES_BIG_ENDIAN)
7633 return false;
7634
7635 if (d->testing_p)
7636 return true;
7637
7638 /* Generic code will try constant permutation twice. Once with the
7639 original mode and again with the elements lowered to QImode.
7640 So wait and don't do the selector expansion ourselves. */
7641 if (vmode != V8QImode && vmode != V16QImode)
7642 return false;
7643
7644 for (i = 0; i < nelt; ++i)
7645 rperm[i] = GEN_INT (d->perm[i]);
7646 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
7647 sel = force_reg (vmode, sel);
7648
7649 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
7650 return true;
7651}
7652
7653static bool
7654aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
7655{
7656 /* The pattern matching functions above are written to look for a small
7657 number to begin the sequence (0, 1, N/2). If we begin with an index
7658 from the second operand, we can swap the operands. */
7659 if (d->perm[0] >= d->nelt)
7660 {
7661 unsigned i, nelt = d->nelt;
7662 rtx x;
7663
7664 for (i = 0; i < nelt; ++i)
7665 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
7666
7667 x = d->op0;
7668 d->op0 = d->op1;
7669 d->op1 = x;
7670 }
7671
7672 if (TARGET_SIMD)
7673 {
7674 if (aarch64_evpc_zip (d))
7675 return true;
7676 else if (aarch64_evpc_uzp (d))
7677 return true;
7678 else if (aarch64_evpc_trn (d))
7679 return true;
7680 return aarch64_evpc_tbl (d);
7681 }
7682 return false;
7683}
7684
7685/* Expand a vec_perm_const pattern. */
7686
7687bool
7688aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
7689{
7690 struct expand_vec_perm_d d;
7691 int i, nelt, which;
7692
7693 d.target = target;
7694 d.op0 = op0;
7695 d.op1 = op1;
7696
7697 d.vmode = GET_MODE (target);
7698 gcc_assert (VECTOR_MODE_P (d.vmode));
7699 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
7700 d.testing_p = false;
7701
7702 for (i = which = 0; i < nelt; ++i)
7703 {
7704 rtx e = XVECEXP (sel, 0, i);
7705 int ei = INTVAL (e) & (2 * nelt - 1);
7706 which |= (ei < nelt ? 1 : 2);
7707 d.perm[i] = ei;
7708 }
7709
7710 switch (which)
7711 {
7712 default:
7713 gcc_unreachable ();
7714
7715 case 3:
7716 d.one_vector_p = false;
7717 if (!rtx_equal_p (op0, op1))
7718 break;
7719
7720 /* The elements of PERM do not suggest that only the first operand
7721 is used, but both operands are identical. Allow easier matching
7722 of the permutation by folding the permutation into the single
7723 input vector. */
7724 /* Fall Through. */
7725 case 2:
7726 for (i = 0; i < nelt; ++i)
7727 d.perm[i] &= nelt - 1;
7728 d.op0 = op1;
7729 d.one_vector_p = true;
7730 break;
7731
7732 case 1:
7733 d.op1 = op0;
7734 d.one_vector_p = true;
7735 break;
7736 }
7737
7738 return aarch64_expand_vec_perm_const_1 (&d);
7739}
7740
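/* Editor's sketch, not part of the upstream sources: the operand
   classification performed above.  Bit 0 of the result is set when some
   index selects from op0 and bit 1 when some index selects from op1.
   On V4SI, { 0, 4, 1, 5 } yields 3 (both inputs live), whereas
   { 6, 7, 4, 5 } yields 2 and is later reduced to the single-vector
   mask { 2, 3, 0, 1 }.  The helper name is hypothetical.  */
static unsigned int
example_classify_operands (const unsigned char *perm, unsigned int nelt)
{
  unsigned int i, which = 0;

  for (i = 0; i < nelt; i++)
    which |= (perm[i] & (2 * nelt - 1)) < nelt ? 1 : 2;
  return which;
}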
7741static bool
7742aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
7743 const unsigned char *sel)
7744{
7745 struct expand_vec_perm_d d;
7746 unsigned int i, nelt, which;
7747 bool ret;
7748
7749 d.vmode = vmode;
7750 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
7751 d.testing_p = true;
7752 memcpy (d.perm, sel, nelt);
7753
7754 /* Calculate whether all elements are in one vector. */
7755 for (i = which = 0; i < nelt; ++i)
7756 {
7757 unsigned char e = d.perm[i];
7758 gcc_assert (e < 2 * nelt);
7759 which |= (e < nelt ? 1 : 2);
7760 }
7761
7762 /* If all elements are from the second vector, reindex as if from the
7763 first vector. */
7764 if (which == 2)
7765 for (i = 0; i < nelt; ++i)
7766 d.perm[i] -= nelt;
7767
7768 /* Check whether the mask can be applied to a single vector. */
7769 d.one_vector_p = (which != 3);
7770
7771 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
7772 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
7773 if (!d.one_vector_p)
7774 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
7775
7776 start_sequence ();
7777 ret = aarch64_expand_vec_perm_const_1 (&d);
7778 end_sequence ();
7779
7780 return ret;
7781}
7782
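/* Editor's usage note, not part of the upstream sources: the middle end
   consults this function through TARGET_VECTORIZE_VEC_PERM_CONST_OK
   before committing to a constant-mask VEC_PERM_EXPR, and the trial
   expansion is wrapped in start_sequence/end_sequence so no RTL leaks
   into the instruction stream.  For instance, a V16QI reversal mask is
   accepted on little-endian via the TBL fallback (sketch only):

     unsigned char sel[16];
     unsigned int i;
     bool ok;

     for (i = 0; i < 16; i++)
       sel[i] = 15 - i;
     ok = aarch64_vectorize_vec_perm_const_ok (V16QImode, sel);  */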
7783#undef TARGET_ADDRESS_COST
7784#define TARGET_ADDRESS_COST aarch64_address_cost
7785
7786/* This hook determines whether unnamed bitfields affect the alignment
7787 of the containing structure. The hook returns true if the structure
7788 should inherit the alignment requirements of an unnamed bitfield's
7789 type. */
7790#undef TARGET_ALIGN_ANON_BITFIELD
7791#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
7792
7793#undef TARGET_ASM_ALIGNED_DI_OP
7794#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
7795
7796#undef TARGET_ASM_ALIGNED_HI_OP
7797#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
7798
7799#undef TARGET_ASM_ALIGNED_SI_OP
7800#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
7801
7802#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7803#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
7804 hook_bool_const_tree_hwi_hwi_const_tree_true
7805
7806#undef TARGET_ASM_FILE_START
7807#define TARGET_ASM_FILE_START aarch64_start_file
7808
7809#undef TARGET_ASM_OUTPUT_MI_THUNK
7810#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
7811
7812#undef TARGET_ASM_SELECT_RTX_SECTION
7813#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
7814
7815#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
7816#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
7817
7818#undef TARGET_BUILD_BUILTIN_VA_LIST
7819#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
7820
7821#undef TARGET_CALLEE_COPIES
7822#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
7823
7824#undef TARGET_CAN_ELIMINATE
7825#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
7826
7827#undef TARGET_CANNOT_FORCE_CONST_MEM
7828#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
7829
7830#undef TARGET_CONDITIONAL_REGISTER_USAGE
7831#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
7832
7833/* Only the least significant bit is used for initialization guard
7834 variables. */
7835#undef TARGET_CXX_GUARD_MASK_BIT
7836#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
7837
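/* Editor's note, not part of the upstream sources: returning true above
   means the C++ ABI guard check only inspects bit 0 of the guard word,
   so an initialization guard roughly expands to (sketch only):

     if ((guard & 1) == 0 && __cxa_guard_acquire (&guard))
       {
         ... run the initializer ...
         __cxa_guard_release (&guard);
       }  */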
7838#undef TARGET_C_MODE_FOR_SUFFIX
7839#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
7840
7841#ifdef TARGET_BIG_ENDIAN_DEFAULT
7842#undef TARGET_DEFAULT_TARGET_FLAGS
7843#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
7844#endif
7845
7846#undef TARGET_CLASS_MAX_NREGS
7847#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
7848
7849#undef TARGET_BUILTIN_DECL
7850#define TARGET_BUILTIN_DECL aarch64_builtin_decl
7851
7852#undef TARGET_EXPAND_BUILTIN
7853#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
7854
7855#undef TARGET_EXPAND_BUILTIN_VA_START
7856#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
7857
7858#undef TARGET_FUNCTION_ARG
7859#define TARGET_FUNCTION_ARG aarch64_function_arg
7860
7861#undef TARGET_FUNCTION_ARG_ADVANCE
7862#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
7863
7864#undef TARGET_FUNCTION_ARG_BOUNDARY
7865#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
7866
7867#undef TARGET_FUNCTION_OK_FOR_SIBCALL
7868#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
7869
7870#undef TARGET_FUNCTION_VALUE
7871#define TARGET_FUNCTION_VALUE aarch64_function_value
7872
7873#undef TARGET_FUNCTION_VALUE_REGNO_P
7874#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
7875
7876#undef TARGET_FRAME_POINTER_REQUIRED
7877#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
7878
7879#undef TARGET_GIMPLIFY_VA_ARG_EXPR
7880#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
7881
7882#undef TARGET_INIT_BUILTINS
7883#define TARGET_INIT_BUILTINS aarch64_init_builtins
7884
7885#undef TARGET_LEGITIMATE_ADDRESS_P
7886#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
7887
7888#undef TARGET_LEGITIMATE_CONSTANT_P
7889#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
7890
7891#undef TARGET_LIBGCC_CMP_RETURN_MODE
7892#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
7893
7894#undef TARGET_MANGLE_TYPE
7895#define TARGET_MANGLE_TYPE aarch64_mangle_type
7896
7897#undef TARGET_MEMORY_MOVE_COST
7898#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
7899
7900#undef TARGET_MUST_PASS_IN_STACK
7901#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7902
7903/* This target hook should return true if accesses to volatile bitfields
7904 should use the narrowest mode possible. It should return false if these
7905 accesses should use the bitfield container type. */
7906#undef TARGET_NARROW_VOLATILE_BITFIELD
7907#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
7908
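/* Editor's note, not part of the upstream sources: returning false above
   means a volatile bit-field access uses the declared container type
   rather than the narrowest possible mode, e.g. (sketch only):

     struct s { volatile unsigned int f : 8; } x;
     x.f = 1;   // performed as a 32-bit access to the container

   assuming the usual 32-bit int container for the bit-field.  */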
7909#undef TARGET_OPTION_OVERRIDE
7910#define TARGET_OPTION_OVERRIDE aarch64_override_options
7911
7912#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
7913#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
7914 aarch64_override_options_after_change
7915
7916#undef TARGET_PASS_BY_REFERENCE
7917#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
7918
7919#undef TARGET_PREFERRED_RELOAD_CLASS
7920#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
7921
7922#undef TARGET_SECONDARY_RELOAD
7923#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
7924
7925#undef TARGET_SHIFT_TRUNCATION_MASK
7926#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
7927
7928#undef TARGET_SETUP_INCOMING_VARARGS
7929#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
7930
7931#undef TARGET_STRUCT_VALUE_RTX
7932#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
7933
7934#undef TARGET_REGISTER_MOVE_COST
7935#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
7936
7937#undef TARGET_RETURN_IN_MEMORY
7938#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
7939
7940#undef TARGET_RETURN_IN_MSB
7941#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
7942
7943#undef TARGET_RTX_COSTS
7944#define TARGET_RTX_COSTS aarch64_rtx_costs
7945
7946#undef TARGET_TRAMPOLINE_INIT
7947#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
7948
7949#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
7950#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
7951
7952#undef TARGET_VECTOR_MODE_SUPPORTED_P
7953#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
7954
7955#undef TARGET_ARRAY_MODE_SUPPORTED_P
7956#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
7957
7958#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
7959#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
7960
7961#undef TARGET_VECTORIZE_BUILTINS
7962#define TARGET_VECTORIZE_BUILTINS
7963
7964#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
7965#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
7966 aarch64_builtin_vectorized_function
7967
7968#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
7969#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
7970 aarch64_autovectorize_vector_sizes
7971
7972/* Section anchor support. */
7973
7974#undef TARGET_MIN_ANCHOR_OFFSET
7975#define TARGET_MIN_ANCHOR_OFFSET -256
7976
7977/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
7978 byte offset; we can do much more for larger data types, but have no way
7979 to determine the size of the access. We assume accesses are aligned. */
7980#undef TARGET_MAX_ANCHOR_OFFSET
7981#define TARGET_MAX_ANCHOR_OFFSET 4095
7982
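/* Editor's note, not part of the upstream sources: together these bounds
   let a single section anchor address [anchor - 256, anchor + 4095].
   -256 matches the 9-bit signed unscaled offset of LDUR/STUR and 4095
   the 12-bit unsigned immediate of a byte LDR/STR, the conservative
   case when the access size is not known.  */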
7983#undef TARGET_VECTOR_ALIGNMENT
7984#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
7985
7986#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7987#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
7988 aarch64_simd_vector_alignment_reachable
7989
7990/* vec_perm support. */
7991
7992#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
7993#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
7994 aarch64_vectorize_vec_perm_const_ok
7995
7996
7997#undef TARGET_FIXED_CONDITION_CODE_REGS
7998#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
7999
8000struct gcc_target targetm = TARGET_INITIALIZER;
8001
8002#include "gt-aarch64.h"