 1/* Machine description for AArch64 architecture.
 2 Copyright (C) 2009-2013 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
25#include "insn-codes.h"
26#include "rtl.h"
27#include "insn-attr.h"
28#include "tree.h"
29#include "regs.h"
30#include "df.h"
31#include "hard-reg-set.h"
32#include "output.h"
33#include "expr.h"
34#include "reload.h"
35#include "toplev.h"
36#include "target.h"
37#include "target-def.h"
38#include "targhooks.h"
39#include "ggc.h"
40#include "function.h"
41#include "tm_p.h"
42#include "recog.h"
43#include "langhooks.h"
44#include "diagnostic-core.h"
45#include "gimple.h"
46#include "optabs.h"
47#include "dwarf2.h"
48
49/* Classifies an address.
50
51 ADDRESS_REG_IMM
52 A simple base register plus immediate offset.
53
54 ADDRESS_REG_WB
55 A base register indexed by immediate offset with writeback.
56
57 ADDRESS_REG_REG
58 A base register indexed by (optionally scaled) register.
59
60 ADDRESS_REG_UXTW
61 A base register indexed by (optionally scaled) zero-extended register.
62
63 ADDRESS_REG_SXTW
64 A base register indexed by (optionally scaled) sign-extended register.
65
66 ADDRESS_LO_SUM
67 A LO_SUM rtx with a base register and "LO12" symbol relocation.
68
69 ADDRESS_SYMBOLIC:
70 A constant symbolic address, in pc-relative literal pool. */
71
72enum aarch64_address_type {
73 ADDRESS_REG_IMM,
74 ADDRESS_REG_WB,
75 ADDRESS_REG_REG,
76 ADDRESS_REG_UXTW,
77 ADDRESS_REG_SXTW,
78 ADDRESS_LO_SUM,
79 ADDRESS_SYMBOLIC
80};
81
82struct aarch64_address_info {
83 enum aarch64_address_type type;
84 rtx base;
85 rtx offset;
86 int shift;
87 enum aarch64_symbol_type symbol_type;
88};
89
90struct simd_immediate_info
91{
92 rtx value;
93 int shift;
94 int element_width;
95 bool mvn;
96};
97
98/* The current code model. */
99enum aarch64_code_model aarch64_cmodel;
100
101#ifdef HAVE_AS_TLS
102#undef TARGET_HAVE_TLS
103#define TARGET_HAVE_TLS 1
104#endif
105
106static bool aarch64_composite_type_p (const_tree, enum machine_mode);
107static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
108 const_tree,
109 enum machine_mode *, int *,
110 bool *);
111static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
112static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
 113static void aarch64_override_options_after_change (void);
114static bool aarch64_vector_mode_supported_p (enum machine_mode);
115static unsigned bit_count (unsigned HOST_WIDE_INT);
116static bool aarch64_const_vec_all_same_int_p (rtx,
117 HOST_WIDE_INT, HOST_WIDE_INT);
118
119static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
120 const unsigned char *sel);
121
122/* The processor for which instructions should be scheduled. */
123enum aarch64_processor aarch64_tune = generic;
124
125/* The current tuning set. */
126const struct tune_params *aarch64_tune_params;
127
128/* Mask to specify which instructions we are allowed to generate. */
129unsigned long aarch64_isa_flags = 0;
130
131/* Mask to specify which instruction scheduling options should be used. */
132unsigned long aarch64_tune_flags = 0;
133
134/* Tuning parameters. */
135
136#if HAVE_DESIGNATED_INITIALIZERS
137#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
138#else
139#define NAMED_PARAM(NAME, VAL) (VAL)
140#endif
141
142#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
143__extension__
144#endif
145static const struct cpu_rtx_cost_table generic_rtx_cost_table =
146{
147 NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
148 NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
149 NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
150 NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
151 NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
152 NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
153 NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
154 NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
155 NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
156 NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
157 NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
158 NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
159};
160
161#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
162__extension__
163#endif
164static const struct cpu_addrcost_table generic_addrcost_table =
165{
166 NAMED_PARAM (pre_modify, 0),
167 NAMED_PARAM (post_modify, 0),
168 NAMED_PARAM (register_offset, 0),
169 NAMED_PARAM (register_extend, 0),
170 NAMED_PARAM (imm_offset, 0)
171};
172
173#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
174__extension__
175#endif
176static const struct cpu_regmove_cost generic_regmove_cost =
177{
178 NAMED_PARAM (GP2GP, 1),
179 NAMED_PARAM (GP2FP, 2),
180 NAMED_PARAM (FP2GP, 2),
181 /* We currently do not provide direct support for TFmode Q->Q move.
182 Therefore we need to raise the cost above 2 in order to have
183 reload handle the situation. */
184 NAMED_PARAM (FP2FP, 4)
185};
186
187#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
188__extension__
189#endif
190static const struct tune_params generic_tunings =
191{
192 &generic_rtx_cost_table,
193 &generic_addrcost_table,
194 &generic_regmove_cost,
195 NAMED_PARAM (memmov_cost, 4)
196};
197
198/* A processor implementing AArch64. */
199struct processor
200{
201 const char *const name;
202 enum aarch64_processor core;
203 const char *arch;
204 const unsigned long flags;
205 const struct tune_params *const tune;
206};
207
208/* Processor cores implementing AArch64. */
209static const struct processor all_cores[] =
210{
211#define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
212 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
213#include "aarch64-cores.def"
214#undef AARCH64_CORE
215 {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
216 {NULL, aarch64_none, NULL, 0, NULL}
217};
218
219/* Architectures implementing AArch64. */
220static const struct processor all_architectures[] =
221{
222#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
223 {NAME, CORE, #ARCH, FLAGS, NULL},
224#include "aarch64-arches.def"
225#undef AARCH64_ARCH
226 {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
227 {NULL, aarch64_none, NULL, 0, NULL}
228};
229
 230/* Target specification. These are populated as command-line arguments
231 are processed, or NULL if not specified. */
232static const struct processor *selected_arch;
233static const struct processor *selected_cpu;
234static const struct processor *selected_tune;
235
236#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
237
238/* An ISA extension in the co-processor and main instruction set space. */
239struct aarch64_option_extension
240{
241 const char *const name;
242 const unsigned long flags_on;
243 const unsigned long flags_off;
244};
245
246/* ISA extensions in AArch64. */
247static const struct aarch64_option_extension all_extensions[] =
248{
249#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
250 {NAME, FLAGS_ON, FLAGS_OFF},
251#include "aarch64-option-extensions.def"
252#undef AARCH64_OPT_EXTENSION
253 {NULL, 0, 0}
254};
255
256/* Used to track the size of an address when generating a pre/post
257 increment address. */
258static enum machine_mode aarch64_memory_reference_mode;
259
260/* Used to force GTY into this file. */
261static GTY(()) int gty_dummy;
262
263/* A table of valid AArch64 "bitmask immediate" values for
264 logical instructions. */
265
266#define AARCH64_NUM_BITMASKS 5334
267static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
268
269/* Did we set flag_omit_frame_pointer just so
270 aarch64_frame_pointer_required would be called? */
271static bool faked_omit_frame_pointer;
272
273typedef enum aarch64_cond_code
274{
275 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
276 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
277 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
278}
279aarch64_cc;
280
281#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
282
283/* The condition codes of the processor, and the inverse function. */
284static const char * const aarch64_condition_codes[] =
285{
286 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
287 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
288};
289
290/* Provide a mapping from gcc register numbers to dwarf register numbers. */
291unsigned
292aarch64_dbx_register_number (unsigned regno)
293{
294 if (GP_REGNUM_P (regno))
295 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
296 else if (regno == SP_REGNUM)
297 return AARCH64_DWARF_SP;
298 else if (FP_REGNUM_P (regno))
299 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
300
301 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
302 equivalent DWARF register. */
303 return DWARF_FRAME_REGISTERS;
304}
305
306/* Return TRUE if MODE is any of the large INT modes. */
307static bool
308aarch64_vect_struct_mode_p (enum machine_mode mode)
309{
310 return mode == OImode || mode == CImode || mode == XImode;
311}
312
313/* Return TRUE if MODE is any of the vector modes. */
314static bool
315aarch64_vector_mode_p (enum machine_mode mode)
316{
317 return aarch64_vector_mode_supported_p (mode)
318 || aarch64_vect_struct_mode_p (mode);
319}
320
321/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
322static bool
323aarch64_array_mode_supported_p (enum machine_mode mode,
324 unsigned HOST_WIDE_INT nelems)
325{
326 if (TARGET_SIMD
327 && AARCH64_VALID_SIMD_QREG_MODE (mode)
328 && (nelems >= 2 && nelems <= 4))
329 return true;
330
331 return false;
332}
333
334/* Implement HARD_REGNO_NREGS. */
335
336int
337aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
338{
339 switch (aarch64_regno_regclass (regno))
340 {
341 case FP_REGS:
342 case FP_LO_REGS:
343 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
344 default:
345 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
346 }
347 gcc_unreachable ();
348}
349
350/* Implement HARD_REGNO_MODE_OK. */
351
352int
353aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
354{
355 if (GET_MODE_CLASS (mode) == MODE_CC)
356 return regno == CC_REGNUM;
357
358 if (regno == SP_REGNUM || regno == FRAME_POINTER_REGNUM
359 || regno == ARG_POINTER_REGNUM)
360 return mode == Pmode;
361
362 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
363 return 1;
364
365 if (FP_REGNUM_P (regno))
366 {
367 if (aarch64_vect_struct_mode_p (mode))
368 return
369 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
370 else
371 return 1;
372 }
373
374 return 0;
375}
376
377/* Return true if calls to DECL should be treated as
378 long-calls (ie called via a register). */
379static bool
380aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
381{
382 return false;
383}
384
385/* Return true if calls to symbol-ref SYM should be treated as
386 long-calls (ie called via a register). */
387bool
388aarch64_is_long_call_p (rtx sym)
389{
390 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
391}
392
393/* Return true if the offsets to a zero/sign-extract operation
394 represent an expression that matches an extend operation. The
 395 operands represent the parameters from
396
397 (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
398bool
399aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
400 rtx extract_imm)
401{
402 HOST_WIDE_INT mult_val, extract_val;
403
404 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
405 return false;
406
407 mult_val = INTVAL (mult_imm);
408 extract_val = INTVAL (extract_imm);
409
410 if (extract_val > 8
411 && extract_val < GET_MODE_BITSIZE (mode)
412 && exact_log2 (extract_val & ~7) > 0
413 && (extract_val & 7) <= 4
414 && mult_val == (1 << (extract_val & 7)))
415 return true;
416
417 return false;
418}
419
420/* Emit an insn that's a simple single-set. Both the operands must be
421 known to be valid. */
422inline static rtx
423emit_set_insn (rtx x, rtx y)
424{
425 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
426}
427
428/* X and Y are two things to compare using CODE. Emit the compare insn and
429 return the rtx for register 0 in the proper mode. */
430rtx
431aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
432{
433 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
434 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
435
436 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
437 return cc_reg;
438}
439
440/* Build the SYMBOL_REF for __tls_get_addr. */
441
442static GTY(()) rtx tls_get_addr_libfunc;
443
444rtx
445aarch64_tls_get_addr (void)
446{
447 if (!tls_get_addr_libfunc)
448 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
449 return tls_get_addr_libfunc;
450}
451
452/* Return the TLS model to use for ADDR. */
453
454static enum tls_model
455tls_symbolic_operand_type (rtx addr)
456{
457 enum tls_model tls_kind = TLS_MODEL_NONE;
458 rtx sym, addend;
459
460 if (GET_CODE (addr) == CONST)
461 {
462 split_const (addr, &sym, &addend);
463 if (GET_CODE (sym) == SYMBOL_REF)
464 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
465 }
466 else if (GET_CODE (addr) == SYMBOL_REF)
467 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
468
469 return tls_kind;
470}
471
 472/* We allow lo_sum expressions in our legitimate addresses
 473 so that combine can take care of combining addresses where
 474 necessary, but for generation purposes, we generate the address
 475 as:
476 RTL Absolute
477 tmp = hi (symbol_ref); adrp x1, foo
 478 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo12:foo
479 nop
480
481 PIC TLS
482 adrp x1, :got:foo adrp tmp, :tlsgd:foo
483 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
484 bl __tls_get_addr
485 nop
486
487 Load TLS symbol, depending on TLS mechanism and TLS access model.
488
489 Global Dynamic - Traditional TLS:
490 adrp tmp, :tlsgd:imm
491 add dest, tmp, #:tlsgd_lo12:imm
492 bl __tls_get_addr
493
494 Global Dynamic - TLS Descriptors:
495 adrp dest, :tlsdesc:imm
496 ldr tmp, [dest, #:tlsdesc_lo12:imm]
497 add dest, dest, #:tlsdesc_lo12:imm
498 blr tmp
499 mrs tp, tpidr_el0
500 add dest, dest, tp
501
502 Initial Exec:
503 mrs tp, tpidr_el0
504 adrp tmp, :gottprel:imm
505 ldr dest, [tmp, #:gottprel_lo12:imm]
506 add dest, dest, tp
507
508 Local Exec:
509 mrs tp, tpidr_el0
510 add t0, tp, #:tprel_hi12:imm
511 add t0, #:tprel_lo12_nc:imm
512*/
513
514static void
515aarch64_load_symref_appropriately (rtx dest, rtx imm,
516 enum aarch64_symbol_type type)
517{
518 switch (type)
519 {
520 case SYMBOL_SMALL_ABSOLUTE:
521 {
522 rtx tmp_reg = dest;
523 if (can_create_pseudo_p ())
524 {
525 tmp_reg = gen_reg_rtx (Pmode);
526 }
527
528 emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
529 emit_insn (gen_add_losym (dest, tmp_reg, imm));
530 return;
531 }
532
533 case SYMBOL_TINY_ABSOLUTE:
534 emit_insn (gen_rtx_SET (Pmode, dest, imm));
535 return;
536
537 case SYMBOL_SMALL_GOT:
538 {
539 rtx tmp_reg = dest;
540 if (can_create_pseudo_p ())
 541 tmp_reg = gen_reg_rtx (Pmode);
542 emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
543 emit_insn (gen_ldr_got_small (dest, tmp_reg, imm));
544 return;
545 }
546
547 case SYMBOL_SMALL_TLSGD:
548 {
549 rtx insns;
550 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
551
552 start_sequence ();
553 emit_call_insn (gen_tlsgd_small (result, imm));
554 insns = get_insns ();
555 end_sequence ();
556
557 RTL_CONST_CALL_P (insns) = 1;
558 emit_libcall_block (insns, dest, result, imm);
559 return;
560 }
561
562 case SYMBOL_SMALL_TLSDESC:
563 {
564 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
565 rtx tp;
566
567 emit_insn (gen_tlsdesc_small (imm));
568 tp = aarch64_load_tp (NULL);
569 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
570 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
571 return;
572 }
573
574 case SYMBOL_SMALL_GOTTPREL:
575 {
576 rtx tmp_reg = gen_reg_rtx (Pmode);
577 rtx tp = aarch64_load_tp (NULL);
578 emit_insn (gen_tlsie_small (tmp_reg, imm));
579 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
580 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
581 return;
582 }
583
584 case SYMBOL_SMALL_TPREL:
585 {
586 rtx tp = aarch64_load_tp (NULL);
587 emit_insn (gen_tlsle_small (dest, tp, imm));
588 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
589 return;
590 }
591
592 default:
593 gcc_unreachable ();
594 }
595}
596
597/* Emit a move from SRC to DEST. Assume that the move expanders can
598 handle all moves if !can_create_pseudo_p (). The distinction is
599 important because, unlike emit_move_insn, the move expanders know
600 how to force Pmode objects into the constant pool even when the
601 constant pool address is not itself legitimate. */
602static rtx
603aarch64_emit_move (rtx dest, rtx src)
604{
605 return (can_create_pseudo_p ()
606 ? emit_move_insn (dest, src)
607 : emit_move_insn_1 (dest, src));
608}
609
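/* Split a 128-bit (TImode or TFmode) move from SRC to DST into a pair of
   64-bit moves, handling transfers between general and FP/SIMD registers
   and choosing an order that copes with overlap between DST and SRC.  */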
610void
611aarch64_split_128bit_move (rtx dst, rtx src)
612{
613 rtx low_dst;
614
615 enum machine_mode src_mode = GET_MODE (src);
616 enum machine_mode dst_mode = GET_MODE (dst);
617 int src_regno = REGNO (src);
618 int dst_regno = REGNO (dst);
619
620 gcc_assert (dst_mode == TImode || dst_mode == TFmode);
621
622 if (REG_P (dst) && REG_P (src))
623 {
 624 gcc_assert (src_mode == TImode || src_mode == TFmode);
625
626 /* Handle r -> w, w -> r. */
627 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
628 {
629 switch (src_mode) {
630 case TImode:
631 emit_insn
632 (gen_aarch64_movtilow_di (dst, gen_lowpart (word_mode, src)));
633 emit_insn
634 (gen_aarch64_movtihigh_di (dst, gen_highpart (word_mode, src)));
635 return;
636 case TFmode:
637 emit_insn
638 (gen_aarch64_movtflow_di (dst, gen_lowpart (word_mode, src)));
639 emit_insn
640 (gen_aarch64_movtfhigh_di (dst, gen_highpart (word_mode, src)));
641 return;
642 default:
643 gcc_unreachable ();
644 }
645 }
646 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
647 {
648 switch (src_mode) {
649 case TImode:
650 emit_insn
651 (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst), src));
652 emit_insn
653 (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst), src));
654 return;
655 case TFmode:
656 emit_insn
657 (gen_aarch64_movdi_tflow (gen_lowpart (word_mode, dst), src));
658 emit_insn
659 (gen_aarch64_movdi_tfhigh (gen_highpart (word_mode, dst), src));
660 return;
661 default:
662 gcc_unreachable ();
663 }
664 }
665 /* Fall through to r -> r cases. */
666 }
667
668 switch (dst_mode) {
669 case TImode:
670 low_dst = gen_lowpart (word_mode, dst);
671 if (REG_P (low_dst)
672 && reg_overlap_mentioned_p (low_dst, src))
673 {
674 aarch64_emit_move (gen_highpart (word_mode, dst),
675 gen_highpart_mode (word_mode, TImode, src));
676 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
677 }
678 else
679 {
680 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
681 aarch64_emit_move (gen_highpart (word_mode, dst),
682 gen_highpart_mode (word_mode, TImode, src));
683 }
684 return;
685 case TFmode:
686 emit_move_insn (gen_rtx_REG (DFmode, dst_regno),
687 gen_rtx_REG (DFmode, src_regno));
688 emit_move_insn (gen_rtx_REG (DFmode, dst_regno + 1),
689 gen_rtx_REG (DFmode, src_regno + 1));
690 return;
691 default:
692 gcc_unreachable ();
693 }
694}
695
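/* Return true if a 128-bit move from SRC to DST must be split; only a
   direct FP-to-FP register move can be done without splitting.  */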
696bool
697aarch64_split_128bit_move_p (rtx dst, rtx src)
698{
699 return (! REG_P (src)
700 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
701}
702
703/* Split a complex SIMD combine. */
704
705void
706aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
707{
708 enum machine_mode src_mode = GET_MODE (src1);
709 enum machine_mode dst_mode = GET_MODE (dst);
710
711 gcc_assert (VECTOR_MODE_P (dst_mode));
712
713 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
714 {
715 rtx (*gen) (rtx, rtx, rtx);
716
717 switch (src_mode)
718 {
719 case V8QImode:
720 gen = gen_aarch64_simd_combinev8qi;
721 break;
722 case V4HImode:
723 gen = gen_aarch64_simd_combinev4hi;
724 break;
725 case V2SImode:
726 gen = gen_aarch64_simd_combinev2si;
727 break;
728 case V2SFmode:
729 gen = gen_aarch64_simd_combinev2sf;
730 break;
731 case DImode:
732 gen = gen_aarch64_simd_combinedi;
733 break;
734 case DFmode:
735 gen = gen_aarch64_simd_combinedf;
736 break;
737 default:
738 gcc_unreachable ();
739 }
740
741 emit_insn (gen (dst, src1, src2));
742 return;
743 }
744}
745
746/* Split a complex SIMD move. */
747
748void
749aarch64_split_simd_move (rtx dst, rtx src)
750{
751 enum machine_mode src_mode = GET_MODE (src);
752 enum machine_mode dst_mode = GET_MODE (dst);
753
754 gcc_assert (VECTOR_MODE_P (dst_mode));
755
756 if (REG_P (dst) && REG_P (src))
757 {
758 rtx (*gen) (rtx, rtx);
759
760 gcc_assert (VECTOR_MODE_P (src_mode));
761
762 switch (src_mode)
763 {
764 case V16QImode:
 765 gen = gen_aarch64_split_simd_movv16qi;
766 break;
767 case V8HImode:
 768 gen = gen_aarch64_split_simd_movv8hi;
769 break;
770 case V4SImode:
 771 gen = gen_aarch64_split_simd_movv4si;
772 break;
773 case V2DImode:
 774 gen = gen_aarch64_split_simd_movv2di;
775 break;
776 case V4SFmode:
 777 gen = gen_aarch64_split_simd_movv4sf;
778 break;
779 case V2DFmode:
 780 gen = gen_aarch64_split_simd_movv2df;
781 break;
782 default:
783 gcc_unreachable ();
784 }
785
786 emit_insn (gen (dst, src));
787 return;
788 }
789}
790
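/* Put VALUE into a register, using a fresh pseudo when possible and
   falling back to X otherwise.  */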
 791static rtx
 792aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
793{
794 if (can_create_pseudo_p ())
 795 return force_reg (mode, value);
796 else
797 {
798 x = aarch64_emit_move (x, value);
799 return x;
800 }
801}
802
803
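/* Return an rtx representing REG plus OFFSET.  If OFFSET is not a valid
   arithmetic immediate, load it into a register first, using TEMP as a
   scratch register when pseudos cannot be created.  */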
804static rtx
805aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
806{
 807 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
808 {
809 rtx high;
810 /* Load the full offset into a register. This
811 might be improvable in the future. */
812 high = GEN_INT (offset);
813 offset = 0;
814 high = aarch64_force_temporary (mode, temp, high);
815 reg = aarch64_force_temporary (mode, temp,
816 gen_rtx_PLUS (mode, high, reg));
817 }
818 return plus_constant (mode, reg, offset);
819}
820
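/* Expand a move of constant IMM (an integer, symbol, label or CONST)
   into DEST, choosing between a direct move, a symbol load, a
   constant-pool load, or a sequence of 16-bit immediate insertions.  */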
821void
822aarch64_expand_mov_immediate (rtx dest, rtx imm)
823{
824 enum machine_mode mode = GET_MODE (dest);
825 unsigned HOST_WIDE_INT mask;
826 int i;
827 bool first;
828 unsigned HOST_WIDE_INT val;
829 bool subtargets;
830 rtx subtarget;
831 int one_match, zero_match;
832
833 gcc_assert (mode == SImode || mode == DImode);
834
835 /* Check on what type of symbol it is. */
836 if (GET_CODE (imm) == SYMBOL_REF
837 || GET_CODE (imm) == LABEL_REF
838 || GET_CODE (imm) == CONST)
839 {
840 rtx mem, base, offset;
841 enum aarch64_symbol_type sty;
842
843 /* If we have (const (plus symbol offset)), separate out the offset
844 before we start classifying the symbol. */
845 split_const (imm, &base, &offset);
846
847 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
848 switch (sty)
849 {
850 case SYMBOL_FORCE_TO_MEM:
851 if (offset != const0_rtx
852 && targetm.cannot_force_const_mem (mode, imm))
853 {
854 gcc_assert(can_create_pseudo_p ());
 855 base = aarch64_force_temporary (mode, dest, base);
856 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
857 aarch64_emit_move (dest, base);
858 return;
859 }
860 mem = force_const_mem (mode, imm);
861 gcc_assert (mem);
862 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
863 return;
864
865 case SYMBOL_SMALL_TLSGD:
866 case SYMBOL_SMALL_TLSDESC:
867 case SYMBOL_SMALL_GOTTPREL:
868 case SYMBOL_SMALL_GOT:
869 if (offset != const0_rtx)
870 {
871 gcc_assert(can_create_pseudo_p ());
 872 base = aarch64_force_temporary (mode, dest, base);
873 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
874 aarch64_emit_move (dest, base);
875 return;
876 }
877 /* FALLTHRU */
878
879 case SYMBOL_SMALL_TPREL:
880 case SYMBOL_SMALL_ABSOLUTE:
 881 case SYMBOL_TINY_ABSOLUTE:
882 aarch64_load_symref_appropriately (dest, imm, sty);
883 return;
884
885 default:
886 gcc_unreachable ();
887 }
888 }
889
890 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
891 {
892 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
893 return;
894 }
895
896 if (!CONST_INT_P (imm))
897 {
898 if (GET_CODE (imm) == HIGH)
899 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
900 else
901 {
902 rtx mem = force_const_mem (mode, imm);
903 gcc_assert (mem);
904 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
905 }
906
907 return;
908 }
909
910 if (mode == SImode)
911 {
912 /* We know we can't do this in 1 insn, and we must be able to do it
913 in two; so don't mess around looking for sequences that don't buy
914 us anything. */
915 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
916 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
917 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
918 return;
919 }
920
921 /* Remaining cases are all for DImode. */
922
923 val = INTVAL (imm);
924 subtargets = optimize && can_create_pseudo_p ();
925
926 one_match = 0;
927 zero_match = 0;
928 mask = 0xffff;
929
930 for (i = 0; i < 64; i += 16, mask <<= 16)
931 {
932 if ((val & mask) == 0)
933 zero_match++;
934 else if ((val & mask) == mask)
935 one_match++;
936 }
937
938 if (one_match == 2)
939 {
940 mask = 0xffff;
941 for (i = 0; i < 64; i += 16, mask <<= 16)
942 {
943 if ((val & mask) != mask)
944 {
945 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
946 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
947 GEN_INT ((val >> i) & 0xffff)));
948 return;
949 }
950 }
951 gcc_unreachable ();
952 }
953
954 if (zero_match == 2)
955 goto simple_sequence;
956
957 mask = 0x0ffff0000UL;
958 for (i = 16; i < 64; i += 16, mask <<= 16)
959 {
960 HOST_WIDE_INT comp = mask & ~(mask - 1);
961
962 if (aarch64_uimm12_shift (val - (val & mask)))
963 {
964 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
965
966 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
967 emit_insn (gen_adddi3 (dest, subtarget,
968 GEN_INT (val - (val & mask))));
969 return;
970 }
971 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
972 {
973 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
974
975 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
976 GEN_INT ((val + comp) & mask)));
977 emit_insn (gen_adddi3 (dest, subtarget,
978 GEN_INT (val - ((val + comp) & mask))));
979 return;
980 }
981 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
982 {
983 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
984
985 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
986 GEN_INT ((val - comp) | ~mask)));
987 emit_insn (gen_adddi3 (dest, subtarget,
988 GEN_INT (val - ((val - comp) | ~mask))));
989 return;
990 }
991 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
992 {
993 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
994
995 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
996 GEN_INT (val | ~mask)));
997 emit_insn (gen_adddi3 (dest, subtarget,
998 GEN_INT (val - (val | ~mask))));
999 return;
1000 }
1001 }
1002
1003 /* See if we can do it by arithmetically combining two
1004 immediates. */
1005 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1006 {
1007 int j;
1008 mask = 0xffff;
1009
1010 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1011 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1012 {
1013 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1014 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1015 GEN_INT (aarch64_bitmasks[i])));
1016 emit_insn (gen_adddi3 (dest, subtarget,
1017 GEN_INT (val - aarch64_bitmasks[i])));
1018 return;
1019 }
1020
1021 for (j = 0; j < 64; j += 16, mask <<= 16)
1022 {
1023 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1024 {
1025 emit_insn (gen_rtx_SET (VOIDmode, dest,
1026 GEN_INT (aarch64_bitmasks[i])));
1027 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1028 GEN_INT ((val >> j) & 0xffff)));
1029 return;
1030 }
1031 }
1032 }
1033
1034 /* See if we can do it by logically combining two immediates. */
1035 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1036 {
1037 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1038 {
1039 int j;
1040
1041 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1042 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1043 {
1044 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1045 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1046 GEN_INT (aarch64_bitmasks[i])));
1047 emit_insn (gen_iordi3 (dest, subtarget,
1048 GEN_INT (aarch64_bitmasks[j])));
1049 return;
1050 }
1051 }
1052 else if ((val & aarch64_bitmasks[i]) == val)
1053 {
1054 int j;
1055
1056 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1057 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1058 {
1059
1060 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1061 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1062 GEN_INT (aarch64_bitmasks[j])));
1063 emit_insn (gen_anddi3 (dest, subtarget,
1064 GEN_INT (aarch64_bitmasks[i])));
1065 return;
1066 }
1067 }
1068 }
1069
1070 simple_sequence:
1071 first = true;
1072 mask = 0xffff;
1073 for (i = 0; i < 64; i += 16, mask <<= 16)
1074 {
1075 if ((val & mask) != 0)
1076 {
1077 if (first)
1078 {
1079 emit_insn (gen_rtx_SET (VOIDmode, dest,
1080 GEN_INT (val & mask)));
1081 first = false;
1082 }
1083 else
1084 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1085 GEN_INT ((val >> i) & 0xffff)));
1086 }
1087 }
1088}
1089
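/* Return true if it is valid to tail-call DECL from the current function.  */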
1090static bool
1091aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1092{
1093 /* Indirect calls are not currently supported. */
1094 if (decl == NULL)
1095 return false;
1096
1097 /* Cannot tail-call to long-calls, since these are outside of the
1098 range of a branch instruction (we could handle this if we added
 1099 support for indirect tail-calls). */
1100 if (aarch64_decl_is_long_call_p (decl))
1101 return false;
1102
1103 return true;
1104}
1105
1106/* Implement TARGET_PASS_BY_REFERENCE. */
1107
1108static bool
1109aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1110 enum machine_mode mode,
1111 const_tree type,
1112 bool named ATTRIBUTE_UNUSED)
1113{
1114 HOST_WIDE_INT size;
1115 enum machine_mode dummymode;
1116 int nregs;
1117
1118 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1119 size = (mode == BLKmode && type)
1120 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1121
1122 if (type)
1123 {
1124 /* Arrays always passed by reference. */
1125 if (TREE_CODE (type) == ARRAY_TYPE)
1126 return true;
1127 /* Other aggregates based on their size. */
1128 if (AGGREGATE_TYPE_P (type))
1129 size = int_size_in_bytes (type);
1130 }
1131
 1132 /* Variable sized arguments are always passed by reference. */
1133 if (size < 0)
1134 return true;
1135
1136 /* Can this be a candidate to be passed in fp/simd register(s)? */
1137 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1138 &dummymode, &nregs,
1139 NULL))
1140 return false;
1141
1142 /* Arguments which are variable sized or larger than 2 registers are
 1143 passed by reference unless they are a homogeneous floating point
1144 aggregate. */
1145 return size > 2 * UNITS_PER_WORD;
1146}
1147
1148/* Return TRUE if VALTYPE is padded to its least significant bits. */
1149static bool
1150aarch64_return_in_msb (const_tree valtype)
1151{
1152 enum machine_mode dummy_mode;
1153 int dummy_int;
1154
1155 /* Never happens in little-endian mode. */
1156 if (!BYTES_BIG_ENDIAN)
1157 return false;
1158
1159 /* Only composite types smaller than or equal to 16 bytes can
1160 be potentially returned in registers. */
1161 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1162 || int_size_in_bytes (valtype) <= 0
1163 || int_size_in_bytes (valtype) > 16)
1164 return false;
1165
1166 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1167 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1168 is always passed/returned in the least significant bits of fp/simd
1169 register(s). */
1170 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1171 &dummy_mode, &dummy_int, NULL))
1172 return false;
1173
1174 return true;
1175}
1176
1177/* Implement TARGET_FUNCTION_VALUE.
1178 Define how to find the value returned by a function. */
1179
1180static rtx
1181aarch64_function_value (const_tree type, const_tree func,
1182 bool outgoing ATTRIBUTE_UNUSED)
1183{
1184 enum machine_mode mode;
1185 int unsignedp;
1186 int count;
1187 enum machine_mode ag_mode;
1188
1189 mode = TYPE_MODE (type);
1190 if (INTEGRAL_TYPE_P (type))
1191 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1192
1193 if (aarch64_return_in_msb (type))
1194 {
1195 HOST_WIDE_INT size = int_size_in_bytes (type);
1196
1197 if (size % UNITS_PER_WORD != 0)
1198 {
1199 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1200 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1201 }
1202 }
1203
1204 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1205 &ag_mode, &count, NULL))
1206 {
1207 if (!aarch64_composite_type_p (type, mode))
1208 {
1209 gcc_assert (count == 1 && mode == ag_mode);
1210 return gen_rtx_REG (mode, V0_REGNUM);
1211 }
1212 else
1213 {
1214 int i;
1215 rtx par;
1216
1217 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1218 for (i = 0; i < count; i++)
1219 {
1220 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1221 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1222 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1223 XVECEXP (par, 0, i) = tmp;
1224 }
1225 return par;
1226 }
1227 }
1228 else
1229 return gen_rtx_REG (mode, R0_REGNUM);
1230}
1231
1232/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1233 Return true if REGNO is the number of a hard register in which the values
1234 of called function may come back. */
1235
1236static bool
1237aarch64_function_value_regno_p (const unsigned int regno)
1238{
1239 /* Maximum of 16 bytes can be returned in the general registers. Examples
1240 of 16-byte return values are: 128-bit integers and 16-byte small
1241 structures (excluding homogeneous floating-point aggregates). */
1242 if (regno == R0_REGNUM || regno == R1_REGNUM)
1243 return true;
1244
1245 /* Up to four fp/simd registers can return a function value, e.g. a
1246 homogeneous floating-point aggregate having four members. */
1247 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1248 return !TARGET_GENERAL_REGS_ONLY;
1249
1250 return false;
1251}
1252
1253/* Implement TARGET_RETURN_IN_MEMORY.
1254
1255 If the type T of the result of a function is such that
1256 void func (T arg)
1257 would require that arg be passed as a value in a register (or set of
1258 registers) according to the parameter passing rules, then the result
1259 is returned in the same registers as would be used for such an
1260 argument. */
1261
1262static bool
1263aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1264{
1265 HOST_WIDE_INT size;
1266 enum machine_mode ag_mode;
1267 int count;
1268
1269 if (!AGGREGATE_TYPE_P (type)
1270 && TREE_CODE (type) != COMPLEX_TYPE
1271 && TREE_CODE (type) != VECTOR_TYPE)
1272 /* Simple scalar types always returned in registers. */
1273 return false;
1274
1275 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1276 type,
1277 &ag_mode,
1278 &count,
1279 NULL))
1280 return false;
1281
1282 /* Types larger than 2 registers returned in memory. */
1283 size = int_size_in_bytes (type);
1284 return (size < 0 || size > 2 * UNITS_PER_WORD);
1285}
1286
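/* Return true if an argument of the given MODE and TYPE is a candidate
   for the SIMD/FP argument registers; record the number of registers
   needed in *NREGS and the element mode in the state held by PCUM_V.  */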
1287static bool
1288aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1289 const_tree type, int *nregs)
1290{
1291 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1292 return aarch64_vfp_is_call_or_return_candidate (mode,
1293 type,
1294 &pcum->aapcs_vfp_rmode,
1295 nregs,
1296 NULL);
1297}
1298
1299/* Given MODE and TYPE of a function argument, return the alignment in
1300 bits. The idea is to suppress any stronger alignment requested by
1301 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1302 This is a helper function for local use only. */
1303
1304static unsigned int
1305aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1306{
1307 unsigned int alignment;
1308
1309 if (type)
1310 {
1311 if (!integer_zerop (TYPE_SIZE (type)))
1312 {
1313 if (TYPE_MODE (type) == mode)
1314 alignment = TYPE_ALIGN (type);
1315 else
1316 alignment = GET_MODE_ALIGNMENT (mode);
1317 }
1318 else
1319 alignment = 0;
1320 }
1321 else
1322 alignment = GET_MODE_ALIGNMENT (mode);
1323
1324 return alignment;
1325}
1326
1327/* Layout a function argument according to the AAPCS64 rules. The rule
1328 numbers refer to the rule numbers in the AAPCS64. */
1329
1330static void
1331aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1332 const_tree type,
1333 bool named ATTRIBUTE_UNUSED)
1334{
1335 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1336 int ncrn, nvrn, nregs;
1337 bool allocate_ncrn, allocate_nvrn;
1338
1339 /* We need to do this once per argument. */
1340 if (pcum->aapcs_arg_processed)
1341 return;
1342
1343 pcum->aapcs_arg_processed = true;
1344
1345 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1346 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1347 mode,
1348 type,
1349 &nregs);
1350
1351 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1352 The following code thus handles passing by SIMD/FP registers first. */
1353
1354 nvrn = pcum->aapcs_nvrn;
1355
 1356 /* C1 - C5 for floating point, homogeneous floating point aggregates (HFA)
 1357 and homogeneous short-vector aggregates (HVA). */
1358 if (allocate_nvrn)
1359 {
1360 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1361 {
1362 pcum->aapcs_nextnvrn = nvrn + nregs;
1363 if (!aarch64_composite_type_p (type, mode))
1364 {
1365 gcc_assert (nregs == 1);
1366 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1367 }
1368 else
1369 {
1370 rtx par;
1371 int i;
1372 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1373 for (i = 0; i < nregs; i++)
1374 {
1375 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1376 V0_REGNUM + nvrn + i);
1377 tmp = gen_rtx_EXPR_LIST
1378 (VOIDmode, tmp,
1379 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1380 XVECEXP (par, 0, i) = tmp;
1381 }
1382 pcum->aapcs_reg = par;
1383 }
1384 return;
1385 }
1386 else
1387 {
1388 /* C.3 NSRN is set to 8. */
1389 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1390 goto on_stack;
1391 }
1392 }
1393
1394 ncrn = pcum->aapcs_ncrn;
1395 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1396 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1397
1398
 1399 /* C6 - C9, though the sign and zero extension semantics are
 1400 handled elsewhere. This is the case where the argument fits
 1401 entirely in general registers. */
1402 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1403 {
1404 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1405
1406 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1407
1408 /* C.8 if the argument has an alignment of 16 then the NGRN is
1409 rounded up to the next even number. */
1410 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1411 {
1412 ++ncrn;
1413 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1414 }
1415 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1416 A reg is still generated for it, but the caller should be smart
1417 enough not to use it. */
1418 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1419 {
1420 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1421 }
1422 else
1423 {
1424 rtx par;
1425 int i;
1426
1427 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1428 for (i = 0; i < nregs; i++)
1429 {
1430 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1431 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1432 GEN_INT (i * UNITS_PER_WORD));
1433 XVECEXP (par, 0, i) = tmp;
1434 }
1435 pcum->aapcs_reg = par;
1436 }
1437
1438 pcum->aapcs_nextncrn = ncrn + nregs;
1439 return;
1440 }
1441
1442 /* C.11 */
1443 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1444
1445 /* The argument is passed on stack; record the needed number of words for
1446 this argument (we can re-use NREGS) and align the total size if
1447 necessary. */
1448on_stack:
1449 pcum->aapcs_stack_words = nregs;
1450 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1451 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1452 16 / UNITS_PER_WORD) + 1;
1453 return;
1454}
1455
1456/* Implement TARGET_FUNCTION_ARG. */
1457
1458static rtx
1459aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1460 const_tree type, bool named)
1461{
1462 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1463 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1464
1465 if (mode == VOIDmode)
1466 return NULL_RTX;
1467
1468 aarch64_layout_arg (pcum_v, mode, type, named);
1469 return pcum->aapcs_reg;
1470}
1471
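/* Initialise the cumulative argument state PCUM before laying out the
   arguments of a call.  */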
1472void
1473aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1474 const_tree fntype ATTRIBUTE_UNUSED,
1475 rtx libname ATTRIBUTE_UNUSED,
1476 const_tree fndecl ATTRIBUTE_UNUSED,
1477 unsigned n_named ATTRIBUTE_UNUSED)
1478{
1479 pcum->aapcs_ncrn = 0;
1480 pcum->aapcs_nvrn = 0;
1481 pcum->aapcs_nextncrn = 0;
1482 pcum->aapcs_nextnvrn = 0;
1483 pcum->pcs_variant = ARM_PCS_AAPCS64;
1484 pcum->aapcs_reg = NULL_RTX;
1485 pcum->aapcs_arg_processed = false;
1486 pcum->aapcs_stack_words = 0;
1487 pcum->aapcs_stack_size = 0;
1488
1489 return;
1490}
1491
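/* Advance the argument state in PCUM past an argument of the given MODE
   and TYPE that has just been laid out.  */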
1492static void
1493aarch64_function_arg_advance (cumulative_args_t pcum_v,
1494 enum machine_mode mode,
1495 const_tree type,
1496 bool named)
1497{
1498 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1499 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1500 {
1501 aarch64_layout_arg (pcum_v, mode, type, named);
1502 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1503 != (pcum->aapcs_stack_words != 0));
1504 pcum->aapcs_arg_processed = false;
1505 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1506 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1507 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1508 pcum->aapcs_stack_words = 0;
1509 pcum->aapcs_reg = NULL_RTX;
1510 }
1511}
1512
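/* Return true if REGNO is a register that can be used to pass function
   arguments, either in the general or the FP/SIMD argument registers.  */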
1513bool
1514aarch64_function_arg_regno_p (unsigned regno)
1515{
1516 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1517 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1518}
1519
1520/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1521 PARM_BOUNDARY bits of alignment, but will be given anything up
1522 to STACK_BOUNDARY bits if the type requires it. This makes sure
1523 that both before and after the layout of each argument, the Next
1524 Stacked Argument Address (NSAA) will have a minimum alignment of
1525 8 bytes. */
1526
1527static unsigned int
1528aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1529{
1530 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1531
1532 if (alignment < PARM_BOUNDARY)
1533 alignment = PARM_BOUNDARY;
1534 if (alignment > STACK_BOUNDARY)
1535 alignment = STACK_BOUNDARY;
1536 return alignment;
1537}
1538
1539/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1540
1541 Return true if an argument passed on the stack should be padded upwards,
1542 i.e. if the least-significant byte of the stack slot has useful data.
1543
1544 Small aggregate types are placed in the lowest memory address.
1545
1546 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1547
1548bool
1549aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1550{
1551 /* On little-endian targets, the least significant byte of every stack
1552 argument is passed at the lowest byte address of the stack slot. */
1553 if (!BYTES_BIG_ENDIAN)
1554 return true;
1555
1556 /* Otherwise, integral types and floating point types are padded downward:
1557 the least significant byte of a stack argument is passed at the highest
1558 byte address of the stack slot. */
1559 if (type
1560 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type))
1561 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1562 return false;
1563
1564 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1565 return true;
1566}
1567
1568/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1569
1570 It specifies padding for the last (may also be the only)
1571 element of a block move between registers and memory. If
1572 assuming the block is in the memory, padding upward means that
1573 the last element is padded after its highest significant byte,
 1574 while in downward padding, the last element is padded at
 1575 its least significant byte side.
1576
1577 Small aggregates and small complex types are always padded
1578 upwards.
1579
1580 We don't need to worry about homogeneous floating-point or
1581 short-vector aggregates; their move is not affected by the
1582 padding direction determined here. Regardless of endianness,
1583 each element of such an aggregate is put in the least
1584 significant bits of a fp/simd register.
1585
1586 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1587 register has useful data, and return the opposite if the most
1588 significant byte does. */
1589
1590bool
1591aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1592 bool first ATTRIBUTE_UNUSED)
1593{
1594
1595 /* Small composite types are always padded upward. */
1596 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1597 {
1598 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1599 : GET_MODE_SIZE (mode));
1600 if (size < 2 * UNITS_PER_WORD)
1601 return true;
1602 }
1603
1604 /* Otherwise, use the default padding. */
1605 return !BYTES_BIG_ENDIAN;
1606}
1607
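/* Return the mode used for return values of libgcc comparison routines
   (SImode on AArch64).  */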
1608static enum machine_mode
1609aarch64_libgcc_cmp_return_mode (void)
1610{
1611 return SImode;
1612}
1613
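/* Return true if the current function must use a frame pointer.  */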
1614static bool
1615aarch64_frame_pointer_required (void)
1616{
1617 /* If the function contains dynamic stack allocations, we need to
1618 use the frame pointer to access the static parts of the frame. */
1619 if (cfun->calls_alloca)
1620 return true;
1621
1622 /* We may have turned flag_omit_frame_pointer on in order to have this
1623 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1624 and we'll check it here.
1625 If we really did set flag_omit_frame_pointer normally, then we return false
1626 (no frame pointer required) in all cases. */
1627
1628 if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1629 return false;
1630 else if (flag_omit_leaf_frame_pointer)
1631 return !crtl->is_leaf;
1632 return true;
1633}
1634
1635/* Mark the registers that need to be saved by the callee and calculate
1636 the size of the callee-saved registers area and frame record (both FP
1637 and LR may be omitted). */
1638static void
1639aarch64_layout_frame (void)
1640{
1641 HOST_WIDE_INT offset = 0;
1642 int regno;
1643
1644 if (reload_completed && cfun->machine->frame.laid_out)
1645 return;
1646
1647 cfun->machine->frame.fp_lr_offset = 0;
1648
1649 /* First mark all the registers that really need to be saved... */
1650 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1651 cfun->machine->frame.reg_offset[regno] = -1;
1652
1653 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1654 cfun->machine->frame.reg_offset[regno] = -1;
1655
1656 /* ... that includes the eh data registers (if needed)... */
1657 if (crtl->calls_eh_return)
1658 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1659 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1660
1661 /* ... and any callee saved register that dataflow says is live. */
1662 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1663 if (df_regs_ever_live_p (regno)
1664 && !call_used_regs[regno])
1665 cfun->machine->frame.reg_offset[regno] = 0;
1666
1667 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1668 if (df_regs_ever_live_p (regno)
1669 && !call_used_regs[regno])
1670 cfun->machine->frame.reg_offset[regno] = 0;
1671
1672 if (frame_pointer_needed)
1673 {
1674 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1675 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1676 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1677 }
1678
1679 /* Now assign stack slots for them. */
1680 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1681 if (cfun->machine->frame.reg_offset[regno] != -1)
1682 {
1683 cfun->machine->frame.reg_offset[regno] = offset;
1684 offset += UNITS_PER_WORD;
1685 }
1686
1687 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1688 if (cfun->machine->frame.reg_offset[regno] != -1)
1689 {
1690 cfun->machine->frame.reg_offset[regno] = offset;
1691 offset += UNITS_PER_WORD;
1692 }
1693
1694 if (frame_pointer_needed)
1695 {
1696 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1697 offset += UNITS_PER_WORD;
1698 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1699 }
1700
1701 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1702 {
1703 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1704 offset += UNITS_PER_WORD;
1705 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1706 }
1707
1708 cfun->machine->frame.padding0 =
1709 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1710 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1711
1712 cfun->machine->frame.saved_regs_size = offset;
1713 cfun->machine->frame.laid_out = true;
1714}
1715
1716/* Make the last instruction frame-related and note that it performs
1717 the operation described by FRAME_PATTERN. */
1718
1719static void
1720aarch64_set_frame_expr (rtx frame_pattern)
1721{
1722 rtx insn;
1723
1724 insn = get_last_insn ();
1725 RTX_FRAME_RELATED_P (insn) = 1;
1726 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1727 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1728 frame_pattern,
1729 REG_NOTES (insn));
1730}
1731
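/* Return true if REGNO has been assigned a callee-save slot by
   aarch64_layout_frame.  */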
1732static bool
1733aarch64_register_saved_on_entry (int regno)
1734{
1735 return cfun->machine->frame.reg_offset[regno] != -1;
1736}
1737
1738
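/* Save (RESTORE false) or restore (RESTORE true) the callee-saved FP/SIMD
   registers addressed from BASE_RTX, starting at START_OFFSET and stepping
   by INCREMENT, using store-pair/load-pair instructions for adjacent saved
   registers where possible.  */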
1739static void
1740aarch64_save_or_restore_fprs (int start_offset, int increment,
1741 bool restore, rtx base_rtx)
1742
1743{
1744 unsigned regno;
1745 unsigned regno2;
1746 rtx insn;
1747 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1748
1749
1750 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1751 {
1752 if (aarch64_register_saved_on_entry (regno))
1753 {
1754 rtx mem;
1755 mem = gen_mem_ref (DFmode,
1756 plus_constant (Pmode,
1757 base_rtx,
1758 start_offset));
1759
1760 for (regno2 = regno + 1;
1761 regno2 <= V31_REGNUM
1762 && !aarch64_register_saved_on_entry (regno2);
1763 regno2++)
1764 {
1765 /* Empty loop. */
1766 }
1767 if (regno2 <= V31_REGNUM &&
1768 aarch64_register_saved_on_entry (regno2))
1769 {
1770 rtx mem2;
1771 /* Next highest register to be saved. */
1772 mem2 = gen_mem_ref (DFmode,
1773 plus_constant
1774 (Pmode,
1775 base_rtx,
1776 start_offset + increment));
1777 if (restore == false)
1778 {
1779 insn = emit_insn
1780 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1781 mem2, gen_rtx_REG (DFmode, regno2)));
1782
1783 }
1784 else
1785 {
1786 insn = emit_insn
1787 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1788 gen_rtx_REG (DFmode, regno2), mem2));
1789
1790 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno));
1791 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno2));
1792 }
1793
1794 /* The first part of a frame-related parallel insn
1795 is always assumed to be relevant to the frame
 1796 calculations; subsequent parts are only
1797 frame-related if explicitly marked. */
1798 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1799 1)) = 1;
1800 regno = regno2;
1801 start_offset += increment * 2;
1802 }
1803 else
1804 {
1805 if (restore == false)
1806 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1807 else
1808 {
1809 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1810 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1811 }
1812 start_offset += increment;
1813 }
1814 RTX_FRAME_RELATED_P (insn) = 1;
1815 }
1816 }
1817
1818}
1819
1820
 1821/* Save or restore the callee-saved registers; OFFSET is the offset
 1822 from the stack pointer at which the saves and restores happen. */
1823static void
1824aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1825 bool restore)
1826{
1827 rtx insn;
1828 rtx base_rtx = stack_pointer_rtx;
1829 HOST_WIDE_INT start_offset = offset;
1830 HOST_WIDE_INT increment = UNITS_PER_WORD;
1831 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1832 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1833 unsigned regno;
1834 unsigned regno2;
1835
1836 for (regno = R0_REGNUM; regno <= limit; regno++)
1837 {
1838 if (aarch64_register_saved_on_entry (regno))
1839 {
1840 rtx mem;
1841 mem = gen_mem_ref (Pmode,
1842 plus_constant (Pmode,
1843 base_rtx,
1844 start_offset));
1845
1846 for (regno2 = regno + 1;
1847 regno2 <= limit
1848 && !aarch64_register_saved_on_entry (regno2);
1849 regno2++)
1850 {
1851 /* Empty loop. */
1852 }
1853 if (regno2 <= limit &&
1854 aarch64_register_saved_on_entry (regno2))
1855 {
1856 rtx mem2;
1857 /* Next highest register to be saved. */
1858 mem2 = gen_mem_ref (Pmode,
1859 plus_constant
1860 (Pmode,
1861 base_rtx,
1862 start_offset + increment));
1863 if (restore == false)
1864 {
1865 insn = emit_insn
1866 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1867 mem2, gen_rtx_REG (DImode, regno2)));
1868
1869 }
1870 else
1871 {
1872 insn = emit_insn
1873 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1874 gen_rtx_REG (DImode, regno2), mem2));
1875
1876 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1877 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1878 }
1879
1880 /* The first part of a frame-related parallel insn
1881 is always assumed to be relevant to the frame
 1882 calculations; subsequent parts are only
1883 frame-related if explicitly marked. */
1884 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1885 1)) = 1;
1886 regno = regno2;
1887 start_offset += increment * 2;
1888 }
1889 else
1890 {
1891 if (restore == false)
1892 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1893 else
1894 {
1895 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1896 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1897 }
1898 start_offset += increment;
1899 }
1900 RTX_FRAME_RELATED_P (insn) = 1;
1901 }
1902 }
1903
1904 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1905
1906}
1907
1908/* AArch64 stack frames generated by this compiler look like:
1909
1910 +-------------------------------+
1911 | |
1912 | incoming stack arguments |
1913 | |
1914 +-------------------------------+ <-- arg_pointer_rtx
1915 | |
1916 | callee-allocated save area |
1917 | for register varargs |
1918 | |
1919 +-------------------------------+
1920 | |
1921 | local variables |
1922 | |
1923 +-------------------------------+ <-- frame_pointer_rtx
1924 | |
1925 | callee-saved registers |
1926 | |
1927 +-------------------------------+
1928 | LR' |
1929 +-------------------------------+
1930 | FP' |
1931 P +-------------------------------+ <-- hard_frame_pointer_rtx
1932 | dynamic allocation |
1933 +-------------------------------+
1934 | |
1935 | outgoing stack arguments |
1936 | |
1937 +-------------------------------+ <-- stack_pointer_rtx
1938
1939 Dynamic stack allocations such as alloca insert data at point P.
1940 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
1941 hard_frame_pointer_rtx unchanged. */
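
/* As a rough illustration only (a hypothetical function with a small
   frame, the frame pointer in use and no outgoing arguments), the code
   below would typically emit a prologue/epilogue along these lines:

	stp	x29, x30, [sp, #-32]!	// allocate frame, save FP'/LR'
	add	x29, sp, #0		// x29 = hard_frame_pointer_rtx
	...				// callee saves, function body
	ldp	x29, x30, [sp], #32	// restore FP'/LR', free frame
	ret

   The exact sequence depends on the frame size, on fp_offset and on
   which callee-saved registers are live; see aarch64_expand_prologue
   and aarch64_expand_epilogue below.  */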
1942
1943/* Generate the prologue instructions for entry into a function.
1944   Establish the stack frame by decreasing the stack pointer by a
1945   properly calculated amount and, if necessary, create a frame record
1946   filled with the values of LR and the previous frame pointer.  The
1947   current FP is also set up if it is in use.  */
1948
1949void
1950aarch64_expand_prologue (void)
1951{
1952 /* sub sp, sp, #<frame_size>
1953 stp {fp, lr}, [sp, #<frame_size> - 16]
1954 add fp, sp, #<frame_size> - hardfp_offset
1955 stp {cs_reg}, [fp, #-16] etc.
1956
1957 sub sp, sp, <final_adjustment_if_any>
1958 */
1959 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
1960 HOST_WIDE_INT frame_size, offset;
1961 HOST_WIDE_INT fp_offset; /* FP offset from SP */
1962 rtx insn;
1963
1964 aarch64_layout_frame ();
1965 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
1966 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
1967 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
1968 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
1969 + crtl->outgoing_args_size);
1970 offset = frame_size = AARCH64_ROUND_UP (frame_size,
1971 STACK_BOUNDARY / BITS_PER_UNIT);
1972
1973 if (flag_stack_usage_info)
1974 current_function_static_stack_size = frame_size;
1975
1976 fp_offset = (offset
1977 - original_frame_size
1978 - cfun->machine->frame.saved_regs_size);
1979
44c0e7b9 1980 /* Store pairs and load pairs have an immediate offset range of
	 only -512 to 504 (a 7-bit signed value scaled by 8).  */
1981 if (offset >= 512)
1982 {
1983 /* When the frame has a large size, an initial decrease is done on
1984 the stack pointer to jump over the callee-allocated save area for
1985 register varargs, the local variable area and/or the callee-saved
1986 register area. This will allow the pre-index write-back
1987 store pair instructions to be used for setting up the stack frame
1988 efficiently. */
1989 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
1990 if (offset >= 512)
1991 offset = cfun->machine->frame.saved_regs_size;
1992
1993 frame_size -= (offset + crtl->outgoing_args_size);
1994 fp_offset = 0;
1995
1996 if (frame_size >= 0x1000000)
1997 {
1998 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
1999 emit_move_insn (op0, GEN_INT (-frame_size));
2000 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2001 aarch64_set_frame_expr (gen_rtx_SET
2002 (Pmode, stack_pointer_rtx,
2003 gen_rtx_PLUS (Pmode,
2004 stack_pointer_rtx,
2005 GEN_INT (-frame_size))));
2006 }
2007 else if (frame_size > 0)
2008 {
2009 if ((frame_size & 0xfff) != frame_size)
2010 {
2011 insn = emit_insn (gen_add2_insn
2012 (stack_pointer_rtx,
2013 GEN_INT (-(frame_size
2014 & ~(HOST_WIDE_INT)0xfff))));
2015 RTX_FRAME_RELATED_P (insn) = 1;
2016 }
2017 if ((frame_size & 0xfff) != 0)
2018 {
2019 insn = emit_insn (gen_add2_insn
2020 (stack_pointer_rtx,
2021 GEN_INT (-(frame_size
2022 & (HOST_WIDE_INT)0xfff))));
2023 RTX_FRAME_RELATED_P (insn) = 1;
2024 }
2025 }
2026 }
2027 else
2028 frame_size = -1;
2029
2030 if (offset > 0)
2031 {
2032 /* Save the frame pointer and lr if the frame pointer is needed
2033 first. Make the frame pointer point to the location of the
2034 old frame pointer on the stack. */
2035 if (frame_pointer_needed)
2036 {
2037 rtx mem_fp, mem_lr;
2038
2039 if (fp_offset)
2040 {
2041 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2042 GEN_INT (-offset)));
2043 RTX_FRAME_RELATED_P (insn) = 1;
2044 aarch64_set_frame_expr (gen_rtx_SET
2045 (Pmode, stack_pointer_rtx,
2046 gen_rtx_MINUS (Pmode,
2047 stack_pointer_rtx,
2048 GEN_INT (offset))));
2049 mem_fp = gen_frame_mem (DImode,
2050 plus_constant (Pmode,
2051 stack_pointer_rtx,
2052 fp_offset));
2053 mem_lr = gen_frame_mem (DImode,
2054 plus_constant (Pmode,
2055 stack_pointer_rtx,
2056 fp_offset
2057 + UNITS_PER_WORD));
2058 insn = emit_insn (gen_store_pairdi (mem_fp,
2059 hard_frame_pointer_rtx,
2060 mem_lr,
2061 gen_rtx_REG (DImode,
2062 LR_REGNUM)));
2063 }
2064 else
2065 {
2066 insn = emit_insn (gen_storewb_pairdi_di
2067 (stack_pointer_rtx, stack_pointer_rtx,
2068 hard_frame_pointer_rtx,
2069 gen_rtx_REG (DImode, LR_REGNUM),
2070 GEN_INT (-offset),
2071 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2072 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2073 }
2074
2075 /* The first part of a frame-related parallel insn is always
2076    assumed to be relevant to the frame calculations;
2077    subsequent parts are only frame-related if explicitly
2078    marked.  */
2079 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2080 RTX_FRAME_RELATED_P (insn) = 1;
2081
2082 /* Set up frame pointer to point to the location of the
2083 previous frame pointer on the stack. */
2084 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2085 stack_pointer_rtx,
2086 GEN_INT (fp_offset)));
2087 aarch64_set_frame_expr (gen_rtx_SET
2088 (Pmode, hard_frame_pointer_rtx,
2089 gen_rtx_PLUS (Pmode,
2090 stack_pointer_rtx,
2091 GEN_INT (fp_offset))));
2092 RTX_FRAME_RELATED_P (insn) = 1;
2093 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2094 hard_frame_pointer_rtx));
2095 }
2096 else
2097 {
2098 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2099 GEN_INT (-offset)));
2100 RTX_FRAME_RELATED_P (insn) = 1;
2101 }
2102
2103 aarch64_save_or_restore_callee_save_registers
2104 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2105 }
2106
2107 /* When offset >= 512:
2108	sub sp, sp, #<outgoing_args_size> */
2109 if (frame_size > -1)
2110 {
2111 if (crtl->outgoing_args_size > 0)
2112 {
2113 insn = emit_insn (gen_add2_insn
2114 (stack_pointer_rtx,
2115 GEN_INT (- crtl->outgoing_args_size)));
2116 RTX_FRAME_RELATED_P (insn) = 1;
2117 }
2118 }
2119}
2120
2121/* Generate the epilogue instructions for returning from a function. */
2122void
2123aarch64_expand_epilogue (bool for_sibcall)
2124{
2125 HOST_WIDE_INT original_frame_size, frame_size, offset;
2126 HOST_WIDE_INT fp_offset;
2127 rtx insn;
44c0e7b9 2128 rtx cfa_reg;
2129
2130 aarch64_layout_frame ();
2131 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2132 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2133 + crtl->outgoing_args_size);
2134 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2135 STACK_BOUNDARY / BITS_PER_UNIT);
2136
2137 fp_offset = (offset
2138 - original_frame_size
2139 - cfun->machine->frame.saved_regs_size);
2140
2141 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2142
2143 /* Store pairs and load pairs have an immediate offset range of
	 only -512 to 504 (a 7-bit signed value scaled by 8).  */
2144 if (offset >= 512)
2145 {
2146 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2147 if (offset >= 512)
2148 offset = cfun->machine->frame.saved_regs_size;
2149
2150 frame_size -= (offset + crtl->outgoing_args_size);
2151 fp_offset = 0;
2152 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2153 {
2154 insn = emit_insn (gen_add2_insn
2155 (stack_pointer_rtx,
2156 GEN_INT (crtl->outgoing_args_size)));
2157 RTX_FRAME_RELATED_P (insn) = 1;
2158 }
2159 }
2160 else
2161 frame_size = -1;
2162
2163 /* If there were outgoing arguments or we've done dynamic stack
2164 allocation, then restore the stack pointer from the frame
2165 pointer. This is at most one insn and more efficient than using
2166 GCC's internal mechanism. */
2167 if (frame_pointer_needed
2168 && (crtl->outgoing_args_size || cfun->calls_alloca))
2169 {
2170 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2171 hard_frame_pointer_rtx,
2172 GEN_INT (- fp_offset)));
2173 RTX_FRAME_RELATED_P (insn) = 1;
2174 /* As SP is set to (FP - fp_offset), according to the rules in
2175 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2176 from the value of SP from now on. */
2177 cfa_reg = stack_pointer_rtx;
2178 }
2179
2180 aarch64_save_or_restore_callee_save_registers
2181 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2182
2183 /* Restore the frame pointer and lr if the frame pointer is needed. */
2184 if (offset > 0)
2185 {
2186 if (frame_pointer_needed)
2187 {
2188 rtx mem_fp, mem_lr;
2189
2190 if (fp_offset)
2191 {
2192 mem_fp = gen_frame_mem (DImode,
2193 plus_constant (Pmode,
2194 stack_pointer_rtx,
2195 fp_offset));
2196 mem_lr = gen_frame_mem (DImode,
2197 plus_constant (Pmode,
2198 stack_pointer_rtx,
2199 fp_offset
2200 + UNITS_PER_WORD));
2201 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2202 mem_fp,
2203 gen_rtx_REG (DImode,
2204 LR_REGNUM),
2205 mem_lr));
2206 }
2207 else
2208 {
2209 insn = emit_insn (gen_loadwb_pairdi_di
2210 (stack_pointer_rtx,
2211 stack_pointer_rtx,
2212 hard_frame_pointer_rtx,
2213 gen_rtx_REG (DImode, LR_REGNUM),
2214 GEN_INT (offset),
2215 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2216 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2217 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2218 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2219 plus_constant (Pmode, cfa_reg,
2220 offset))));
2221 }
2222
2223 /* The first part of a frame-related parallel insn
2224    is always assumed to be relevant to the frame
2225    calculations; subsequent parts are only
2226    frame-related if explicitly marked.  */
2227 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2228 RTX_FRAME_RELATED_P (insn) = 1;
2229 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2230 add_reg_note (insn, REG_CFA_RESTORE,
2231 gen_rtx_REG (DImode, LR_REGNUM));
2232
2233 if (fp_offset)
2234 {
2235 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2236 GEN_INT (offset)));
2237 RTX_FRAME_RELATED_P (insn) = 1;
2238 }
2239 }
2240 else
2241 {
2242 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2243 GEN_INT (offset)));
2244 RTX_FRAME_RELATED_P (insn) = 1;
2245 }
2246 }
2247
2248 /* Stack adjustment for exception handler. */
2249 if (crtl->calls_eh_return)
2250 {
2251 /* We need to unwind the stack by the offset computed by
2252 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2253 based on SP. Ideally we would update the SP and define the
2254 CFA along the lines of:
2255
2256 SP = SP + EH_RETURN_STACKADJ_RTX
2257 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2258
2259 However the dwarf emitter only understands a constant
2260 register offset.
2261
2262 The solution chosen here is to use the otherwise unused IP0
2263 as a temporary register to hold the current SP value. The
2264 CFA is described using IP0 then SP is modified. */
2265
2266 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2267
2268 insn = emit_move_insn (ip0, stack_pointer_rtx);
2269 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2270 RTX_FRAME_RELATED_P (insn) = 1;
2271
2272 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2273
2274 /* Ensure the assignment to IP0 does not get optimized away. */
2275 emit_use (ip0);
2276 }
2277
2278 if (frame_size > -1)
2279 {
2280 if (frame_size >= 0x1000000)
2281 {
2282 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2283 emit_move_insn (op0, GEN_INT (frame_size));
2284 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2285 aarch64_set_frame_expr (gen_rtx_SET
2286 (Pmode, stack_pointer_rtx,
2287 gen_rtx_PLUS (Pmode,
2288 stack_pointer_rtx,
2289 GEN_INT (frame_size))));
2290 }
2291 else if (frame_size > 0)
2292 {
2293 if ((frame_size & 0xfff) != 0)
2294 {
2295 insn = emit_insn (gen_add2_insn
2296 (stack_pointer_rtx,
2297 GEN_INT ((frame_size
2298 & (HOST_WIDE_INT) 0xfff))));
2299 RTX_FRAME_RELATED_P (insn) = 1;
2300 }
2301 if ((frame_size & 0xfff) != frame_size)
2302 {
2303 insn = emit_insn (gen_add2_insn
2304 (stack_pointer_rtx,
2305 GEN_INT ((frame_size
2306 & ~ (HOST_WIDE_INT) 0xfff))));
2307 RTX_FRAME_RELATED_P (insn) = 1;
2308 }
2309 }
2310
2311 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2312 gen_rtx_PLUS (Pmode,
2313 stack_pointer_rtx,
2314 GEN_INT (offset))));
2315 }
2316
2317 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2318 if (!for_sibcall)
2319 emit_jump_insn (ret_rtx);
2320}
2321
2322/* Return the place to copy the exception unwinding return address to.
2323 This will probably be a stack slot, but could (in theory) be the
2324 return register.  */
2325rtx
2326aarch64_final_eh_return_addr (void)
2327{
2328 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2329 aarch64_layout_frame ();
2330 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2331 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2332 + crtl->outgoing_args_size);
2333 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2334 STACK_BOUNDARY / BITS_PER_UNIT);
2335 fp_offset = offset
2336 - original_frame_size
2337 - cfun->machine->frame.saved_regs_size;
2338
2339 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2340 return gen_rtx_REG (DImode, LR_REGNUM);
2341
2342 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2343 result in a store to save LR introduced by builtin_eh_return () being
2344 incorrectly deleted because the alias is not detected.
2345 So in the calculation of the address to copy the exception unwinding
2346 return address to, we note 2 cases.
2347 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2348 we return a SP-relative location since all the addresses are SP-relative
2349 in this case. This prevents the store from being optimized away.
2350 If the fp_offset is not 0, then the addresses will be FP-relative and
2351 therefore we return a FP-relative location. */
2352
2353 if (frame_pointer_needed)
2354 {
2355 if (fp_offset)
2356 return gen_frame_mem (DImode,
2357 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2358 else
2359 return gen_frame_mem (DImode,
2360 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2361 }
2362
2363 /* If FP is not needed, we calculate the location of LR, which would be
2364 at the top of the saved registers block. */
2365
2366 return gen_frame_mem (DImode,
2367 plus_constant (Pmode,
2368 stack_pointer_rtx,
2369 fp_offset
2370 + cfun->machine->frame.saved_regs_size
2371 - 2 * UNITS_PER_WORD));
2372}
2373
2374/* Output code to build up a constant in a register. */
2375static void
d9600ae5 2376aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2377{
2378 if (aarch64_bitmask_imm (val, DImode))
d9600ae5 2379 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2380 else
2381 {
2382 int i;
2383 int ncount = 0;
2384 int zcount = 0;
2385 HOST_WIDE_INT valp = val >> 16;
2386 HOST_WIDE_INT valm;
2387 HOST_WIDE_INT tval;
2388
2389 for (i = 16; i < 64; i += 16)
2390 {
2391 valm = (valp & 0xffff);
2392
2393 if (valm != 0)
2394 ++ zcount;
2395
2396 if (valm != 0xffff)
2397 ++ ncount;
2398
2399 valp >>= 16;
2400 }
2401
2402 /* zcount contains the number of additional MOVK instructions
2403 required if the constant is built up with an initial MOVZ instruction,
2404 while ncount is the number of MOVK instructions required if starting
2405 with a MOVN instruction.  Choose the sequence that yields the
2406 fewer instructions, preferring MOVZ instructions when both
2407 counts are the same.  */
2408 if (ncount < zcount)
2409 {
2410 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2411 GEN_INT ((~val) & 0xffff));
2412 tval = 0xffff;
2413 }
2414 else
2415 {
2416 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2417 GEN_INT (val & 0xffff));
2418 tval = 0;
2419 }
2420
2421 val >>= 16;
2422
2423 for (i = 16; i < 64; i += 16)
2424 {
2425 if ((val & 0xffff) != tval)
2426 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2427 GEN_INT (i), GEN_INT (val & 0xffff)));
2428 val >>= 16;
2429 }
2430 }
2431}
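
/* For example (values are illustrative only): building
   0x0000123400005678 takes the MOVZ path, giving roughly

	movz	x<regnum>, #0x5678
	movk	x<regnum>, #0x1234, lsl #32

   while a mostly-ones value such as 0xffffffffffff1234 takes the MOVN
   path and needs only

	movn	x<regnum>, #0xedcb

   since ~0x1234 is 0xedcb in the low 16 bits and the remaining 16-bit
   chunks are already 0xffff.  */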
2432
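/* Add DELTA to the contents of register REGNUM, using register
   SCRATCHREG as a scratch when the addition cannot be expressed with
   add/sub immediates alone.  */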
2433static void
d9600ae5 2434aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2435{
2436 HOST_WIDE_INT mdelta = delta;
2437 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2438 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2439
2440 if (mdelta < 0)
2441 mdelta = -mdelta;
2442
2443 if (mdelta >= 4096 * 4096)
2444 {
2445 aarch64_build_constant (scratchreg, delta);
2446 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2447 }
2448 else if (mdelta > 0)
2449 {
43e9d192 2450 if (mdelta >= 4096)
2451 {
2452 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2453 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2454 if (delta < 0)
2455 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2456 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2457 else
2458 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2459 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2460 }
43e9d192 2461 if (mdelta % 4096 != 0)
2462 {
2463 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2464 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2465 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2466 }
2467 }
2468}
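
/* For instance (illustrative values), a delta of 0x5005 is split into
   a shifted part and a remainder, giving roughly

	mov	x<scratchreg>, #5
	add	x<regnum>, x<regnum>, x<scratchreg>, lsl #12
	add	x<regnum>, x<regnum>, #5

   while deltas of 4096*4096 or more are built in full in SCRATCHREG
   with aarch64_build_constant and then added in a single step.  */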
2469
2470/* Output code to add DELTA to the first argument, and then jump
2471 to FUNCTION. Used for C++ multiple inheritance. */
2472static void
2473aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2474 HOST_WIDE_INT delta,
2475 HOST_WIDE_INT vcall_offset,
2476 tree function)
2477{
2478 /* The this pointer is always in x0. Note that this differs from
2479 Arm, where the this pointer may be bumped to r1 if r0 is required
2480 to return a pointer to an aggregate. On AArch64 a result value
2481 pointer will be in x8. */
2482 int this_regno = R0_REGNUM;
75f1d6fc 2483 rtx this_rtx, temp0, temp1, addr, insn, funexp;
43e9d192 2484
2485 reload_completed = 1;
2486 emit_note (NOTE_INSN_PROLOGUE_END);
2487
2488 if (vcall_offset == 0)
d9600ae5 2489 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2490 else
2491 {
2492 gcc_assert ((vcall_offset & 0x7) == 0);
2493
2494 this_rtx = gen_rtx_REG (Pmode, this_regno);
2495 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2496 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
43e9d192 2497
2498 addr = this_rtx;
2499 if (delta != 0)
2500 {
2501 if (delta >= -256 && delta < 256)
2502 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2503 plus_constant (Pmode, this_rtx, delta));
2504 else
d9600ae5 2505 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2506 }
2507
2508 aarch64_emit_move (temp0, gen_rtx_MEM (Pmode, addr));
2509
43e9d192 2510 if (vcall_offset >= -256 && vcall_offset < 32768)
75f1d6fc 2511 addr = plus_constant (Pmode, temp0, vcall_offset);
2512 else
2513 {
d9600ae5 2514 aarch64_build_constant (IP1_REGNUM, vcall_offset);
75f1d6fc 2515 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2516 }
2517
2518 aarch64_emit_move (temp1, gen_rtx_MEM (Pmode, addr));
2519 emit_insn (gen_add2_insn (this_rtx, temp1));
2520 }
2521
2522 /* Generate a tail call to the target function. */
2523 if (!TREE_USED (function))
2524 {
2525 assemble_external (function);
2526 TREE_USED (function) = 1;
2527 }
2528 funexp = XEXP (DECL_RTL (function), 0);
2529 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2530 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2531 SIBLING_CALL_P (insn) = 1;
2532
2533 insn = get_insns ();
2534 shorten_branches (insn);
2535 final_start_function (insn, file, 1);
2536 final (insn, file, 1);
43e9d192 2537 final_end_function ();
2538
2539 /* Stop pretending to be a post-reload pass. */
2540 reload_completed = 0;
2541}
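
/* By way of illustration, a thunk with DELTA == 8 and VCALL_OFFSET == 0
   for a hypothetical target function boils down to something like

	add	x0, x0, #8
	b	<function>

   i.e. adjust the this pointer in x0 and tail-call the real method.  */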
2542
2543static int
2544aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2545{
2546 if (GET_CODE (*x) == SYMBOL_REF)
2547 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2548
2549 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2550 TLS offsets, not real symbol references. */
2551 if (GET_CODE (*x) == UNSPEC
2552 && XINT (*x, 1) == UNSPEC_TLS)
2553 return -1;
2554
2555 return 0;
2556}
2557
2558static bool
2559aarch64_tls_referenced_p (rtx x)
2560{
2561 if (!TARGET_HAVE_TLS)
2562 return false;
2563
2564 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2565}
2566
2567
2568static int
2569aarch64_bitmasks_cmp (const void *i1, const void *i2)
2570{
2571 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2572 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2573
2574 if (*imm1 < *imm2)
2575 return -1;
2576 if (*imm1 > *imm2)
2577 return +1;
2578 return 0;
2579}
2580
2581
2582static void
2583aarch64_build_bitmask_table (void)
2584{
2585 unsigned HOST_WIDE_INT mask, imm;
2586 unsigned int log_e, e, s, r;
2587 unsigned int nimms = 0;
2588
2589 for (log_e = 1; log_e <= 6; log_e++)
2590 {
2591 e = 1 << log_e;
2592 if (e == 64)
2593 mask = ~(HOST_WIDE_INT) 0;
2594 else
2595 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2596 for (s = 1; s < e; s++)
2597 {
2598 for (r = 0; r < e; r++)
2599 {
2600 /* set s consecutive bits to 1 (s < 64) */
2601 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2602 /* rotate right by r */
2603 if (r != 0)
2604 imm = ((imm >> r) | (imm << (e - r))) & mask;
2605 /* replicate the constant depending on SIMD size */
2606 switch (log_e) {
2607 case 1: imm |= (imm << 2);
2608 case 2: imm |= (imm << 4);
2609 case 3: imm |= (imm << 8);
2610 case 4: imm |= (imm << 16);
2611 case 5: imm |= (imm << 32);
2612 case 6:
2613 break;
2614 default:
2615 gcc_unreachable ();
2616 }
2617 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2618 aarch64_bitmasks[nimms++] = imm;
2619 }
2620 }
2621 }
2622
2623 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2624 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2625 aarch64_bitmasks_cmp);
2626}
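
/* Worked example (illustrative): with element size e == 8, s == 3 set
   bits and rotation r == 1, the element is

	0b00000111  rotated right by 1  ->  0b10000011 (0x83)

   which, replicated across 64 bits, yields the bitmask immediate
   0x8383838383838383.  Every entry of aarch64_bitmasks is generated
   this way, and the table is then sorted for the bsearch lookups in
   aarch64_bitmask_imm below.  */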
2627
2628
2629/* Return true if val can be encoded as a 12-bit unsigned immediate with
2630 a left shift of 0 or 12 bits. */
2631bool
2632aarch64_uimm12_shift (HOST_WIDE_INT val)
2633{
2634 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2635 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2636 );
2637}
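
/* For example, 0xabc and 0xabc000 (0xabc << 12) both satisfy this
   check, while 0xabc001 does not, since it needs bits from both
   halves.  */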
2638
2639
2640/* Return true if val is an immediate that can be loaded into a
2641 register by a MOVZ instruction. */
2642static bool
2643aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2644{
2645 if (GET_MODE_SIZE (mode) > 4)
2646 {
2647 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2648 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2649 return 1;
2650 }
2651 else
2652 {
2653 /* Ignore sign extension. */
2654 val &= (HOST_WIDE_INT) 0xffffffff;
2655 }
2656 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2657 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2658}
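
/* For example, 0xffff, 0x12340000 and 0x567800000000 are all single
   MOVZ immediates (a 16-bit chunk at bit position 0, 16, 32 or 48);
   the MOVN forms are covered by the caller also checking ~val.  */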
2659
2660
2661/* Return true if val is a valid bitmask immediate. */
2662bool
2663aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2664{
2665 if (GET_MODE_SIZE (mode) < 8)
2666 {
2667 /* Replicate bit pattern. */
2668 val &= (HOST_WIDE_INT) 0xffffffff;
2669 val |= val << 32;
2670 }
2671 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2672 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2673}
2674
2675
2676/* Return true if val is an immediate that can be loaded into a
2677 register in a single instruction. */
2678bool
2679aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2680{
2681 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2682 return 1;
2683 return aarch64_bitmask_imm (val, mode);
2684}
2685
2686static bool
2687aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2688{
2689 rtx base, offset;
2690 if (GET_CODE (x) == HIGH)
2691 return true;
2692
2693 split_const (x, &base, &offset);
2694 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2695 return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR) != SYMBOL_FORCE_TO_MEM);
2696
2697 return aarch64_tls_referenced_p (x);
2698}
2699
2700/* Return true if register REGNO is a valid index register.
2701 STRICT_P is true if REG_OK_STRICT is in effect. */
2702
2703bool
2704aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2705{
2706 if (!HARD_REGISTER_NUM_P (regno))
2707 {
2708 if (!strict_p)
2709 return true;
2710
2711 if (!reg_renumber)
2712 return false;
2713
2714 regno = reg_renumber[regno];
2715 }
2716 return GP_REGNUM_P (regno);
2717}
2718
2719/* Return true if register REGNO is a valid base register for mode MODE.
2720 STRICT_P is true if REG_OK_STRICT is in effect. */
2721
2722bool
2723aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2724{
2725 if (!HARD_REGISTER_NUM_P (regno))
2726 {
2727 if (!strict_p)
2728 return true;
2729
2730 if (!reg_renumber)
2731 return false;
2732
2733 regno = reg_renumber[regno];
2734 }
2735
2736 /* The fake registers will be eliminated to either the stack or
2737 hard frame pointer, both of which are usually valid base registers.
2738 Reload deals with the cases where the eliminated form isn't valid. */
2739 return (GP_REGNUM_P (regno)
2740 || regno == SP_REGNUM
2741 || regno == FRAME_POINTER_REGNUM
2742 || regno == ARG_POINTER_REGNUM);
2743}
2744
2745/* Return true if X is a valid base register for mode MODE.
2746 STRICT_P is true if REG_OK_STRICT is in effect. */
2747
2748static bool
2749aarch64_base_register_rtx_p (rtx x, bool strict_p)
2750{
2751 if (!strict_p && GET_CODE (x) == SUBREG)
2752 x = SUBREG_REG (x);
2753
2754 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2755}
2756
2757/* Return true if address offset is a valid index. If it is, fill in INFO
2758 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2759
2760static bool
2761aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2762 enum machine_mode mode, bool strict_p)
2763{
2764 enum aarch64_address_type type;
2765 rtx index;
2766 int shift;
2767
2768 /* (reg:P) */
2769 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2770 && GET_MODE (x) == Pmode)
2771 {
2772 type = ADDRESS_REG_REG;
2773 index = x;
2774 shift = 0;
2775 }
2776 /* (sign_extend:DI (reg:SI)) */
2777 else if ((GET_CODE (x) == SIGN_EXTEND
2778 || GET_CODE (x) == ZERO_EXTEND)
2779 && GET_MODE (x) == DImode
2780 && GET_MODE (XEXP (x, 0)) == SImode)
2781 {
2782 type = (GET_CODE (x) == SIGN_EXTEND)
2783 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2784 index = XEXP (x, 0);
2785 shift = 0;
2786 }
2787 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2788 else if (GET_CODE (x) == MULT
2789 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2790 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2791 && GET_MODE (XEXP (x, 0)) == DImode
2792 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2793 && CONST_INT_P (XEXP (x, 1)))
2794 {
2795 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2796 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2797 index = XEXP (XEXP (x, 0), 0);
2798 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2799 }
2800 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2801 else if (GET_CODE (x) == ASHIFT
2802 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2803 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2804 && GET_MODE (XEXP (x, 0)) == DImode
2805 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2806 && CONST_INT_P (XEXP (x, 1)))
2807 {
2808 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2809 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2810 index = XEXP (XEXP (x, 0), 0);
2811 shift = INTVAL (XEXP (x, 1));
2812 }
2813 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2814 else if ((GET_CODE (x) == SIGN_EXTRACT
2815 || GET_CODE (x) == ZERO_EXTRACT)
2816 && GET_MODE (x) == DImode
2817 && GET_CODE (XEXP (x, 0)) == MULT
2818 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2819 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2820 {
2821 type = (GET_CODE (x) == SIGN_EXTRACT)
2822 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2823 index = XEXP (XEXP (x, 0), 0);
2824 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2825 if (INTVAL (XEXP (x, 1)) != 32 + shift
2826 || INTVAL (XEXP (x, 2)) != 0)
2827 shift = -1;
2828 }
2829 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2830 (const_int 0xffffffff<<shift)) */
2831 else if (GET_CODE (x) == AND
2832 && GET_MODE (x) == DImode
2833 && GET_CODE (XEXP (x, 0)) == MULT
2834 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2835 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2836 && CONST_INT_P (XEXP (x, 1)))
2837 {
2838 type = ADDRESS_REG_UXTW;
2839 index = XEXP (XEXP (x, 0), 0);
2840 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2841 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2842 shift = -1;
2843 }
2844 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2845 else if ((GET_CODE (x) == SIGN_EXTRACT
2846 || GET_CODE (x) == ZERO_EXTRACT)
2847 && GET_MODE (x) == DImode
2848 && GET_CODE (XEXP (x, 0)) == ASHIFT
2849 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2850 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2851 {
2852 type = (GET_CODE (x) == SIGN_EXTRACT)
2853 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2854 index = XEXP (XEXP (x, 0), 0);
2855 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2856 if (INTVAL (XEXP (x, 1)) != 32 + shift
2857 || INTVAL (XEXP (x, 2)) != 0)
2858 shift = -1;
2859 }
2860 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2861 (const_int 0xffffffff<<shift)) */
2862 else if (GET_CODE (x) == AND
2863 && GET_MODE (x) == DImode
2864 && GET_CODE (XEXP (x, 0)) == ASHIFT
2865 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2866 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2867 && CONST_INT_P (XEXP (x, 1)))
2868 {
2869 type = ADDRESS_REG_UXTW;
2870 index = XEXP (XEXP (x, 0), 0);
2871 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2872 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2873 shift = -1;
2874 }
2875 /* (mult:P (reg:P) (const_int scale)) */
2876 else if (GET_CODE (x) == MULT
2877 && GET_MODE (x) == Pmode
2878 && GET_MODE (XEXP (x, 0)) == Pmode
2879 && CONST_INT_P (XEXP (x, 1)))
2880 {
2881 type = ADDRESS_REG_REG;
2882 index = XEXP (x, 0);
2883 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2884 }
2885 /* (ashift:P (reg:P) (const_int shift)) */
2886 else if (GET_CODE (x) == ASHIFT
2887 && GET_MODE (x) == Pmode
2888 && GET_MODE (XEXP (x, 0)) == Pmode
2889 && CONST_INT_P (XEXP (x, 1)))
2890 {
2891 type = ADDRESS_REG_REG;
2892 index = XEXP (x, 0);
2893 shift = INTVAL (XEXP (x, 1));
2894 }
2895 else
2896 return false;
2897
2898 if (GET_CODE (index) == SUBREG)
2899 index = SUBREG_REG (index);
2900
2901 if ((shift == 0
2902      || (shift > 0 && shift <= 3
2903	  && (1 << shift) == GET_MODE_SIZE (mode)))
2904 && REG_P (index)
2905 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2906 {
2907 info->type = type;
2908 info->offset = index;
2909 info->shift = shift;
2910 return true;
2911 }
2912
2913 return false;
2914}
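
/* As an illustration, for a DImode access the index part of an address
   such as

	(plus:DI (reg:DI x0)
		 (mult:DI (sign_extend:DI (reg:SI w1)) (const_int 8)))

   is classified here as ADDRESS_REG_SXTW with shift == 3, which
   aarch64_print_operand_address later renders as [x0,w1,sxtw 3].  */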
2915
2916static inline bool
2917offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2918{
2919 return (offset >= -64 * GET_MODE_SIZE (mode)
2920 && offset < 64 * GET_MODE_SIZE (mode)
2921 && offset % GET_MODE_SIZE (mode) == 0);
2922}
2923
2924static inline bool
2925offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
2926 HOST_WIDE_INT offset)
2927{
2928 return offset >= -256 && offset < 256;
2929}
2930
2931static inline bool
2932offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2933{
2934 return (offset >= 0
2935 && offset < 4096 * GET_MODE_SIZE (mode)
2936 && offset % GET_MODE_SIZE (mode) == 0);
2937}
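
/* For a DImode (8-byte) access these three predicates correspond to:
     7-bit signed scaled:    -512 .. 504 in steps of 8 (LDP/STP),
     9-bit signed unscaled:  -256 .. 255 (LDUR/STUR),
     12-bit unsigned scaled: 0 .. 32760 in steps of 8 (LDR/STR).  */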
2938
2939/* Return true if X is a valid address for machine mode MODE. If it is,
2940 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
2941 effect. OUTER_CODE is PARALLEL for a load/store pair. */
2942
2943static bool
2944aarch64_classify_address (struct aarch64_address_info *info,
2945 rtx x, enum machine_mode mode,
2946 RTX_CODE outer_code, bool strict_p)
2947{
2948 enum rtx_code code = GET_CODE (x);
2949 rtx op0, op1;
2950 bool allow_reg_index_p
2951   = (outer_code != PARALLEL && GET_MODE_SIZE (mode) != 16);
2952
2953 /* Don't support anything other than POST_INC or REG addressing for
2954 AdvSIMD. */
2955 if (aarch64_vector_mode_p (mode)
2956 && (code != POST_INC && code != REG))
2957 return false;
2958
2959 switch (code)
2960 {
2961 case REG:
2962 case SUBREG:
2963 info->type = ADDRESS_REG_IMM;
2964 info->base = x;
2965 info->offset = const0_rtx;
2966 return aarch64_base_register_rtx_p (x, strict_p);
2967
2968 case PLUS:
2969 op0 = XEXP (x, 0);
2970 op1 = XEXP (x, 1);
2971 if (GET_MODE_SIZE (mode) != 0
2972 && CONST_INT_P (op1)
2973 && aarch64_base_register_rtx_p (op0, strict_p))
2974 {
2975 HOST_WIDE_INT offset = INTVAL (op1);
2976
2977 info->type = ADDRESS_REG_IMM;
2978 info->base = op0;
2979 info->offset = op1;
2980
2981 /* TImode and TFmode values are allowed in both pairs of X
2982 registers and individual Q registers. The available
2983 address modes are:
2984 X,X: 7-bit signed scaled offset
2985 Q: 9-bit signed offset
2986 We conservatively require an offset representable in either mode.
2987 */
2988 if (mode == TImode || mode == TFmode)
2989 return (offset_7bit_signed_scaled_p (mode, offset)
2990 && offset_9bit_signed_unscaled_p (mode, offset));
2991
2992 if (outer_code == PARALLEL)
2993 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2994 && offset_7bit_signed_scaled_p (mode, offset));
2995 else
2996 return (offset_9bit_signed_unscaled_p (mode, offset)
2997 || offset_12bit_unsigned_scaled_p (mode, offset));
2998 }
2999
3000 if (allow_reg_index_p)
3001 {
3002 /* Look for base + (scaled/extended) index register. */
3003 if (aarch64_base_register_rtx_p (op0, strict_p)
3004 && aarch64_classify_index (info, op1, mode, strict_p))
3005 {
3006 info->base = op0;
3007 return true;
3008 }
3009 if (aarch64_base_register_rtx_p (op1, strict_p)
3010 && aarch64_classify_index (info, op0, mode, strict_p))
3011 {
3012 info->base = op1;
3013 return true;
3014 }
3015 }
3016
3017 return false;
3018
3019 case POST_INC:
3020 case POST_DEC:
3021 case PRE_INC:
3022 case PRE_DEC:
3023 info->type = ADDRESS_REG_WB;
3024 info->base = XEXP (x, 0);
3025 info->offset = NULL_RTX;
3026 return aarch64_base_register_rtx_p (info->base, strict_p);
3027
3028 case POST_MODIFY:
3029 case PRE_MODIFY:
3030 info->type = ADDRESS_REG_WB;
3031 info->base = XEXP (x, 0);
3032 if (GET_CODE (XEXP (x, 1)) == PLUS
3033 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3034 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3035 && aarch64_base_register_rtx_p (info->base, strict_p))
3036 {
3037 HOST_WIDE_INT offset;
3038 info->offset = XEXP (XEXP (x, 1), 1);
3039 offset = INTVAL (info->offset);
3040
3041 /* TImode and TFmode values are allowed in both pairs of X
3042 registers and individual Q registers. The available
3043 address modes are:
3044 X,X: 7-bit signed scaled offset
3045 Q: 9-bit signed offset
3046 We conservatively require an offset representable in either mode.
3047 */
3048 if (mode == TImode || mode == TFmode)
3049 return (offset_7bit_signed_scaled_p (mode, offset)
3050 && offset_9bit_signed_unscaled_p (mode, offset));
3051
3052 if (outer_code == PARALLEL)
3053 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3054 && offset_7bit_signed_scaled_p (mode, offset));
3055 else
3056 return offset_9bit_signed_unscaled_p (mode, offset);
3057 }
3058 return false;
3059
3060 case CONST:
3061 case SYMBOL_REF:
3062 case LABEL_REF:
3063 /* Load literal: PC-relative constant pool entry.  Only supported
3064 for SI mode or larger. */
43e9d192 3065 info->type = ADDRESS_SYMBOLIC;
79517551 3066 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3067 {
3068 rtx sym, addend;
3069
3070 split_const (x, &sym, &addend);
3071 return (GET_CODE (sym) == LABEL_REF
3072 || (GET_CODE (sym) == SYMBOL_REF
3073 && CONSTANT_POOL_ADDRESS_P (sym)));
3074 }
3075 return false;
3076
3077 case LO_SUM:
3078 info->type = ADDRESS_LO_SUM;
3079 info->base = XEXP (x, 0);
3080 info->offset = XEXP (x, 1);
3081 if (allow_reg_index_p
3082 && aarch64_base_register_rtx_p (info->base, strict_p))
3083 {
3084 rtx sym, offs;
3085 split_const (info->offset, &sym, &offs);
3086 if (GET_CODE (sym) == SYMBOL_REF
3087 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3088 == SYMBOL_SMALL_ABSOLUTE))
3089 {
3090 /* The symbol and offset must be aligned to the access size. */
3091 unsigned int align;
3092 unsigned int ref_size;
3093
3094 if (CONSTANT_POOL_ADDRESS_P (sym))
3095 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3096 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3097 {
3098 tree exp = SYMBOL_REF_DECL (sym);
3099 align = TYPE_ALIGN (TREE_TYPE (exp));
3100 align = CONSTANT_ALIGNMENT (exp, align);
3101 }
3102 else if (SYMBOL_REF_DECL (sym))
3103 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3104 else
3105 align = BITS_PER_UNIT;
3106
3107 ref_size = GET_MODE_SIZE (mode);
3108 if (ref_size == 0)
3109 ref_size = GET_MODE_SIZE (DImode);
3110
3111 return ((INTVAL (offs) & (ref_size - 1)) == 0
3112 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3113 }
3114 }
3115 return false;
3116
3117 default:
3118 return false;
3119 }
3120}
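
/* A few illustrative classifications for a DImode access:
     (reg:DI x0)                          -> ADDRESS_REG_IMM, offset 0
     (plus:DI (reg:DI x0) (const_int 16)) -> ADDRESS_REG_IMM
     (post_inc:DI (reg:DI x0))            -> ADDRESS_REG_WB
     (lo_sum:DI (reg:DI x0) sym)          -> ADDRESS_LO_SUM, printed as
                                             [x0,#:lo12:sym], subject to
                                             the alignment checks above
   A pc-relative constant-pool reference classifies as
   ADDRESS_SYMBOLIC.  */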
3121
3122bool
3123aarch64_symbolic_address_p (rtx x)
3124{
3125 rtx offset;
3126
3127 split_const (x, &x, &offset);
3128 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3129}
3130
3131/* Classify the base of symbolic expression X, given that X appears in
3132 context CONTEXT. */
3133
3134enum aarch64_symbol_type
3135aarch64_classify_symbolic_expression (rtx x,
3136 enum aarch64_symbol_context context)
3137{
3138 rtx offset;
da4f13a4 3139
3140 split_const (x, &x, &offset);
3141 return aarch64_classify_symbol (x, context);
3142}
3143
3144
3145/* Return TRUE if X is a legitimate address for accessing memory in
3146 mode MODE. */
3147static bool
3148aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3149{
3150 struct aarch64_address_info addr;
3151
3152 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3153}
3154
3155/* Return TRUE if X is a legitimate address for accessing memory in
3156 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3157 pair operation. */
3158bool
3159aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3160 RTX_CODE outer_code, bool strict_p)
3161{
3162 struct aarch64_address_info addr;
3163
3164 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3165}
3166
3167/* Return TRUE if rtx X is the immediate constant 0.0.  */
3168bool
3520f7cc 3169aarch64_float_const_zero_rtx_p (rtx x)
3170{
3171 REAL_VALUE_TYPE r;
3172
3173 if (GET_MODE (x) == VOIDmode)
3174 return false;
3175
3176 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3177 if (REAL_VALUE_MINUS_ZERO (r))
3178 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3179 return REAL_VALUES_EQUAL (r, dconst0);
3180}
3181
3182/* Return the fixed registers used for condition codes. */
3183
3184static bool
3185aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3186{
3187 *p1 = CC_REGNUM;
3188 *p2 = INVALID_REGNUM;
3189 return true;
3190}
3191
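/* Select the CC mode to use for the comparison of X with Y using
   comparison code CODE.  */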
3192enum machine_mode
3193aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3194{
3195 /* All floating point compares return CCFP if it is an equality
3196 comparison, and CCFPE otherwise. */
3197 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3198 {
3199 switch (code)
3200 {
3201 case EQ:
3202 case NE:
3203 case UNORDERED:
3204 case ORDERED:
3205 case UNLT:
3206 case UNLE:
3207 case UNGT:
3208 case UNGE:
3209 case UNEQ:
3210 case LTGT:
3211 return CCFPmode;
3212
3213 case LT:
3214 case LE:
3215 case GT:
3216 case GE:
3217 return CCFPEmode;
3218
3219 default:
3220 gcc_unreachable ();
3221 }
3222 }
3223
3224 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3225 && y == const0_rtx
3226 && (code == EQ || code == NE || code == LT || code == GE)
3227 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3228 || GET_CODE (x) == NEG))
3229 return CC_NZmode;
3230
3231 /* A compare with a shifted operand. Because of canonicalization,
3232 the comparison will have to be swapped when we emit the assembly
3233 code. */
3234 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3235 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3236 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3237 || GET_CODE (x) == LSHIFTRT
3238 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3239 return CC_SWPmode;
3240
3241 /* A compare of a mode narrower than SI mode against zero can be done
3242 by extending the value in the comparison. */
3243 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3244 && y == const0_rtx)
3245 /* Only use sign-extension if we really need it. */
3246 return ((code == GT || code == GE || code == LE || code == LT)
3247 ? CC_SESWPmode : CC_ZESWPmode);
3248
3249 /* For everything else, return CCmode. */
3250 return CCmode;
3251}
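
/* For instance, a comparison whose first operand is a shifted value,
   such as (compare (ashift:DI (reg x1) (const_int 2)) (reg x0)), gets
   CC_SWPmode: the assembly output swaps the operands (roughly
   cmp x0, x1, lsl 2), so aarch64_get_condition_code below maps GT to
   "lt", GE to "le", and so on for that mode.  */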
3252
3253static unsigned
3254aarch64_get_condition_code (rtx x)
3255{
3256 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3257 enum rtx_code comp_code = GET_CODE (x);
3258
3259 if (GET_MODE_CLASS (mode) != MODE_CC)
3260 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3261
3262 switch (mode)
3263 {
3264 case CCFPmode:
3265 case CCFPEmode:
3266 switch (comp_code)
3267 {
3268 case GE: return AARCH64_GE;
3269 case GT: return AARCH64_GT;
3270 case LE: return AARCH64_LS;
3271 case LT: return AARCH64_MI;
3272 case NE: return AARCH64_NE;
3273 case EQ: return AARCH64_EQ;
3274 case ORDERED: return AARCH64_VC;
3275 case UNORDERED: return AARCH64_VS;
3276 case UNLT: return AARCH64_LT;
3277 case UNLE: return AARCH64_LE;
3278 case UNGT: return AARCH64_HI;
3279 case UNGE: return AARCH64_PL;
3280 default: gcc_unreachable ();
3281 }
3282 break;
3283
3284 case CCmode:
3285 switch (comp_code)
3286 {
3287 case NE: return AARCH64_NE;
3288 case EQ: return AARCH64_EQ;
3289 case GE: return AARCH64_GE;
3290 case GT: return AARCH64_GT;
3291 case LE: return AARCH64_LE;
3292 case LT: return AARCH64_LT;
3293 case GEU: return AARCH64_CS;
3294 case GTU: return AARCH64_HI;
3295 case LEU: return AARCH64_LS;
3296 case LTU: return AARCH64_CC;
3297 default: gcc_unreachable ();
3298 }
3299 break;
3300
3301 case CC_SWPmode:
3302 case CC_ZESWPmode:
3303 case CC_SESWPmode:
3304 switch (comp_code)
3305 {
3306 case NE: return AARCH64_NE;
3307 case EQ: return AARCH64_EQ;
3308 case GE: return AARCH64_LE;
3309 case GT: return AARCH64_LT;
3310 case LE: return AARCH64_GE;
3311 case LT: return AARCH64_GT;
3312 case GEU: return AARCH64_LS;
3313 case GTU: return AARCH64_CC;
3314 case LEU: return AARCH64_CS;
3315 case LTU: return AARCH64_HI;
3316 default: gcc_unreachable ();
3317 }
3318 break;
3319
3320 case CC_NZmode:
3321 switch (comp_code)
3322 {
3323 case NE: return AARCH64_NE;
3324 case EQ: return AARCH64_EQ;
3325 case GE: return AARCH64_PL;
3326 case LT: return AARCH64_MI;
3327 default: gcc_unreachable ();
3328 }
3329 break;
3330
3331 default:
3332 gcc_unreachable ();
3333 break;
3334 }
3335}
3336
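/* Count the set bits in VALUE; each loop iteration clears the lowest
   set bit (Kernighan's method).  */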
3337static unsigned
3338bit_count (unsigned HOST_WIDE_INT value)
3339{
3340 unsigned count = 0;
3341
3342 while (value)
3343 {
3344 count++;
3345 value &= value - 1;
3346 }
3347
3348 return count;
3349}
3350
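/* Print operand X to file F under the control of the modifier CODE.
   For example, a hypothetical output template in the machine
   description might use "%w0"/"%x0" for the 32-bit or 64-bit form of a
   general register, "%m1" for the condition of a comparison (eq, ne,
   ...), "%M1" for its inverse, and "%d0"/"%q0" for scalar FP/SIMD
   register names.  */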
3351void
3352aarch64_print_operand (FILE *f, rtx x, char code)
3353{
3354 switch (code)
3355 {
3356 case 'e':
3357 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3358 {
3359 int n;
3360
3361 if (GET_CODE (x) != CONST_INT
3362 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3363 {
3364 output_operand_lossage ("invalid operand for '%%%c'", code);
3365 return;
3366 }
3367
3368 switch (n)
3369 {
3370 case 3:
3371 fputc ('b', f);
3372 break;
3373 case 4:
3374 fputc ('h', f);
3375 break;
3376 case 5:
3377 fputc ('w', f);
3378 break;
3379 default:
3380 output_operand_lossage ("invalid operand for '%%%c'", code);
3381 return;
3382 }
3383 }
3384 break;
3385
3386 case 'p':
3387 {
3388 int n;
3389
3390 /* Print N such that 2^N == X. */
3391 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3392 {
3393 output_operand_lossage ("invalid operand for '%%%c'", code);
3394 return;
3395 }
3396
3397 asm_fprintf (f, "%d", n);
3398 }
3399 break;
3400
3401 case 'P':
3402 /* Print the number of non-zero bits in X (a const_int). */
3403 if (GET_CODE (x) != CONST_INT)
3404 {
3405 output_operand_lossage ("invalid operand for '%%%c'", code);
3406 return;
3407 }
3408
3409 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3410 break;
3411
3412 case 'H':
3413 /* Print the higher numbered register of a pair (TImode) of regs. */
3414 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3415 {
3416 output_operand_lossage ("invalid operand for '%%%c'", code);
3417 return;
3418 }
3419
01a3a324 3420 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3421 break;
3422
3423 case 'm':
3424 /* Print a condition (eq, ne, etc). */
3425
3426 /* CONST_TRUE_RTX means always -- that's the default. */
3427 if (x == const_true_rtx)
3428 return;
3429
3430 if (!COMPARISON_P (x))
3431 {
3432 output_operand_lossage ("invalid operand for '%%%c'", code);
3433 return;
3434 }
3435
3436 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3437 break;
3438
3439 case 'M':
3440 /* Print the inverse of a condition (eq <-> ne, etc). */
3441
3442 /* CONST_TRUE_RTX means never -- that's the default. */
3443 if (x == const_true_rtx)
3444 {
3445 fputs ("nv", f);
3446 return;
3447 }
3448
3449 if (!COMPARISON_P (x))
3450 {
3451 output_operand_lossage ("invalid operand for '%%%c'", code);
3452 return;
3453 }
3454
3455 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3456 (aarch64_get_condition_code (x))], f);
3457 break;
3458
3459 case 'b':
3460 case 'h':
3461 case 's':
3462 case 'd':
3463 case 'q':
3464 /* Print a scalar FP/SIMD register name. */
3465 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3466 {
3467 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3468 return;
3469 }
50ce6f88 3470 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3471 break;
3472
3473 case 'S':
3474 case 'T':
3475 case 'U':
3476 case 'V':
3477 /* Print the first FP/SIMD register name in a list. */
3478 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3479 {
3480 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3481 return;
3482 }
50ce6f88 3483 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3484 break;
3485
a05c0ddf 3486 case 'X':
50d38551 3487 /* Print bottom 16 bits of integer constant in hex. */
3488 if (GET_CODE (x) != CONST_INT)
3489 {
3490 output_operand_lossage ("invalid operand for '%%%c'", code);
3491 return;
3492 }
50d38551 3493 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3494 break;
3495
3496 case 'w':
3497 case 'x':
3498 /* Print a general register name or the zero register (32-bit or
3499 64-bit). */
3500 if (x == const0_rtx
3501 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 3502 {
50ce6f88 3503 asm_fprintf (f, "%czr", code);
3504 break;
3505 }
3506
3507 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3508 {
50ce6f88 3509 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3510 break;
3511 }
3512
3513 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3514 {
50ce6f88 3515 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3516 break;
3517 }
3518
3519 /* Fall through */
3520
3521 case 0:
3522 /* Print a normal operand, if it's a general register, then we
3523 assume DImode. */
3524 if (x == NULL)
3525 {
3526 output_operand_lossage ("missing operand");
3527 return;
3528 }
3529
3530 switch (GET_CODE (x))
3531 {
3532 case REG:
01a3a324 3533 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3534 break;
3535
3536 case MEM:
3537 aarch64_memory_reference_mode = GET_MODE (x);
3538 output_address (XEXP (x, 0));
3539 break;
3540
3541 case LABEL_REF:
3542 case SYMBOL_REF:
3543 output_addr_const (asm_out_file, x);
3544 break;
3545
3546 case CONST_INT:
3547 asm_fprintf (f, "%wd", INTVAL (x));
3548 break;
3549
3550 case CONST_VECTOR:
3551 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3552 {
3553 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3554 HOST_WIDE_INT_MIN,
3555 HOST_WIDE_INT_MAX));
3556 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3557 }
3558 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3559 {
3560 fputc ('0', f);
3561 }
3562 else
3563 gcc_unreachable ();
3564 break;
3565
3566 case CONST_DOUBLE:
3567 /* CONST_DOUBLE can represent a double-width integer.
3568 In this case, the mode of x is VOIDmode. */
3569 if (GET_MODE (x) == VOIDmode)
3570 ; /* Do Nothing. */
3571 else if (aarch64_float_const_zero_rtx_p (x))
3572 {
3573 fputc ('0', f);
3574 break;
3575 }
3576 else if (aarch64_float_const_representable_p (x))
3577 {
3578#define buf_size 20
3579 char float_buf[buf_size] = {'\0'};
3580 REAL_VALUE_TYPE r;
3581 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3582 real_to_decimal_for_mode (float_buf, &r,
3583 buf_size, buf_size,
3584 1, GET_MODE (x));
3585 asm_fprintf (asm_out_file, "%s", float_buf);
3586 break;
3587#undef buf_size
3588 }
3589 output_operand_lossage ("invalid constant");
3590 return;
3591 default:
3592 output_operand_lossage ("invalid operand");
3593 return;
3594 }
3595 break;
3596
3597 case 'A':
3598 if (GET_CODE (x) == HIGH)
3599 x = XEXP (x, 0);
3600
3601 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3602 {
3603 case SYMBOL_SMALL_GOT:
3604 asm_fprintf (asm_out_file, ":got:");
3605 break;
3606
3607 case SYMBOL_SMALL_TLSGD:
3608 asm_fprintf (asm_out_file, ":tlsgd:");
3609 break;
3610
3611 case SYMBOL_SMALL_TLSDESC:
3612 asm_fprintf (asm_out_file, ":tlsdesc:");
3613 break;
3614
3615 case SYMBOL_SMALL_GOTTPREL:
3616 asm_fprintf (asm_out_file, ":gottprel:");
3617 break;
3618
3619 case SYMBOL_SMALL_TPREL:
3620 asm_fprintf (asm_out_file, ":tprel:");
3621 break;
3622
3623 default:
3624 break;
3625 }
3626 output_addr_const (asm_out_file, x);
3627 break;
3628
3629 case 'L':
3630 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3631 {
3632 case SYMBOL_SMALL_GOT:
3633 asm_fprintf (asm_out_file, ":lo12:");
3634 break;
3635
3636 case SYMBOL_SMALL_TLSGD:
3637 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3638 break;
3639
3640 case SYMBOL_SMALL_TLSDESC:
3641 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3642 break;
3643
3644 case SYMBOL_SMALL_GOTTPREL:
3645 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3646 break;
3647
3648 case SYMBOL_SMALL_TPREL:
3649 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3650 break;
3651
3652 default:
3653 break;
3654 }
3655 output_addr_const (asm_out_file, x);
3656 break;
3657
3658 case 'G':
3659
3660 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3661 {
3662 case SYMBOL_SMALL_TPREL:
3663 asm_fprintf (asm_out_file, ":tprel_hi12:");
3664 break;
3665 default:
3666 break;
3667 }
3668 output_addr_const (asm_out_file, x);
3669 break;
3670
3671 default:
3672 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3673 return;
3674 }
3675}
3676
3677void
3678aarch64_print_operand_address (FILE *f, rtx x)
3679{
3680 struct aarch64_address_info addr;
3681
3682 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3683 MEM, true))
3684 switch (addr.type)
3685 {
3686 case ADDRESS_REG_IMM:
3687 if (addr.offset == const0_rtx)
01a3a324 3688 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43e9d192 3689 else
01a3a324 3690 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3691 INTVAL (addr.offset));
3692 return;
3693
3694 case ADDRESS_REG_REG:
3695 if (addr.shift == 0)
3696 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3697 reg_names [REGNO (addr.offset)]);
43e9d192 3698 else
3699 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3700 reg_names [REGNO (addr.offset)], addr.shift);
3701 return;
3702
3703 case ADDRESS_REG_UXTW:
3704 if (addr.shift == 0)
01a3a324 3705 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3706 REGNO (addr.offset) - R0_REGNUM);
3707 else
01a3a324 3708 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3709 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3710 return;
3711
3712 case ADDRESS_REG_SXTW:
3713 if (addr.shift == 0)
01a3a324 3714 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3715 REGNO (addr.offset) - R0_REGNUM);
3716 else
01a3a324 3717 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3718 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3719 return;
3720
3721 case ADDRESS_REG_WB:
3722 switch (GET_CODE (x))
3723 {
3724 case PRE_INC:
01a3a324 3725 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3726 GET_MODE_SIZE (aarch64_memory_reference_mode));
3727 return;
3728 case POST_INC:
01a3a324 3729 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3730 GET_MODE_SIZE (aarch64_memory_reference_mode));
3731 return;
3732 case PRE_DEC:
01a3a324 3733 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3734 GET_MODE_SIZE (aarch64_memory_reference_mode));
3735 return;
3736 case POST_DEC:
01a3a324 3737 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3738 GET_MODE_SIZE (aarch64_memory_reference_mode));
3739 return;
3740 case PRE_MODIFY:
01a3a324 3741 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3742 INTVAL (addr.offset));
3743 return;
3744 case POST_MODIFY:
01a3a324 3745 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3746 INTVAL (addr.offset));
3747 return;
3748 default:
3749 break;
3750 }
3751 break;
3752
3753 case ADDRESS_LO_SUM:
01a3a324 3754 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3755 output_addr_const (f, addr.offset);
3756 asm_fprintf (f, "]");
3757 return;
3758
3759 case ADDRESS_SYMBOLIC:
3760 break;
3761 }
3762
3763 output_addr_const (f, x);
3764}
3765
3766void
3767aarch64_function_profiler (FILE *f ATTRIBUTE_UNUSED,
3768 int labelno ATTRIBUTE_UNUSED)
3769{
3770 sorry ("function profiling");
3771}
3772
3773bool
3774aarch64_label_mentioned_p (rtx x)
3775{
3776 const char *fmt;
3777 int i;
3778
3779 if (GET_CODE (x) == LABEL_REF)
3780 return true;
3781
3782 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3783 referencing instruction, but they are constant offsets, not
3784 symbols. */
3785 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3786 return false;
3787
3788 fmt = GET_RTX_FORMAT (GET_CODE (x));
3789 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3790 {
3791 if (fmt[i] == 'E')
3792 {
3793 int j;
3794
3795 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3796 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3797 return 1;
3798 }
3799 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3800 return 1;
3801 }
3802
3803 return 0;
3804}
3805
3806/* Implement REGNO_REG_CLASS. */
3807
3808enum reg_class
3809aarch64_regno_regclass (unsigned regno)
3810{
3811 if (GP_REGNUM_P (regno))
3812 return CORE_REGS;
3813
3814 if (regno == SP_REGNUM)
3815 return STACK_REG;
3816
3817 if (regno == FRAME_POINTER_REGNUM
3818 || regno == ARG_POINTER_REGNUM)
3819 return CORE_REGS;
3820
3821 if (FP_REGNUM_P (regno))
3822 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3823
3824 return NO_REGS;
3825}
3826
3827/* Try a machine-dependent way of reloading an illegitimate address
3828 operand. If we find one, push the reload and return the new rtx. */
3829
3830rtx
3831aarch64_legitimize_reload_address (rtx *x_p,
3832 enum machine_mode mode,
3833 int opnum, int type,
3834 int ind_levels ATTRIBUTE_UNUSED)
3835{
3836 rtx x = *x_p;
3837
3838 /* Do not allow mem (plus (reg, const)) if vector mode. */
3839 if (aarch64_vector_mode_p (mode)
3840 && GET_CODE (x) == PLUS
3841 && REG_P (XEXP (x, 0))
3842 && CONST_INT_P (XEXP (x, 1)))
3843 {
3844 rtx orig_rtx = x;
3845 x = copy_rtx (x);
3846 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3847 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3848 opnum, (enum reload_type) type);
3849 return x;
3850 }
3851
3852 /* We must recognize output that we have already generated ourselves. */
3853 if (GET_CODE (x) == PLUS
3854 && GET_CODE (XEXP (x, 0)) == PLUS
3855 && REG_P (XEXP (XEXP (x, 0), 0))
3856 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3857 && CONST_INT_P (XEXP (x, 1)))
3858 {
3859 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3860 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3861 opnum, (enum reload_type) type);
3862 return x;
3863 }
3864
3865 /* We wish to handle large displacements off a base register by splitting
3866 the addend across an add and the mem insn. This can cut the number of
3867 extra insns needed from 3 to 1. It is only useful for load/store of a
3868 single register with 12 bit offset field. */
3869 if (GET_CODE (x) == PLUS
3870 && REG_P (XEXP (x, 0))
3871 && CONST_INT_P (XEXP (x, 1))
3872 && HARD_REGISTER_P (XEXP (x, 0))
3873 && mode != TImode
3874 && mode != TFmode
3875 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
3876 {
3877 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3878 HOST_WIDE_INT low = val & 0xfff;
3879 HOST_WIDE_INT high = val - low;
3880 HOST_WIDE_INT offs;
3881 rtx cst;
3882
3883 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
3884 BLKmode alignment. */
3885 if (GET_MODE_SIZE (mode) == 0)
3886 return NULL_RTX;
3887
3888 offs = low % GET_MODE_SIZE (mode);
3889
3890 /* Align misaligned offset by adjusting high part to compensate. */
3891 if (offs != 0)
3892 {
3893 if (aarch64_uimm12_shift (high + offs))
3894 {
3895 /* Align down. */
3896 low = low - offs;
3897 high = high + offs;
3898 }
3899 else
3900 {
3901 /* Align up. */
3902 offs = GET_MODE_SIZE (mode) - offs;
3903 low = low + offs;
3904 high = high + (low & 0x1000) - offs;
3905 low &= 0xfff;
3906 }
3907 }
3908
3909 /* Check for overflow. */
3910 if (high + low != val)
3911 return NULL_RTX;
3912
3913 cst = GEN_INT (high);
3914 if (!aarch64_uimm12_shift (high))
3915 cst = force_const_mem (Pmode, cst);
3916
3917 /* Reload high part into base reg, leaving the low part
3918 in the mem instruction. */
3919 x = gen_rtx_PLUS (Pmode,
3920 gen_rtx_PLUS (Pmode, XEXP (x, 0), cst),
3921 GEN_INT (low));
3922
3923 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3924 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
3925 opnum, (enum reload_type) type);
3926 return x;
3927 }
3928
3929 return NULL_RTX;
3930}
3931
3932
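/* Implement TARGET_SECONDARY_RELOAD. Return the class of any scratch
 register needed to reload X of mode MODE in class RCLASS, or NO_REGS;
 when a special reload pattern must be used, record its icode in SRI. */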
3933static reg_class_t
3934aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
3935 reg_class_t rclass,
3936 enum machine_mode mode,
3937 secondary_reload_info *sri)
3938{
3939 /* Address expressions of the form PLUS (SP, large_offset) need two
3940 scratch registers, one for the constant, and one for holding a
3941 copy of SP, since SP cannot be used on the RHS of an add-reg
3942 instruction. */
3943 if (mode == DImode
3944 && GET_CODE (x) == PLUS
3945 && XEXP (x, 0) == stack_pointer_rtx
3946 && CONST_INT_P (XEXP (x, 1))
3947 && !aarch64_uimm12_shift (INTVAL (XEXP (x, 1))))
3948 {
3949 sri->icode = CODE_FOR_reload_sp_immediate;
3950 return NO_REGS;
3951 }
3952
3953 /* Without the TARGET_SIMD instructions we cannot move a Q register
3954 to a Q register directly. We need a scratch. */
3955 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
3956 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
3957 && reg_class_subset_p (rclass, FP_REGS))
3958 {
3959 if (mode == TFmode)
3960 sri->icode = CODE_FOR_aarch64_reload_movtf;
3961 else if (mode == TImode)
3962 sri->icode = CODE_FOR_aarch64_reload_movti;
3963 return NO_REGS;
3964 }
3965
 3966 /* A TFmode or TImode memory access should be handled via an FP register,
 3967 because AArch64 has richer addressing modes for LDR/STR instructions
 3968 than for LDP/STP instructions. */
3969 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
3970 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
3971 return FP_REGS;
3972
3973 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
3974 return CORE_REGS;
3975
3976 return NO_REGS;
3977}
3978
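/* Implement TARGET_CAN_ELIMINATE. Return true if register FROM may be
 eliminated in favour of register TO. */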
3979static bool
3980aarch64_can_eliminate (const int from, const int to)
3981{
3982 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
3983 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
3984
3985 if (frame_pointer_needed)
3986 {
3987 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3988 return true;
3989 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3990 return false;
3991 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3992 && !cfun->calls_alloca)
3993 return true;
3994 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3995 return true;
3996 return false;
3997 }
3998 else
3999 {
4000 /* If we decided that we didn't need a leaf frame pointer but then used
4001 LR in the function, then we'll want a frame pointer after all, so
4002 prevent this elimination to ensure a frame pointer is used.
4003
4004 NOTE: the original value of flag_omit_frame_pointer gets trashed
4005 IFF flag_omit_leaf_frame_pointer is true, so we check the value
4006 of faked_omit_frame_pointer here (which is true when we always
4007 wish to keep non-leaf frame pointers but only wish to keep leaf frame
4008 pointers when LR is clobbered). */
43e9d192 4009 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4010 && df_regs_ever_live_p (LR_REGNUM)
4011 && faked_omit_frame_pointer)
4012 return false;
4013 }
777e6976 4014
4015 return true;
4016}
4017
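/* Implement INITIAL_ELIMINATION_OFFSET. Return the offset to add when
 eliminating register FROM in favour of register TO, once the frame has
 been laid out. */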
4018HOST_WIDE_INT
4019aarch64_initial_elimination_offset (unsigned from, unsigned to)
4020{
4021 HOST_WIDE_INT frame_size;
4022 HOST_WIDE_INT offset;
4023
4024 aarch64_layout_frame ();
4025 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4026 + crtl->outgoing_args_size
4027 + cfun->machine->saved_varargs_size);
4028
4029 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4030 offset = frame_size;
4031
4032 if (to == HARD_FRAME_POINTER_REGNUM)
4033 {
4034 if (from == ARG_POINTER_REGNUM)
4035 return offset - crtl->outgoing_args_size;
4036
4037 if (from == FRAME_POINTER_REGNUM)
4038 return cfun->machine->frame.saved_regs_size;
4039 }
4040
4041 if (to == STACK_POINTER_REGNUM)
4042 {
4043 if (from == FRAME_POINTER_REGNUM)
4044 {
4045 HOST_WIDE_INT elim = crtl->outgoing_args_size
4046 + cfun->machine->frame.saved_regs_size
4047 - cfun->machine->frame.fp_lr_offset;
4048 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4049 return elim;
4050 }
4051 }
4052
4053 return offset;
4054}
4055
4056
4057/* Implement RETURN_ADDR_RTX. We do not support moving back to a
4058 previous frame. */
4059
4060rtx
4061aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4062{
4063 if (count != 0)
4064 return const0_rtx;
4065 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4066}
4067
4068
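/* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE. Output the trampoline
 template: load the target address and the static chain from the two
 trailing double-words, then branch to the target. */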
4069static void
4070aarch64_asm_trampoline_template (FILE *f)
4071{
4072 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4073 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4074 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4075 assemble_aligned_integer (4, const0_rtx);
4076 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4077 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4078}
4079
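/* Return the size in bytes of the trampoline emitted by the template above. */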
4080unsigned
4081aarch64_trampoline_size (void)
4082{
4083 return 32; /* 3 insns + padding + 2 dwords. */
4084}
4085
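/* Implement TARGET_TRAMPOLINE_INIT. Copy the trampoline template into
 M_TRAMP, fill in the address of FNDECL and the static chain value
 CHAIN_VALUE, then flush the instruction cache over the region. */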
4086static void
4087aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4088{
4089 rtx fnaddr, mem, a_tramp;
4090
4091 /* Don't need to copy the trailing D-words, we fill those in below. */
4092 emit_block_move (m_tramp, assemble_trampoline_template (),
4093 GEN_INT (TRAMPOLINE_SIZE - 16), BLOCK_OP_NORMAL);
4094 mem = adjust_address (m_tramp, DImode, 16);
4095 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4096 emit_move_insn (mem, fnaddr);
4097
4098 mem = adjust_address (m_tramp, DImode, 24);
4099 emit_move_insn (mem, chain_value);
4100
4101 /* XXX We should really define a "clear_cache" pattern and use
4102 gen_clear_cache(). */
4103 a_tramp = XEXP (m_tramp, 0);
4104 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4105 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
4106 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
4107}
4108
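/* Implement TARGET_CLASS_MAX_NREGS. Return the maximum number of
 registers of class REGCLASS needed to hold a value of mode MODE. */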
4109static unsigned char
4110aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4111{
4112 switch (regclass)
4113 {
4114 case CORE_REGS:
4115 case POINTER_REGS:
4116 case GENERAL_REGS:
4117 case ALL_REGS:
4118 case FP_REGS:
4119 case FP_LO_REGS:
4120 return
4121 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4122 (GET_MODE_SIZE (mode) + 7) / 8;
4123 case STACK_REG:
4124 return 1;
4125
4126 case NO_REGS:
4127 return 0;
4128
4129 default:
4130 break;
4131 }
4132 gcc_unreachable ();
4133}
4134
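/* Implement TARGET_PREFERRED_RELOAD_CLASS. Reloads through POINTER_REGS
 or STACK_REG are directed to GENERAL_REGS instead. */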
4135static reg_class_t
4136aarch64_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t regclass)
4137{
4138 return ((regclass == POINTER_REGS || regclass == STACK_REG)
4139 ? GENERAL_REGS : regclass);
4140}
4141
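/* Output the assembler name NAME to F, applying the user label prefix. */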
4142void
4143aarch64_asm_output_labelref (FILE* f, const char *name)
4144{
4145 asm_fprintf (f, "%U%s", name);
4146}
4147
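/* Implement TARGET_ASM_CONSTRUCTOR. Emit SYMBOL into .init_array, or into
 a priority-specific .init_array.NNNNN section when PRIORITY is not the
 default. */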
4148static void
4149aarch64_elf_asm_constructor (rtx symbol, int priority)
4150{
4151 if (priority == DEFAULT_INIT_PRIORITY)
4152 default_ctor_section_asm_out_constructor (symbol, priority);
4153 else
4154 {
4155 section *s;
4156 char buf[18];
4157 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4158 s = get_section (buf, SECTION_WRITE, NULL);
4159 switch_to_section (s);
4160 assemble_align (POINTER_SIZE);
4161 fputs ("\t.dword\t", asm_out_file);
4162 output_addr_const (asm_out_file, symbol);
4163 fputc ('\n', asm_out_file);
4164 }
4165}
4166
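/* Implement TARGET_ASM_DESTRUCTOR. As above, but for .fini_array. */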
4167static void
4168aarch64_elf_asm_destructor (rtx symbol, int priority)
4169{
4170 if (priority == DEFAULT_INIT_PRIORITY)
4171 default_dtor_section_asm_out_destructor (symbol, priority);
4172 else
4173 {
4174 section *s;
4175 char buf[18];
4176 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4177 s = get_section (buf, SECTION_WRITE, NULL);
4178 switch_to_section (s);
4179 assemble_align (POINTER_SIZE);
4180 fputs ("\t.dword\t", asm_out_file);
4181 output_addr_const (asm_out_file, symbol);
4182 fputc ('\n', asm_out_file);
4183 }
4184}
4185
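/* Output the assembly for a casesi dispatch: load the jump-table entry
 selected by operand 1, add it (suitably scaled) to the address of a
 local anchor label, and branch to the result. */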
4186const char*
4187aarch64_output_casesi (rtx *operands)
4188{
4189 char buf[100];
4190 char label[100];
9d821fa5 4191 rtx diff_vec = PATTERN (next_active_insn (operands[2]));
4192 int index;
4193 static const char *const patterns[4][2] =
4194 {
4195 {
4196 "ldrb\t%w3, [%0,%w1,uxtw]",
4197 "add\t%3, %4, %w3, sxtb #2"
4198 },
4199 {
4200 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4201 "add\t%3, %4, %w3, sxth #2"
4202 },
4203 {
4204 "ldr\t%w3, [%0,%w1,uxtw #2]",
4205 "add\t%3, %4, %w3, sxtw #2"
4206 },
4207 /* We assume that DImode is only generated when not optimizing and
4208 that we don't really need 64-bit address offsets. That would
4209 imply an object file with 8GB of code in a single function! */
4210 {
4211 "ldr\t%w3, [%0,%w1,uxtw #2]",
4212 "add\t%3, %4, %w3, sxtw #2"
4213 }
4214 };
4215
4216 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4217
4218 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4219
4220 gcc_assert (index >= 0 && index <= 3);
4221
 4222 /* Need to implement table size reduction, by changing the code below. */
4223 output_asm_insn (patterns[index][0], operands);
4224 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4225 snprintf (buf, sizeof (buf),
4226 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4227 output_asm_insn (buf, operands);
4228 output_asm_insn (patterns[index][1], operands);
4229 output_asm_insn ("br\t%3", operands);
4230 assemble_label (asm_out_file, label);
4231 return "";
4232}
4233
4234
4235/* Return size in bits of an arithmetic operand which is shifted/scaled and
4236 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4237 operator. */
4238
4239int
4240aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4241{
4242 if (shift >= 0 && shift <= 3)
4243 {
4244 int size;
4245 for (size = 8; size <= 32; size *= 2)
4246 {
4247 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4248 if (mask == bits << shift)
4249 return size;
4250 }
4251 }
4252 return 0;
4253}
4254
4255static bool
4256aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4257 const_rtx x ATTRIBUTE_UNUSED)
4258{
4259 /* We can't use blocks for constants when we're using a per-function
4260 constant pool. */
4261 return false;
4262}
4263
4264static section *
4265aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4266 rtx x ATTRIBUTE_UNUSED,
4267 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4268{
4269 /* Force all constant pool entries into the current function section. */
4270 return function_section (current_function_decl);
4271}
4272
4273
4274/* Costs. */
4275
4276/* Helper function for rtx cost calculation. Strip a shift expression
4277 from X. Returns the inner operand if successful, or the original
4278 expression on failure. */
4279static rtx
4280aarch64_strip_shift (rtx x)
4281{
4282 rtx op = x;
4283
4284 if ((GET_CODE (op) == ASHIFT
4285 || GET_CODE (op) == ASHIFTRT
4286 || GET_CODE (op) == LSHIFTRT)
4287 && CONST_INT_P (XEXP (op, 1)))
4288 return XEXP (op, 0);
4289
4290 if (GET_CODE (op) == MULT
4291 && CONST_INT_P (XEXP (op, 1))
4292 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4293 return XEXP (op, 0);
4294
4295 return x;
4296}
4297
4298/* Helper function for rtx cost calculation. Strip a shift or extend
4299 expression from X. Returns the inner operand if successful, or the
4300 original expression on failure. We deal with a number of possible
4301 canonicalization variations here. */
4302static rtx
4303aarch64_strip_shift_or_extend (rtx x)
4304{
4305 rtx op = x;
4306
4307 /* Zero and sign extraction of a widened value. */
4308 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4309 && XEXP (op, 2) == const0_rtx
4310 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4311 XEXP (op, 1)))
4312 return XEXP (XEXP (op, 0), 0);
4313
4314 /* It can also be represented (for zero-extend) as an AND with an
4315 immediate. */
4316 if (GET_CODE (op) == AND
4317 && GET_CODE (XEXP (op, 0)) == MULT
4318 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4319 && CONST_INT_P (XEXP (op, 1))
4320 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4321 INTVAL (XEXP (op, 1))) != 0)
4322 return XEXP (XEXP (op, 0), 0);
4323
4324 /* Now handle extended register, as this may also have an optional
4325 left shift by 1..4. */
4326 if (GET_CODE (op) == ASHIFT
4327 && CONST_INT_P (XEXP (op, 1))
4328 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4329 op = XEXP (op, 0);
4330
4331 if (GET_CODE (op) == ZERO_EXTEND
4332 || GET_CODE (op) == SIGN_EXTEND)
4333 op = XEXP (op, 0);
4334
4335 if (op != x)
4336 return op;
4337
4338 return aarch64_strip_shift (x);
4339}
4340
4341/* Calculate the cost of calculating X, storing it in *COST. Result
4342 is true if the total cost of the operation has now been calculated. */
4343static bool
4344aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4345 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4346{
4347 rtx op0, op1;
4348 const struct cpu_rtx_cost_table *extra_cost
4349 = aarch64_tune_params->insn_extra_cost;
4350
4351 switch (code)
4352 {
4353 case SET:
4354 op0 = SET_DEST (x);
4355 op1 = SET_SRC (x);
4356
4357 switch (GET_CODE (op0))
4358 {
4359 case MEM:
4360 if (speed)
4361 *cost += extra_cost->memory_store;
4362
4363 if (op1 != const0_rtx)
4364 *cost += rtx_cost (op1, SET, 1, speed);
4365 return true;
4366
4367 case SUBREG:
4368 if (! REG_P (SUBREG_REG (op0)))
4369 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4370 /* Fall through. */
4371 case REG:
4372 /* Cost is just the cost of the RHS of the set. */
4373 *cost += rtx_cost (op1, SET, 1, true);
4374 return true;
4375
4376 case ZERO_EXTRACT: /* Bit-field insertion. */
4377 case SIGN_EXTRACT:
4378 /* Strip any redundant widening of the RHS to meet the width of
4379 the target. */
4380 if (GET_CODE (op1) == SUBREG)
4381 op1 = SUBREG_REG (op1);
4382 if ((GET_CODE (op1) == ZERO_EXTEND
4383 || GET_CODE (op1) == SIGN_EXTEND)
4384 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4385 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4386 >= INTVAL (XEXP (op0, 1))))
4387 op1 = XEXP (op1, 0);
4388 *cost += rtx_cost (op1, SET, 1, speed);
4389 return true;
4390
4391 default:
4392 break;
4393 }
4394 return false;
4395
4396 case MEM:
4397 if (speed)
4398 *cost += extra_cost->memory_load;
4399
4400 return true;
4401
4402 case NEG:
4403 op0 = CONST0_RTX (GET_MODE (x));
4404 op1 = XEXP (x, 0);
4405 goto cost_minus;
4406
4407 case COMPARE:
4408 op0 = XEXP (x, 0);
4409 op1 = XEXP (x, 1);
4410
4411 if (op1 == const0_rtx
4412 && GET_CODE (op0) == AND)
4413 {
4414 x = op0;
4415 goto cost_logic;
4416 }
4417
4418 /* Comparisons can work if the order is swapped.
4419 Canonicalization puts the more complex operation first, but
4420 we want it in op1. */
4421 if (! (REG_P (op0)
4422 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4423 {
4424 op0 = XEXP (x, 1);
4425 op1 = XEXP (x, 0);
4426 }
4427 goto cost_minus;
4428
4429 case MINUS:
4430 op0 = XEXP (x, 0);
4431 op1 = XEXP (x, 1);
4432
4433 cost_minus:
4434 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4435 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4436 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4437 {
4438 if (op0 != const0_rtx)
4439 *cost += rtx_cost (op0, MINUS, 0, speed);
4440
4441 if (CONST_INT_P (op1))
4442 {
4443 if (!aarch64_uimm12_shift (INTVAL (op1)))
4444 *cost += rtx_cost (op1, MINUS, 1, speed);
4445 }
4446 else
4447 {
4448 op1 = aarch64_strip_shift_or_extend (op1);
4449 *cost += rtx_cost (op1, MINUS, 1, speed);
4450 }
4451 return true;
4452 }
4453
4454 return false;
4455
4456 case PLUS:
4457 op0 = XEXP (x, 0);
4458 op1 = XEXP (x, 1);
4459
4460 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4461 {
4462 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4463 {
4464 *cost += rtx_cost (op0, PLUS, 0, speed);
4465 }
4466 else
4467 {
4468 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4469
4470 if (new_op0 == op0
4471 && GET_CODE (op0) == MULT)
4472 {
4473 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4474 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4475 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4476 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4477 {
4478 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4479 speed)
4480 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4481 speed)
4482 + rtx_cost (op1, PLUS, 1, speed));
4483 if (speed)
4484 *cost += extra_cost->int_multiply_extend_add;
4485 return true;
4486 }
4487 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4488 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4489 + rtx_cost (op1, PLUS, 1, speed));
4490
4491 if (speed)
 4492 *cost += extra_cost->int_multiply_add;
 /* The MULT has now been costed in full; do not fall through
 and cost NEW_OP0 again. */
 return true;
 4493 }
4494
4495 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4496 + rtx_cost (op1, PLUS, 1, speed));
4497 }
4498 return true;
4499 }
4500
4501 return false;
4502
4503 case IOR:
4504 case XOR:
4505 case AND:
4506 cost_logic:
4507 op0 = XEXP (x, 0);
4508 op1 = XEXP (x, 1);
4509
4510 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4511 {
4512 if (CONST_INT_P (op1)
4513 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4514 {
4515 *cost += rtx_cost (op0, AND, 0, speed);
4516 }
4517 else
4518 {
4519 if (GET_CODE (op0) == NOT)
4520 op0 = XEXP (op0, 0);
4521 op0 = aarch64_strip_shift (op0);
4522 *cost += (rtx_cost (op0, AND, 0, speed)
4523 + rtx_cost (op1, AND, 1, speed));
4524 }
4525 return true;
4526 }
4527 return false;
4528
4529 case ZERO_EXTEND:
4530 if ((GET_MODE (x) == DImode
4531 && GET_MODE (XEXP (x, 0)) == SImode)
4532 || GET_CODE (XEXP (x, 0)) == MEM)
4533 {
4534 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4535 return true;
4536 }
4537 return false;
4538
4539 case SIGN_EXTEND:
4540 if (GET_CODE (XEXP (x, 0)) == MEM)
4541 {
4542 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4543 return true;
4544 }
4545 return false;
4546
4547 case ROTATE:
4548 if (!CONST_INT_P (XEXP (x, 1)))
4549 *cost += COSTS_N_INSNS (2);
4550 /* Fall through. */
4551 case ROTATERT:
4552 case LSHIFTRT:
4553 case ASHIFT:
4554 case ASHIFTRT:
4555
4556 /* Shifting by a register often takes an extra cycle. */
4557 if (speed && !CONST_INT_P (XEXP (x, 1)))
4558 *cost += extra_cost->register_shift;
4559
4560 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4561 return true;
4562
4563 case HIGH:
4564 if (!CONSTANT_P (XEXP (x, 0)))
4565 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4566 return true;
4567
4568 case LO_SUM:
4569 if (!CONSTANT_P (XEXP (x, 1)))
4570 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4571 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4572 return true;
4573
4574 case ZERO_EXTRACT:
4575 case SIGN_EXTRACT:
4576 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4577 return true;
4578
4579 case MULT:
4580 op0 = XEXP (x, 0);
4581 op1 = XEXP (x, 1);
4582
4583 *cost = COSTS_N_INSNS (1);
4584 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4585 {
4586 if (CONST_INT_P (op1)
4587 && exact_log2 (INTVAL (op1)) > 0)
4588 {
4589 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4590 return true;
4591 }
4592
4593 if ((GET_CODE (op0) == ZERO_EXTEND
4594 && GET_CODE (op1) == ZERO_EXTEND)
4595 || (GET_CODE (op0) == SIGN_EXTEND
4596 && GET_CODE (op1) == SIGN_EXTEND))
4597 {
4598 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4599 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4600 if (speed)
4601 *cost += extra_cost->int_multiply_extend;
4602 return true;
4603 }
4604
4605 if (speed)
4606 *cost += extra_cost->int_multiply;
4607 }
4608 else if (speed)
4609 {
4610 if (GET_MODE (x) == DFmode)
4611 *cost += extra_cost->double_multiply;
4612 else if (GET_MODE (x) == SFmode)
4613 *cost += extra_cost->float_multiply;
4614 }
4615
4616 return false; /* All arguments need to be in registers. */
4617
4618 case MOD:
4619 case UMOD:
4620 *cost = COSTS_N_INSNS (2);
4621 if (speed)
4622 {
4623 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4624 *cost += (extra_cost->int_multiply_add
4625 + extra_cost->int_divide);
4626 else if (GET_MODE (x) == DFmode)
4627 *cost += (extra_cost->double_multiply
4628 + extra_cost->double_divide);
4629 else if (GET_MODE (x) == SFmode)
4630 *cost += (extra_cost->float_multiply
4631 + extra_cost->float_divide);
4632 }
4633 return false; /* All arguments need to be in registers. */
4634
4635 case DIV:
4636 case UDIV:
4637 *cost = COSTS_N_INSNS (1);
4638 if (speed)
4639 {
4640 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4641 *cost += extra_cost->int_divide;
4642 else if (GET_MODE (x) == DFmode)
4643 *cost += extra_cost->double_divide;
4644 else if (GET_MODE (x) == SFmode)
4645 *cost += extra_cost->float_divide;
4646 }
4647 return false; /* All arguments need to be in registers. */
4648
4649 default:
4650 break;
4651 }
4652 return false;
4653}
4654
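/* Implement TARGET_ADDRESS_COST. Classify the address X and return the
 corresponding cost from the tuning tables. */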
4655static int
4656aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4657 enum machine_mode mode ATTRIBUTE_UNUSED,
4658 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4659{
4660 enum rtx_code c = GET_CODE (x);
4661 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4662
4663 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4664 return addr_cost->pre_modify;
4665
4666 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4667 return addr_cost->post_modify;
4668
4669 if (c == PLUS)
4670 {
4671 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4672 return addr_cost->imm_offset;
4673 else if (GET_CODE (XEXP (x, 0)) == MULT
4674 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4675 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4676 return addr_cost->register_extend;
4677
4678 return addr_cost->register_offset;
4679 }
4680 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4681 return addr_cost->imm_offset;
4682
4683 return 0;
4684}
4685
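/* Implement TARGET_REGISTER_MOVE_COST. */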
4686static int
4687aarch64_register_move_cost (enum machine_mode mode,
4688 reg_class_t from, reg_class_t to)
4689{
4690 const struct cpu_regmove_cost *regmove_cost
4691 = aarch64_tune_params->regmove_cost;
4692
4693 if (from == GENERAL_REGS && to == GENERAL_REGS)
4694 return regmove_cost->GP2GP;
4695 else if (from == GENERAL_REGS)
4696 return regmove_cost->GP2FP;
4697 else if (to == GENERAL_REGS)
4698 return regmove_cost->FP2GP;
4699
4700 /* When AdvSIMD instructions are disabled it is not possible to move
4701 a 128-bit value directly between Q registers. This is handled in
4702 secondary reload. A general register is used as a scratch to move
4703 the upper DI value and the lower DI value is moved directly,
4704 hence the cost is the sum of three moves. */
4705
 4706 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
4707 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4708
4709 return regmove_cost->FP2FP;
4710}
4711
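/* Implement TARGET_MEMORY_MOVE_COST. */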
4712static int
4713aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4714 reg_class_t rclass ATTRIBUTE_UNUSED,
4715 bool in ATTRIBUTE_UNUSED)
4716{
4717 return aarch64_tune_params->memmov_cost;
4718}
4719
4720static void initialize_aarch64_code_model (void);
4721
4722/* Parse the architecture extension string. */
4723
4724static void
4725aarch64_parse_extension (char *str)
4726{
4727 /* The extension string is parsed left to right. */
4728 const struct aarch64_option_extension *opt = NULL;
4729
4730 /* Flag to say whether we are adding or removing an extension. */
4731 int adding_ext = -1;
4732
4733 while (str != NULL && *str != 0)
4734 {
4735 char *ext;
4736 size_t len;
4737
4738 str++;
4739 ext = strchr (str, '+');
4740
4741 if (ext != NULL)
4742 len = ext - str;
4743 else
4744 len = strlen (str);
4745
4746 if (len >= 2 && strncmp (str, "no", 2) == 0)
4747 {
4748 adding_ext = 0;
4749 len -= 2;
4750 str += 2;
4751 }
4752 else if (len > 0)
4753 adding_ext = 1;
4754
4755 if (len == 0)
4756 {
4757 error ("missing feature modifier after %qs", "+no");
4758 return;
4759 }
4760
4761 /* Scan over the extensions table trying to find an exact match. */
4762 for (opt = all_extensions; opt->name != NULL; opt++)
4763 {
4764 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
4765 {
4766 /* Add or remove the extension. */
4767 if (adding_ext)
4768 aarch64_isa_flags |= opt->flags_on;
4769 else
4770 aarch64_isa_flags &= ~(opt->flags_off);
4771 break;
4772 }
4773 }
4774
4775 if (opt->name == NULL)
4776 {
4777 /* Extension not found in list. */
4778 error ("unknown feature modifier %qs", str);
4779 return;
4780 }
4781
4782 str = ext;
4783 };
4784
4785 return;
4786}
4787
4788/* Parse the ARCH string. */
4789
4790static void
4791aarch64_parse_arch (void)
4792{
4793 char *ext;
4794 const struct processor *arch;
4795 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
4796 size_t len;
4797
4798 strcpy (str, aarch64_arch_string);
4799
4800 ext = strchr (str, '+');
4801
4802 if (ext != NULL)
4803 len = ext - str;
4804 else
4805 len = strlen (str);
4806
4807 if (len == 0)
4808 {
4809 error ("missing arch name in -march=%qs", str);
4810 return;
4811 }
4812
4813 /* Loop through the list of supported ARCHs to find a match. */
4814 for (arch = all_architectures; arch->name != NULL; arch++)
4815 {
4816 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
4817 {
4818 selected_arch = arch;
4819 aarch64_isa_flags = selected_arch->flags;
4820 selected_cpu = &all_cores[selected_arch->core];
4821
4822 if (ext != NULL)
4823 {
4824 /* ARCH string contains at least one extension. */
4825 aarch64_parse_extension (ext);
4826 }
4827
4828 return;
4829 }
4830 }
4831
4832 /* ARCH name not found in list. */
4833 error ("unknown value %qs for -march", str);
4834 return;
4835}
4836
4837/* Parse the CPU string. */
4838
4839static void
4840aarch64_parse_cpu (void)
4841{
4842 char *ext;
4843 const struct processor *cpu;
4844 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
4845 size_t len;
4846
4847 strcpy (str, aarch64_cpu_string);
4848
4849 ext = strchr (str, '+');
4850
4851 if (ext != NULL)
4852 len = ext - str;
4853 else
4854 len = strlen (str);
4855
4856 if (len == 0)
4857 {
4858 error ("missing cpu name in -mcpu=%qs", str);
4859 return;
4860 }
4861
4862 /* Loop through the list of supported CPUs to find a match. */
4863 for (cpu = all_cores; cpu->name != NULL; cpu++)
4864 {
4865 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
4866 {
4867 selected_cpu = cpu;
4868 aarch64_isa_flags = selected_cpu->flags;
4869
4870 if (ext != NULL)
4871 {
4872 /* CPU string contains at least one extension. */
4873 aarch64_parse_extension (ext);
4874 }
4875
4876 return;
4877 }
4878 }
4879
4880 /* CPU name not found in list. */
4881 error ("unknown value %qs for -mcpu", str);
4882 return;
4883}
4884
4885/* Parse the TUNE string. */
4886
4887static void
4888aarch64_parse_tune (void)
4889{
4890 const struct processor *cpu;
4891 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
4892 strcpy (str, aarch64_tune_string);
4893
4894 /* Loop through the list of supported CPUs to find a match. */
4895 for (cpu = all_cores; cpu->name != NULL; cpu++)
4896 {
4897 if (strcmp (cpu->name, str) == 0)
4898 {
4899 selected_tune = cpu;
4900 return;
4901 }
4902 }
4903
4904 /* CPU name not found in list. */
4905 error ("unknown value %qs for -mtune", str);
4906 return;
4907}
4908
4909
4910/* Implement TARGET_OPTION_OVERRIDE. */
4911
4912static void
4913aarch64_override_options (void)
4914{
 4915 /* -march wins over -mcpu: when -march is given, the CPU is derived from the
 4916 selected architecture and any explicit -mcpu is ignored; otherwise -mcpu
 4917 (if given) selects both. -mtune can be combined with either option. */
4918
4919 if (aarch64_arch_string)
4920 {
4921 aarch64_parse_arch ();
4922 aarch64_cpu_string = NULL;
4923 }
4924
4925 if (aarch64_cpu_string)
4926 {
4927 aarch64_parse_cpu ();
4928 selected_arch = NULL;
4929 }
4930
4931 if (aarch64_tune_string)
4932 {
4933 aarch64_parse_tune ();
4934 }
4935
4936 initialize_aarch64_code_model ();
4937
4938 aarch64_build_bitmask_table ();
4939
4940 /* This target defaults to strict volatile bitfields. */
4941 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
4942 flag_strict_volatile_bitfields = 1;
4943
4944 /* If the user did not specify a processor, choose the default
4945 one for them. This will be the CPU set during configuration using
4946 --with-cpu, otherwise it is "generic". */
4947 if (!selected_cpu)
4948 {
4949 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
4950 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
4951 }
4952
4953 gcc_assert (selected_cpu);
4954
4955 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
4956 if (!selected_tune)
4957 selected_tune = &all_cores[selected_cpu->core];
4958
4959 aarch64_tune_flags = selected_tune->flags;
4960 aarch64_tune = selected_tune->core;
4961 aarch64_tune_params = selected_tune->tune;
4962
4963 aarch64_override_options_after_change ();
4964}
4965
4966/* Implement targetm.override_options_after_change. */
4967
4968static void
4969aarch64_override_options_after_change (void)
4970{
4971 faked_omit_frame_pointer = false;
4972
4973 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
4974 that aarch64_frame_pointer_required will be called. We need to remember
4975 whether flag_omit_frame_pointer was turned on normally or just faked. */
4976
4977 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
4978 {
4979 flag_omit_frame_pointer = true;
4980 faked_omit_frame_pointer = true;
4981 }
4982}
4983
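/* Allocate a fresh, zero-initialized machine_function for the current
 function. Installed as init_machine_status by aarch64_init_expanders. */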
4984static struct machine_function *
4985aarch64_init_machine_status (void)
4986{
4987 struct machine_function *machine;
4988 machine = ggc_alloc_cleared_machine_function ();
4989 return machine;
4990}
4991
4992void
4993aarch64_init_expanders (void)
4994{
4995 init_machine_status = aarch64_init_machine_status;
4996}
4997
4998/* Work out the code model to use, taking flag_pic into account, and
 diagnose combinations that are not implemented. */
4999static void
5000initialize_aarch64_code_model (void)
5001{
5002 if (flag_pic)
5003 {
5004 switch (aarch64_cmodel_var)
5005 {
5006 case AARCH64_CMODEL_TINY:
5007 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5008 break;
5009 case AARCH64_CMODEL_SMALL:
5010 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5011 break;
5012 case AARCH64_CMODEL_LARGE:
5013 sorry ("code model %qs with -f%s", "large",
5014 flag_pic > 1 ? "PIC" : "pic");
5015 default:
5016 gcc_unreachable ();
5017 }
5018 }
5019 else
5020 aarch64_cmodel = aarch64_cmodel_var;
5021}
5022
5023/* Return true if SYMBOL_REF X binds locally. */
5024
5025static bool
5026aarch64_symbol_binds_local_p (const_rtx x)
5027{
5028 return (SYMBOL_REF_DECL (x)
5029 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5030 : SYMBOL_REF_LOCAL_P (x));
5031}
5032
5033/* Return true if SYMBOL_REF X is thread-local. */
5034static bool
5035aarch64_tls_symbol_p (rtx x)
5036{
5037 if (! TARGET_HAVE_TLS)
5038 return false;
5039
5040 if (GET_CODE (x) != SYMBOL_REF)
5041 return false;
5042
5043 return SYMBOL_REF_TLS_MODEL (x) != 0;
5044}
5045
5046/* Classify a TLS symbol into one of the TLS kinds. */
5047enum aarch64_symbol_type
5048aarch64_classify_tls_symbol (rtx x)
5049{
5050 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5051
5052 switch (tls_kind)
5053 {
5054 case TLS_MODEL_GLOBAL_DYNAMIC:
5055 case TLS_MODEL_LOCAL_DYNAMIC:
5056 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5057
5058 case TLS_MODEL_INITIAL_EXEC:
5059 return SYMBOL_SMALL_GOTTPREL;
5060
5061 case TLS_MODEL_LOCAL_EXEC:
5062 return SYMBOL_SMALL_TPREL;
5063
5064 case TLS_MODEL_EMULATED:
5065 case TLS_MODEL_NONE:
5066 return SYMBOL_FORCE_TO_MEM;
5067
5068 default:
5069 gcc_unreachable ();
5070 }
5071}
5072
5073/* Return the method that should be used to access SYMBOL_REF or
5074 LABEL_REF X in context CONTEXT. */
17f4d4bf 5075
5076enum aarch64_symbol_type
5077aarch64_classify_symbol (rtx x,
5078 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5079{
5080 if (GET_CODE (x) == LABEL_REF)
5081 {
5082 switch (aarch64_cmodel)
5083 {
5084 case AARCH64_CMODEL_LARGE:
5085 return SYMBOL_FORCE_TO_MEM;
5086
5087 case AARCH64_CMODEL_TINY_PIC:
5088 case AARCH64_CMODEL_TINY:
5089 return SYMBOL_TINY_ABSOLUTE;
5090
5091 case AARCH64_CMODEL_SMALL_PIC:
5092 case AARCH64_CMODEL_SMALL:
5093 return SYMBOL_SMALL_ABSOLUTE;
5094
5095 default:
5096 gcc_unreachable ();
5097 }
5098 }
5099
17f4d4bf 5100 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 5101 {
5102 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
5103 || CONSTANT_POOL_ADDRESS_P (x))
5104 return SYMBOL_FORCE_TO_MEM;
5105
5106 if (aarch64_tls_symbol_p (x))
5107 return aarch64_classify_tls_symbol (x);
5108
5109 switch (aarch64_cmodel)
5110 {
5111 case AARCH64_CMODEL_TINY:
5112 if (SYMBOL_REF_WEAK (x))
5113 return SYMBOL_FORCE_TO_MEM;
5114 return SYMBOL_TINY_ABSOLUTE;
5115
5116 case AARCH64_CMODEL_SMALL:
5117 if (SYMBOL_REF_WEAK (x))
5118 return SYMBOL_FORCE_TO_MEM;
5119 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 5120
17f4d4bf 5121 case AARCH64_CMODEL_TINY_PIC:
5122 if (!aarch64_symbol_binds_local_p (x))
5123 return SYMBOL_SMALL_GOT;
5124 return SYMBOL_TINY_ABSOLUTE;
5125
5126 case AARCH64_CMODEL_SMALL_PIC:
5127 if (!aarch64_symbol_binds_local_p (x))
5128 return SYMBOL_SMALL_GOT;
5129 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 5130
5131 default:
5132 gcc_unreachable ();
5133 }
43e9d192 5134 }
17f4d4bf 5135
5136 /* By default push everything into the constant pool. */
5137 return SYMBOL_FORCE_TO_MEM;
5138}
5139
5140bool
5141aarch64_constant_address_p (rtx x)
5142{
5143 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5144}
5145
5146bool
5147aarch64_legitimate_pic_operand_p (rtx x)
5148{
5149 if (GET_CODE (x) == SYMBOL_REF
5150 || (GET_CODE (x) == CONST
5151 && GET_CODE (XEXP (x, 0)) == PLUS
5152 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5153 return false;
5154
5155 return true;
5156}
5157
5158/* Return true if X holds either a quarter-precision or
5159 floating-point +0.0 constant. */
5160static bool
5161aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5162{
5163 if (!CONST_DOUBLE_P (x))
5164 return false;
5165
 5166 /* TODO: We could handle moving 0.0 to a TFmode register,
 5167 but first we would like to refactor the movtf_aarch64
 5168 to be more amenable to splitting moves properly and
 5169 correctly gating on TARGET_SIMD. For now, reject all
 5170 constants that are not destined for SFmode or DFmode registers. */
5171 if (!(mode == SFmode || mode == DFmode))
5172 return false;
5173
5174 if (aarch64_float_const_zero_rtx_p (x))
5175 return true;
5176 return aarch64_float_const_representable_p (x);
5177}
5178
5179static bool
5180aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5181{
5182 /* Do not allow vector struct mode constants. We could support
5183 0 and -1 easily, but they need support in aarch64-simd.md. */
5184 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5185 return false;
5186
5187 /* This could probably go away because
5188 we now decompose CONST_INTs according to expand_mov_immediate. */
5189 if ((GET_CODE (x) == CONST_VECTOR
48063b9d 5190 && aarch64_simd_valid_immediate (x, mode, false, NULL))
5191 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5192 return !targetm.cannot_force_const_mem (mode, x);
5193
5194 if (GET_CODE (x) == HIGH
5195 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5196 return true;
5197
5198 return aarch64_constant_address_p (x);
5199}
5200
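/* Emit an instruction to load the thread pointer into TARGET (allocating a
 fresh register if TARGET is not a suitable Pmode register) and return the
 register used. */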
a5bc806c 5201rtx
5202aarch64_load_tp (rtx target)
5203{
5204 if (!target
5205 || GET_MODE (target) != Pmode
5206 || !register_operand (target, Pmode))
5207 target = gen_reg_rtx (Pmode);
5208
5209 /* Can return in any reg. */
5210 emit_insn (gen_aarch64_load_tp_hard (target));
5211 return target;
5212}
5213
5214/* On AAPCS systems, this is the "struct __va_list". */
5215static GTY(()) tree va_list_type;
5216
5217/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5218 Return the type to use as __builtin_va_list.
5219
5220 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5221
5222 struct __va_list
5223 {
5224 void *__stack;
5225 void *__gr_top;
5226 void *__vr_top;
5227 int __gr_offs;
5228 int __vr_offs;
5229 }; */
5230
5231static tree
5232aarch64_build_builtin_va_list (void)
5233{
5234 tree va_list_name;
5235 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5236
5237 /* Create the type. */
5238 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5239 /* Give it the required name. */
5240 va_list_name = build_decl (BUILTINS_LOCATION,
5241 TYPE_DECL,
5242 get_identifier ("__va_list"),
5243 va_list_type);
5244 DECL_ARTIFICIAL (va_list_name) = 1;
5245 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 5246 TYPE_STUB_DECL (va_list_type) = va_list_name;
5247
5248 /* Create the fields. */
5249 f_stack = build_decl (BUILTINS_LOCATION,
5250 FIELD_DECL, get_identifier ("__stack"),
5251 ptr_type_node);
5252 f_grtop = build_decl (BUILTINS_LOCATION,
5253 FIELD_DECL, get_identifier ("__gr_top"),
5254 ptr_type_node);
5255 f_vrtop = build_decl (BUILTINS_LOCATION,
5256 FIELD_DECL, get_identifier ("__vr_top"),
5257 ptr_type_node);
5258 f_groff = build_decl (BUILTINS_LOCATION,
5259 FIELD_DECL, get_identifier ("__gr_offs"),
5260 integer_type_node);
5261 f_vroff = build_decl (BUILTINS_LOCATION,
5262 FIELD_DECL, get_identifier ("__vr_offs"),
5263 integer_type_node);
5264
5265 DECL_ARTIFICIAL (f_stack) = 1;
5266 DECL_ARTIFICIAL (f_grtop) = 1;
5267 DECL_ARTIFICIAL (f_vrtop) = 1;
5268 DECL_ARTIFICIAL (f_groff) = 1;
5269 DECL_ARTIFICIAL (f_vroff) = 1;
5270
5271 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5272 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5273 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5274 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5275 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5276
5277 TYPE_FIELDS (va_list_type) = f_stack;
5278 DECL_CHAIN (f_stack) = f_grtop;
5279 DECL_CHAIN (f_grtop) = f_vrtop;
5280 DECL_CHAIN (f_vrtop) = f_groff;
5281 DECL_CHAIN (f_groff) = f_vroff;
5282
5283 /* Compute its layout. */
5284 layout_type (va_list_type);
5285
5286 return va_list_type;
5287}
5288
5289/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5290static void
5291aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5292{
5293 const CUMULATIVE_ARGS *cum;
5294 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5295 tree stack, grtop, vrtop, groff, vroff;
5296 tree t;
5297 int gr_save_area_size;
5298 int vr_save_area_size;
5299 int vr_offset;
5300
5301 cum = &crtl->args.info;
5302 gr_save_area_size
5303 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5304 vr_save_area_size
5305 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5306
5307 if (TARGET_GENERAL_REGS_ONLY)
5308 {
5309 if (cum->aapcs_nvrn > 0)
5310 sorry ("%qs and floating point or vector arguments",
5311 "-mgeneral-regs-only");
5312 vr_save_area_size = 0;
5313 }
5314
5315 f_stack = TYPE_FIELDS (va_list_type_node);
5316 f_grtop = DECL_CHAIN (f_stack);
5317 f_vrtop = DECL_CHAIN (f_grtop);
5318 f_groff = DECL_CHAIN (f_vrtop);
5319 f_vroff = DECL_CHAIN (f_groff);
5320
5321 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5322 NULL_TREE);
5323 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5324 NULL_TREE);
5325 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5326 NULL_TREE);
5327 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5328 NULL_TREE);
5329 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5330 NULL_TREE);
5331
5332 /* Emit code to initialize STACK, which points to the next varargs stack
5333 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5334 by named arguments. STACK is 8-byte aligned. */
5335 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5336 if (cum->aapcs_stack_size > 0)
5337 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5338 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5339 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5340
5341 /* Emit code to initialize GRTOP, the top of the GR save area.
5342 virtual_incoming_args_rtx should have been 16 byte aligned. */
5343 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5344 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5345 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5346
5347 /* Emit code to initialize VRTOP, the top of the VR save area.
5348 This address is gr_save_area_bytes below GRTOP, rounded
5349 down to the next 16-byte boundary. */
5350 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5351 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5352 STACK_BOUNDARY / BITS_PER_UNIT);
5353
5354 if (vr_offset)
5355 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5356 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5357 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5358
5359 /* Emit code to initialize GROFF, the offset from GRTOP of the
5360 next GPR argument. */
5361 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5362 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5363 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5364
 5365 /* Likewise emit code to initialize VROFF, the offset from VRTOP
 5366 of the next VR argument. */
5367 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5368 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5369 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5370}
5371
5372/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5373
5374static tree
5375aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5376 gimple_seq *post_p ATTRIBUTE_UNUSED)
5377{
5378 tree addr;
5379 bool indirect_p;
5380 bool is_ha; /* is HFA or HVA. */
5381 bool dw_align; /* double-word align. */
5382 enum machine_mode ag_mode = VOIDmode;
5383 int nregs;
5384 enum machine_mode mode;
5385
5386 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5387 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5388 HOST_WIDE_INT size, rsize, adjust, align;
5389 tree t, u, cond1, cond2;
5390
5391 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5392 if (indirect_p)
5393 type = build_pointer_type (type);
5394
5395 mode = TYPE_MODE (type);
5396
5397 f_stack = TYPE_FIELDS (va_list_type_node);
5398 f_grtop = DECL_CHAIN (f_stack);
5399 f_vrtop = DECL_CHAIN (f_grtop);
5400 f_groff = DECL_CHAIN (f_vrtop);
5401 f_vroff = DECL_CHAIN (f_groff);
5402
5403 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5404 f_stack, NULL_TREE);
5405 size = int_size_in_bytes (type);
5406 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5407
5408 dw_align = false;
5409 adjust = 0;
5410 if (aarch64_vfp_is_call_or_return_candidate (mode,
5411 type,
5412 &ag_mode,
5413 &nregs,
5414 &is_ha))
5415 {
5416 /* TYPE passed in fp/simd registers. */
5417 if (TARGET_GENERAL_REGS_ONLY)
5418 sorry ("%qs and floating point or vector arguments",
5419 "-mgeneral-regs-only");
5420
5421 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5422 unshare_expr (valist), f_vrtop, NULL_TREE);
5423 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5424 unshare_expr (valist), f_vroff, NULL_TREE);
5425
5426 rsize = nregs * UNITS_PER_VREG;
5427
5428 if (is_ha)
5429 {
5430 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5431 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5432 }
5433 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5434 && size < UNITS_PER_VREG)
5435 {
5436 adjust = UNITS_PER_VREG - size;
5437 }
5438 }
5439 else
5440 {
5441 /* TYPE passed in general registers. */
5442 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5443 unshare_expr (valist), f_grtop, NULL_TREE);
5444 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5445 unshare_expr (valist), f_groff, NULL_TREE);
5446 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5447 nregs = rsize / UNITS_PER_WORD;
5448
5449 if (align > 8)
5450 dw_align = true;
5451
5452 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5453 && size < UNITS_PER_WORD)
5454 {
5455 adjust = UNITS_PER_WORD - size;
5456 }
5457 }
5458
5459 /* Get a local temporary for the field value. */
5460 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5461
5462 /* Emit code to branch if off >= 0. */
5463 t = build2 (GE_EXPR, boolean_type_node, off,
5464 build_int_cst (TREE_TYPE (off), 0));
5465 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5466
5467 if (dw_align)
5468 {
5469 /* Emit: offs = (offs + 15) & -16. */
5470 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5471 build_int_cst (TREE_TYPE (off), 15));
5472 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5473 build_int_cst (TREE_TYPE (off), -16));
5474 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5475 }
5476 else
5477 roundup = NULL;
5478
5479 /* Update ap.__[g|v]r_offs */
5480 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5481 build_int_cst (TREE_TYPE (off), rsize));
5482 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5483
5484 /* String up. */
5485 if (roundup)
5486 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5487
5488 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5489 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5490 build_int_cst (TREE_TYPE (f_off), 0));
5491 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5492
5493 /* String up: make sure the assignment happens before the use. */
5494 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5495 COND_EXPR_ELSE (cond1) = t;
5496
 5497 /* Prepare the trees that handle the argument when it is passed on the
 5498 stack; the top-level node is stored in ON_STACK. */
5499 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5500 if (align > 8)
5501 {
5502 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5503 t = fold_convert (intDI_type_node, arg);
5504 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5505 build_int_cst (TREE_TYPE (t), 15));
5506 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5507 build_int_cst (TREE_TYPE (t), -16));
5508 t = fold_convert (TREE_TYPE (arg), t);
5509 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5510 }
5511 else
5512 roundup = NULL;
5513 /* Advance ap.__stack */
5514 t = fold_convert (intDI_type_node, arg);
5515 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5516 build_int_cst (TREE_TYPE (t), size + 7));
5517 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5518 build_int_cst (TREE_TYPE (t), -8));
5519 t = fold_convert (TREE_TYPE (arg), t);
5520 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5521 /* String up roundup and advance. */
5522 if (roundup)
5523 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5524 /* String up with arg */
5525 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5526 /* Big-endianness related address adjustment. */
5527 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5528 && size < UNITS_PER_WORD)
5529 {
5530 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5531 size_int (UNITS_PER_WORD - size));
5532 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5533 }
5534
5535 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5536 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5537
5538 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5539 t = off;
5540 if (adjust)
5541 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5542 build_int_cst (TREE_TYPE (off), adjust));
5543
5544 t = fold_convert (sizetype, t);
5545 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5546
5547 if (is_ha)
5548 {
5549 /* type ha; // treat as "struct {ftype field[n];}"
5550 ... [computing offs]
5551 for (i = 0; i <nregs; ++i, offs += 16)
5552 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5553 return ha; */
5554 int i;
5555 tree tmp_ha, field_t, field_ptr_t;
5556
5557 /* Declare a local variable. */
5558 tmp_ha = create_tmp_var_raw (type, "ha");
5559 gimple_add_tmp_var (tmp_ha);
5560
5561 /* Establish the base type. */
5562 switch (ag_mode)
5563 {
5564 case SFmode:
5565 field_t = float_type_node;
5566 field_ptr_t = float_ptr_type_node;
5567 break;
5568 case DFmode:
5569 field_t = double_type_node;
5570 field_ptr_t = double_ptr_type_node;
5571 break;
5572 case TFmode:
5573 field_t = long_double_type_node;
5574 field_ptr_t = long_double_ptr_type_node;
5575 break;
 5576/* Half-precision and quad-precision floats are not fully supported yet. Enable
 5577 the following code once that support is complete; the correct type node
 5578 for __fp16 * still needs to be found. */
5579#if 0
5580 case HFmode:
5581 field_t = float_type_node;
5582 field_ptr_t = float_ptr_type_node;
5583 break;
5584#endif
5585 case V2SImode:
5586 case V4SImode:
5587 {
5588 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5589 field_t = build_vector_type_for_mode (innertype, ag_mode);
5590 field_ptr_t = build_pointer_type (field_t);
5591 }
5592 break;
5593 default:
5594 gcc_assert (0);
5595 }
5596
 5597 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area). */
5598 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5599 addr = t;
5600 t = fold_convert (field_ptr_t, addr);
5601 t = build2 (MODIFY_EXPR, field_t,
5602 build1 (INDIRECT_REF, field_t, tmp_ha),
5603 build1 (INDIRECT_REF, field_t, t));
5604
5605 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5606 for (i = 1; i < nregs; ++i)
5607 {
5608 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5609 u = fold_convert (field_ptr_t, addr);
5610 u = build2 (MODIFY_EXPR, field_t,
5611 build2 (MEM_REF, field_t, tmp_ha,
5612 build_int_cst (field_ptr_t,
5613 (i *
5614 int_size_in_bytes (field_t)))),
5615 build1 (INDIRECT_REF, field_t, u));
5616 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5617 }
5618
5619 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5620 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5621 }
5622
5623 COND_EXPR_ELSE (cond2) = t;
5624 addr = fold_convert (build_pointer_type (type), cond1);
5625 addr = build_va_arg_indirect_ref (addr);
5626
5627 if (indirect_p)
5628 addr = build_va_arg_indirect_ref (addr);
5629
5630 return addr;
5631}
5632
5633/* Implement TARGET_SETUP_INCOMING_VARARGS. */
5634
5635static void
5636aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5637 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5638 int no_rtl)
5639{
5640 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5641 CUMULATIVE_ARGS local_cum;
5642 int gr_saved, vr_saved;
5643
5644 /* The caller has advanced CUM up to, but not beyond, the last named
5645 argument. Advance a local copy of CUM past the last "real" named
5646 argument, to find out how many registers are left over. */
5647 local_cum = *cum;
5648 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
5649
 5650 /* Find out how many registers we need to save. */
5651 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5652 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5653
5654 if (TARGET_GENERAL_REGS_ONLY)
5655 {
5656 if (local_cum.aapcs_nvrn > 0)
5657 sorry ("%qs and floating point or vector arguments",
5658 "-mgeneral-regs-only");
5659 vr_saved = 0;
5660 }
5661
5662 if (!no_rtl)
5663 {
5664 if (gr_saved > 0)
5665 {
5666 rtx ptr, mem;
5667
5668 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5669 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5670 - gr_saved * UNITS_PER_WORD);
5671 mem = gen_frame_mem (BLKmode, ptr);
5672 set_mem_alias_set (mem, get_varargs_alias_set ());
5673
5674 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5675 mem, gr_saved);
5676 }
5677 if (vr_saved > 0)
5678 {
5679 /* We can't use move_block_from_reg, because it will use
5680 the wrong mode, storing D regs only. */
5681 enum machine_mode mode = TImode;
5682 int off, i;
5683
5684 /* Set OFF to the offset from virtual_incoming_args_rtx of
5685 the first vector register. The VR save area lies below
5686 the GR one, and is aligned to 16 bytes. */
5687 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5688 STACK_BOUNDARY / BITS_PER_UNIT);
5689 off -= vr_saved * UNITS_PER_VREG;
5690
5691 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5692 {
5693 rtx ptr, mem;
5694
5695 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5696 mem = gen_frame_mem (mode, ptr);
5697 set_mem_alias_set (mem, get_varargs_alias_set ());
5698 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5699 off += UNITS_PER_VREG;
5700 }
5701 }
5702 }
5703
5704 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5705 any complication of having crtl->args.pretend_args_size changed. */
5706 cfun->machine->saved_varargs_size
5707 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5708 STACK_BOUNDARY / BITS_PER_UNIT)
5709 + vr_saved * UNITS_PER_VREG);
5710}
5711
5712static void
5713aarch64_conditional_register_usage (void)
5714{
5715 int i;
5716 if (!TARGET_FLOAT)
5717 {
5718 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5719 {
5720 fixed_regs[i] = 1;
5721 call_used_regs[i] = 1;
5722 }
5723 }
5724}
5725
5726/* Walk down the type tree of TYPE counting consecutive base elements.
5727 If *MODEP is VOIDmode, then set it to the first valid floating point
5728 type. If a non-floating point type is found, or if a floating point
5729 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5730 otherwise return the count in the sub-tree. */
5731static int
5732aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5733{
5734 enum machine_mode mode;
5735 HOST_WIDE_INT size;
5736
5737 switch (TREE_CODE (type))
5738 {
5739 case REAL_TYPE:
5740 mode = TYPE_MODE (type);
5741 if (mode != DFmode && mode != SFmode && mode != TFmode)
5742 return -1;
5743
5744 if (*modep == VOIDmode)
5745 *modep = mode;
5746
5747 if (*modep == mode)
5748 return 1;
5749
5750 break;
5751
5752 case COMPLEX_TYPE:
5753 mode = TYPE_MODE (TREE_TYPE (type));
5754 if (mode != DFmode && mode != SFmode && mode != TFmode)
5755 return -1;
5756
5757 if (*modep == VOIDmode)
5758 *modep = mode;
5759
5760 if (*modep == mode)
5761 return 2;
5762
5763 break;
5764
5765 case VECTOR_TYPE:
5766 /* Use V2SImode and V4SImode as representatives of all 64-bit
5767 and 128-bit vector types. */
5768 size = int_size_in_bytes (type);
5769 switch (size)
5770 {
5771 case 8:
5772 mode = V2SImode;
5773 break;
5774 case 16:
5775 mode = V4SImode;
5776 break;
5777 default:
5778 return -1;
5779 }
5780
5781 if (*modep == VOIDmode)
5782 *modep = mode;
5783
5784 /* Vector modes are considered to be opaque: two vectors are
5785 equivalent for the purposes of being homogeneous aggregates
5786 if they are the same size. */
5787 if (*modep == mode)
5788 return 1;
5789
5790 break;
5791
5792 case ARRAY_TYPE:
5793 {
5794 int count;
5795 tree index = TYPE_DOMAIN (type);
5796
5797 /* Can't handle incomplete types. */
5798 if (!COMPLETE_TYPE_P (type))
5799 return -1;
5800
5801 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5802 if (count == -1
5803 || !index
5804 || !TYPE_MAX_VALUE (index)
5805 || !host_integerp (TYPE_MAX_VALUE (index), 1)
5806 || !TYPE_MIN_VALUE (index)
5807 || !host_integerp (TYPE_MIN_VALUE (index), 1)
5808 || count < 0)
5809 return -1;
5810
5811 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
5812 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
5813
5814 /* There must be no padding. */
5815 if (!host_integerp (TYPE_SIZE (type), 1)
5816 || (tree_low_cst (TYPE_SIZE (type), 1)
5817 != count * GET_MODE_BITSIZE (*modep)))
5818 return -1;
5819
5820 return count;
5821 }
5822
5823 case RECORD_TYPE:
5824 {
5825 int count = 0;
5826 int sub_count;
5827 tree field;
5828
5829 /* Can't handle incomplete types. */
5830 if (!COMPLETE_TYPE_P (type))
5831 return -1;
5832
5833 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5834 {
5835 if (TREE_CODE (field) != FIELD_DECL)
5836 continue;
5837
5838 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5839 if (sub_count < 0)
5840 return -1;
5841 count += sub_count;
5842 }
5843
5844 /* There must be no padding. */
5845 if (!host_integerp (TYPE_SIZE (type), 1)
5846 || (tree_low_cst (TYPE_SIZE (type), 1)
5847 != count * GET_MODE_BITSIZE (*modep)))
5848 return -1;
5849
5850 return count;
5851 }
5852
5853 case UNION_TYPE:
5854 case QUAL_UNION_TYPE:
5855 {
5856 /* These aren't very interesting except in a degenerate case. */
5857 int count = 0;
5858 int sub_count;
5859 tree field;
5860
5861 /* Can't handle incomplete types. */
5862 if (!COMPLETE_TYPE_P (type))
5863 return -1;
5864
5865 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5866 {
5867 if (TREE_CODE (field) != FIELD_DECL)
5868 continue;
5869
5870 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5871 if (sub_count < 0)
5872 return -1;
5873 count = count > sub_count ? count : sub_count;
5874 }
5875
5876 /* There must be no padding. */
5877 if (!host_integerp (TYPE_SIZE (type), 1)
5878 || (tree_low_cst (TYPE_SIZE (type), 1)
5879 != count * GET_MODE_BITSIZE (*modep)))
5880 return -1;
5881
5882 return count;
5883 }
5884
5885 default:
5886 break;
5887 }
5888
5889 return -1;
5890}
5891
5892/* Return TRUE if the type, as described by TYPE and MODE, is a composite
5893 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
5894 array types. The C99 floating-point complex types are also considered
5895 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
5896 types, which are GCC extensions and out of the scope of AAPCS64, are
5897 treated as composite types here as well.
5898
5899 Note that MODE itself is not sufficient in determining whether a type
5900 is such a composite type or not. This is because
5901 stor-layout.c:compute_record_mode may have already changed the MODE
5902 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
5903 structure with only one field may have its MODE set to the mode of the
5904 field. Also an integer mode whose size matches the size of the
5905 RECORD_TYPE type may be used to substitute the original mode
5906 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
5907 solely relied on. */
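/* For instance (illustrative only), a type such as

     struct single { double d; };

   may end up with TYPE_MODE == DFmode, yet it is still a composite type
   under the AAPCS64, which is why aarch64_composite_type_p checks TYPE
   before falling back on MODE.  */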
5908
5909static bool
5910aarch64_composite_type_p (const_tree type,
5911 enum machine_mode mode)
5912{
5913 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
5914 return true;
5915
5916 if (mode == BLKmode
5917 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
5918 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
5919 return true;
5920
5921 return false;
5922}
5923
5924/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
5925 type as described in AAPCS64 \S 4.1.2.
5926
5927 See the comment above aarch64_composite_type_p for the notes on MODE. */
5928
5929static bool
5930aarch64_short_vector_p (const_tree type,
5931 enum machine_mode mode)
5932{
5933 HOST_WIDE_INT size = -1;
5934
5935 if (type && TREE_CODE (type) == VECTOR_TYPE)
5936 size = int_size_in_bytes (type);
5937 else if (!aarch64_composite_type_p (type, mode)
5938 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5939 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
5940 size = GET_MODE_SIZE (mode);
5941
5942 return (size == 8 || size == 16) ? true : false;
5943}
5944
5945/* Return TRUE if an argument, whose type is described by TYPE and MODE,
5946 shall be passed or returned in simd/fp register(s) (providing these
5947 parameter passing registers are available).
5948
5949 Upon successful return, *COUNT returns the number of needed registers,
 6950 *BASE_MODE returns the mode of the individual register and when IS_HA
 6951 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
5952 floating-point aggregate or a homogeneous short-vector aggregate. */
5953
5954static bool
5955aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
5956 const_tree type,
5957 enum machine_mode *base_mode,
5958 int *count,
5959 bool *is_ha)
5960{
5961 enum machine_mode new_mode = VOIDmode;
5962 bool composite_p = aarch64_composite_type_p (type, mode);
5963
5964 if (is_ha != NULL) *is_ha = false;
5965
5966 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
5967 || aarch64_short_vector_p (type, mode))
5968 {
5969 *count = 1;
5970 new_mode = mode;
5971 }
5972 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5973 {
5974 if (is_ha != NULL) *is_ha = true;
5975 *count = 2;
5976 new_mode = GET_MODE_INNER (mode);
5977 }
5978 else if (type && composite_p)
5979 {
5980 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5981
5982 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
5983 {
5984 if (is_ha != NULL) *is_ha = true;
5985 *count = ag_count;
5986 }
5987 else
5988 return false;
5989 }
5990 else
5991 return false;
5992
5993 *base_mode = new_mode;
5994 return true;
5995}
5996
5997/* Implement TARGET_STRUCT_VALUE_RTX. */
5998
5999static rtx
6000aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6001 int incoming ATTRIBUTE_UNUSED)
6002{
6003 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6004}
6005
6006/* Implements target hook vector_mode_supported_p. */
6007static bool
6008aarch64_vector_mode_supported_p (enum machine_mode mode)
6009{
6010 if (TARGET_SIMD
6011 && (mode == V4SImode || mode == V8HImode
6012 || mode == V16QImode || mode == V2DImode
6013 || mode == V2SImode || mode == V4HImode
6014 || mode == V8QImode || mode == V2SFmode
6015 || mode == V4SFmode || mode == V2DFmode))
6016 return true;
6017
6018 return false;
6019}
6020
6021/* Return appropriate SIMD container
6022 for MODE within a vector of WIDTH bits. */
43e9d192 6023static enum machine_mode
b7342d25 6024aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
43e9d192 6025{
b7342d25 6026 gcc_assert (width == 64 || width == 128);
43e9d192 6027 if (TARGET_SIMD)
6028 {
6029 if (width == 128)
6030 switch (mode)
6031 {
6032 case DFmode:
6033 return V2DFmode;
6034 case SFmode:
6035 return V4SFmode;
6036 case SImode:
6037 return V4SImode;
6038 case HImode:
6039 return V8HImode;
6040 case QImode:
6041 return V16QImode;
6042 case DImode:
6043 return V2DImode;
6044 default:
6045 break;
6046 }
6047 else
6048 switch (mode)
6049 {
6050 case SFmode:
6051 return V2SFmode;
6052 case SImode:
6053 return V2SImode;
6054 case HImode:
6055 return V4HImode;
6056 case QImode:
6057 return V8QImode;
6058 default:
6059 break;
6060 }
6061 }
6062 return word_mode;
6063}
6064
6065/* Return 128-bit container as the preferred SIMD mode for MODE. */
6066static enum machine_mode
6067aarch64_preferred_simd_mode (enum machine_mode mode)
6068{
6069 return aarch64_simd_container_mode (mode, 128);
6070}
6071
6072/* Return the bitmask of possible vector sizes for the vectorizer
6073 to iterate over. */
6074static unsigned int
6075aarch64_autovectorize_vector_sizes (void)
6076{
6077 return (16 | 8);
6078}
6079
6080/* A table to help perform AArch64-specific name mangling for AdvSIMD
6081 vector types in order to conform to the AAPCS64 (see "Procedure
6082 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6083 qualify for emission with the mangled names defined in that document,
6084 a vector type must not only be of the correct mode but also be
6085 composed of AdvSIMD vector element types (e.g.
 6086 __builtin_aarch64_simd_qi); these types are registered by
 6087 aarch64_init_simd_builtins ().  In other words, vector types defined
 6088 in other ways, e.g. via the vector_size attribute, will get default
 6089 mangled names. */
6090typedef struct
6091{
6092 enum machine_mode mode;
6093 const char *element_type_name;
6094 const char *mangled_name;
6095} aarch64_simd_mangle_map_entry;
6096
6097static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6098 /* 64-bit containerized types. */
6099 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6100 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6101 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6102 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6103 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6104 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6105 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6106 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6107 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6108 /* 128-bit containerized types. */
6109 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6110 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6111 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6112 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6113 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6114 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6115 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6116 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6117 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6118 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6119 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6120 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6121 { VOIDmode, NULL, NULL }
6122};
6123
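/* As an illustrative example (assuming the usual Itanium C++ mangling of
   the enclosing function name), a C++ declaration such as

     void f (int8x8_t);

   where int8x8_t is the arm_neon.h vector type whose element type is
   __builtin_aarch64_simd_qi, is emitted as _Z1f10__Int8x8_t, using the
   "10__Int8x8_t" entry from the table above.  */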
6124/* Implement TARGET_MANGLE_TYPE. */
6125
6f549691 6126static const char *
6127aarch64_mangle_type (const_tree type)
6128{
6129 /* The AArch64 ABI documents say that "__va_list" has to be
 6130 mangled as if it is in the "std" namespace. */
6131 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6132 return "St9__va_list";
6133
6134 /* Check the mode of the vector type, and the name of the vector
6135 element type, against the table. */
6136 if (TREE_CODE (type) == VECTOR_TYPE)
6137 {
6138 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6139
6140 while (pos->mode != VOIDmode)
6141 {
6142 tree elt_type = TREE_TYPE (type);
6143
6144 if (pos->mode == TYPE_MODE (type)
6145 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6146 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6147 pos->element_type_name))
6148 return pos->mangled_name;
6149
6150 pos++;
6151 }
6152 }
6153
6154 /* Use the default mangling. */
6155 return NULL;
6156}
6157
43e9d192 6158/* Return the equivalent letter for size. */
81c2dfb9 6159static char
6160sizetochar (int size)
6161{
6162 switch (size)
6163 {
6164 case 64: return 'd';
6165 case 32: return 's';
6166 case 16: return 'h';
6167 case 8 : return 'b';
6168 default: gcc_unreachable ();
6169 }
6170}
6171
6172/* Return true iff x is a uniform vector of floating-point
6173 constants, and the constant can be represented in
6174 quarter-precision form. Note, as aarch64_float_const_representable
6175 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
6176static bool
6177aarch64_vect_float_const_representable_p (rtx x)
6178{
6179 int i = 0;
6180 REAL_VALUE_TYPE r0, ri;
6181 rtx x0, xi;
6182
6183 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6184 return false;
6185
6186 x0 = CONST_VECTOR_ELT (x, 0);
6187 if (!CONST_DOUBLE_P (x0))
6188 return false;
6189
6190 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6191
6192 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6193 {
6194 xi = CONST_VECTOR_ELT (x, i);
6195 if (!CONST_DOUBLE_P (xi))
6196 return false;
6197
6198 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6199 if (!REAL_VALUES_EQUAL (r0, ri))
6200 return false;
6201 }
6202
6203 return aarch64_float_const_representable_p (x0);
6204}
6205
d8edd899 6206/* Return true if OP is a valid AdvSIMD immediate for MODE, false otherwise. If INFO is non-NULL, use it to return details of the recognized immediate. */
3ea63f60 6207bool
6208aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
6209 struct simd_immediate_info *info)
6210{
6211#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6212 matches = 1; \
6213 for (i = 0; i < idx; i += (STRIDE)) \
6214 if (!(TEST)) \
6215 matches = 0; \
6216 if (matches) \
6217 { \
6218 immtype = (CLASS); \
6219 elsize = (ELSIZE); \
6220 eshift = (SHIFT); \
6221 emvn = (NEG); \
6222 break; \
6223 }
6224
6225 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6226 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6227 unsigned char bytes[16];
6228 int immtype = -1, matches;
6229 unsigned int invmask = inverse ? 0xff : 0;
6230 int eshift, emvn;
6231
43e9d192 6232 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3520f7cc 6233 {
6234 if (! (aarch64_simd_imm_zero_p (op, mode)
6235 || aarch64_vect_float_const_representable_p (op)))
d8edd899 6236 return false;
3520f7cc 6237
6238 if (info)
6239 {
6240 info->value = CONST_VECTOR_ELT (op, 0);
81c2dfb9 6241 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
6242 info->mvn = false;
6243 info->shift = 0;
6244 }
3520f7cc 6245
d8edd899 6246 return true;
3520f7cc 6247 }
6248
6249 /* Splat vector constant out into a byte vector. */
6250 for (i = 0; i < n_elts; i++)
6251 {
6252 rtx el = CONST_VECTOR_ELT (op, i);
6253 unsigned HOST_WIDE_INT elpart;
6254 unsigned int part, parts;
6255
6256 if (GET_CODE (el) == CONST_INT)
6257 {
6258 elpart = INTVAL (el);
6259 parts = 1;
6260 }
6261 else if (GET_CODE (el) == CONST_DOUBLE)
6262 {
6263 elpart = CONST_DOUBLE_LOW (el);
6264 parts = 2;
6265 }
6266 else
6267 gcc_unreachable ();
6268
6269 for (part = 0; part < parts; part++)
6270 {
6271 unsigned int byte;
6272 for (byte = 0; byte < innersize; byte++)
6273 {
6274 bytes[idx++] = (elpart & 0xff) ^ invmask;
6275 elpart >>= BITS_PER_UNIT;
6276 }
6277 if (GET_CODE (el) == CONST_DOUBLE)
6278 elpart = CONST_DOUBLE_HIGH (el);
6279 }
6280 }
6281
6282 /* Sanity check. */
6283 gcc_assert (idx == GET_MODE_SIZE (mode));
6284
6285 do
6286 {
6287 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6288 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6289
6290 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6291 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6292
6293 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6294 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6295
6296 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6297 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6298
6299 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6300
6301 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6302
6303 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6304 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6305
6306 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6307 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6308
6309 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6310 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6311
6312 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6313 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6314
6315 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6316
6317 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6318
6319 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6320 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6321
6322 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6323 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6324
6325 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6326 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 0, 0);
6327
6328 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6329 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 0, 1);
6330
6331 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6332
6333 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6334 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6335 }
6336 while (0);
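  /* Worked example (illustrative): a V4SImode vector whose elements all
     equal 0x00120000 splats to the per-element byte pattern 00 00 12 00,
     matches the ELSIZE == 32, SHIFT == 16 case above (immtype 2), and is
     later emitted as "movi v0.4s, 0x12, lsl 16".  */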
6337
6338 /* TODO: Currently the assembler cannot handle types 12 to 15.
6339 And there is no way to specify cmode through the compiler.
6340 Disable them till there is support in the assembler. */
6341 if (immtype == -1
6342 || (immtype >= 12 && immtype <= 15)
6343 || immtype == 18)
d8edd899 6344 return false;
43e9d192 6345
48063b9d 6346 if (info)
43e9d192 6347 {
48063b9d 6348 info->element_width = elsize;
6349 info->mvn = emvn != 0;
6350 info->shift = eshift;
6351
6352 unsigned HOST_WIDE_INT imm = 0;
6353
6354 /* Un-invert bytes of recognized vector, if necessary. */
6355 if (invmask != 0)
6356 for (i = 0; i < idx; i++)
6357 bytes[i] ^= invmask;
6358
6359 if (immtype == 17)
6360 {
6361 /* FIXME: Broken on 32-bit H_W_I hosts. */
6362 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6363
6364 for (i = 0; i < 8; i++)
6365 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6366 << (i * BITS_PER_UNIT);
6367
43e9d192 6368
6369 info->value = GEN_INT (imm);
6370 }
6371 else
6372 {
6373 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6374 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6375
6376 /* Construct 'abcdefgh' because the assembler cannot handle
6377 generic constants. */
6378 if (info->mvn)
43e9d192 6379 imm = ~imm;
6380 imm = (imm >> info->shift) & 0xff;
6381 info->value = GEN_INT (imm);
6382 }
6383 }
6384
48063b9d 6385 return true;
6386#undef CHECK
6387}
6388
6389static bool
6390aarch64_const_vec_all_same_int_p (rtx x,
6391 HOST_WIDE_INT minval,
6392 HOST_WIDE_INT maxval)
6393{
6394 HOST_WIDE_INT firstval;
6395 int count, i;
6396
6397 if (GET_CODE (x) != CONST_VECTOR
6398 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6399 return false;
6400
6401 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6402 if (firstval < minval || firstval > maxval)
6403 return false;
6404
6405 count = CONST_VECTOR_NUNITS (x);
6406 for (i = 1; i < count; i++)
6407 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6408 return false;
6409
6410 return true;
6411}
6412
 6413/* Check if immediate shift constants are within range. */
6414bool
6415aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6416{
6417 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6418 if (left)
6419 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6420 else
6421 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6422}
6423
6424/* Return true if X is a uniform vector where all elements
6425 are either the floating-point constant 0.0 or the
6426 integer constant 0. */
6427bool
6428aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6429{
3520f7cc 6430 return x == CONST0_RTX (mode);
6431}
6432
6433bool
6434aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6435{
6436 HOST_WIDE_INT imm = INTVAL (x);
6437 int i;
6438
6439 for (i = 0; i < 8; i++)
6440 {
6441 unsigned int byte = imm & 0xff;
6442 if (byte != 0xff && byte != 0)
6443 return false;
6444 imm >>= 8;
6445 }
6446
6447 return true;
6448}
6449
6450bool
6451aarch64_mov_operand_p (rtx x,
a5350ddc 6452 enum aarch64_symbol_context context,
6453 enum machine_mode mode)
6454{
6455 if (GET_CODE (x) == HIGH
6456 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6457 return true;
6458
6459 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
6460 return true;
6461
6462 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
6463 return true;
6464
6465 return aarch64_classify_symbolic_expression (x, context)
6466 == SYMBOL_TINY_ABSOLUTE;
6467}
6468
 6469/* Return a CONST_VECTOR of MODE in which every element is VAL. */
6470rtx
6471aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6472{
6473 int nunits = GET_MODE_NUNITS (mode);
6474 rtvec v = rtvec_alloc (nunits);
6475 int i;
6476
6477 for (i=0; i < nunits; i++)
6478 RTVEC_ELT (v, i) = GEN_INT (val);
6479
6480 return gen_rtx_CONST_VECTOR (mode, v);
6481}
6482
6483/* Check OP is a legal scalar immediate for the MOVI instruction. */
6484
6485bool
6486aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
6487{
6488 enum machine_mode vmode;
6489
6490 gcc_assert (!VECTOR_MODE_P (mode));
6491 vmode = aarch64_preferred_simd_mode (mode);
6492 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
48063b9d 6493 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
6494}
6495
 6496/* Construct and return a PARALLEL RTX selecting the high or low half of the lanes of MODE, according to HIGH. */
6497rtx
6498aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6499{
6500 int nunits = GET_MODE_NUNITS (mode);
6501 rtvec v = rtvec_alloc (nunits / 2);
6502 int base = high ? nunits / 2 : 0;
6503 rtx t1;
6504 int i;
6505
6506 for (i=0; i < nunits / 2; i++)
6507 RTVEC_ELT (v, i) = GEN_INT (base + i);
6508
6509 t1 = gen_rtx_PARALLEL (mode, v);
6510 return t1;
6511}
6512
6513/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6514 HIGH (exclusive). */
6515void
6516aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6517{
6518 HOST_WIDE_INT lane;
6519 gcc_assert (GET_CODE (operand) == CONST_INT);
6520 lane = INTVAL (operand);
6521
6522 if (lane < low || lane >= high)
6523 error ("lane out of range");
6524}
6525
6526void
6527aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6528{
6529 gcc_assert (GET_CODE (operand) == CONST_INT);
6530 HOST_WIDE_INT lane = INTVAL (operand);
6531
6532 if (lane < low || lane >= high)
6533 error ("constant out of range");
6534}
6535
6536/* Emit code to reinterpret one AdvSIMD type as another,
6537 without altering bits. */
6538void
6539aarch64_simd_reinterpret (rtx dest, rtx src)
6540{
6541 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6542}
6543
 6544/* Emit code to place an AdvSIMD pair result in memory locations (with equal
6545 registers). */
6546void
6547aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6548 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6549 rtx op1)
6550{
6551 rtx mem = gen_rtx_MEM (mode, destaddr);
6552 rtx tmp1 = gen_reg_rtx (mode);
6553 rtx tmp2 = gen_reg_rtx (mode);
6554
6555 emit_insn (intfn (tmp1, op1, tmp2));
6556
6557 emit_move_insn (mem, tmp1);
6558 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6559 emit_move_insn (mem, tmp2);
6560}
6561
6562/* Return TRUE if OP is a valid vector addressing mode. */
6563bool
6564aarch64_simd_mem_operand_p (rtx op)
6565{
6566 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6567 || GET_CODE (XEXP (op, 0)) == REG);
6568}
6569
6570/* Set up OPERANDS for a register copy from SRC to DEST, taking care
6571 not to early-clobber SRC registers in the process.
6572
6573 We assume that the operands described by SRC and DEST represent a
6574 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6575 number of components into which the copy has been decomposed. */
6576void
6577aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6578 rtx *src, unsigned int count)
6579{
6580 unsigned int i;
6581
6582 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6583 || REGNO (operands[0]) < REGNO (operands[1]))
6584 {
6585 for (i = 0; i < count; i++)
6586 {
6587 operands[2 * i] = dest[i];
6588 operands[2 * i + 1] = src[i];
6589 }
6590 }
6591 else
6592 {
6593 for (i = 0; i < count; i++)
6594 {
6595 operands[2 * i] = dest[count - i - 1];
6596 operands[2 * i + 1] = src[count - i - 1];
6597 }
6598 }
6599}
6600
6601/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6602 one of VSTRUCT modes: OI, CI or XI. */
6603int
6604aarch64_simd_attr_length_move (rtx insn)
6605{
6606 enum machine_mode mode;
6607
6608 extract_insn_cached (insn);
6609
6610 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6611 {
6612 mode = GET_MODE (recog_data.operand[0]);
6613 switch (mode)
6614 {
6615 case OImode:
6616 return 8;
6617 case CImode:
6618 return 12;
6619 case XImode:
6620 return 16;
6621 default:
6622 gcc_unreachable ();
6623 }
6624 }
6625 return 4;
6626}
6627
6628/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6629 alignment of a vector to 128 bits. */
6630static HOST_WIDE_INT
6631aarch64_simd_vector_alignment (const_tree type)
6632{
6633 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
6634 return MIN (align, 128);
6635}
6636
6637/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6638static bool
6639aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6640{
6641 if (is_packed)
6642 return false;
6643
6644 /* We guarantee alignment for vectors up to 128-bits. */
6645 if (tree_int_cst_compare (TYPE_SIZE (type),
6646 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6647 return false;
6648
6649 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6650 return true;
6651}
6652
6653/* If VALS is a vector constant that can be loaded into a register
6654 using DUP, generate instructions to do so and return an RTX to
6655 assign to the register. Otherwise return NULL_RTX. */
6656static rtx
6657aarch64_simd_dup_constant (rtx vals)
6658{
6659 enum machine_mode mode = GET_MODE (vals);
6660 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6661 int n_elts = GET_MODE_NUNITS (mode);
6662 bool all_same = true;
6663 rtx x;
6664 int i;
6665
6666 if (GET_CODE (vals) != CONST_VECTOR)
6667 return NULL_RTX;
6668
6669 for (i = 1; i < n_elts; ++i)
6670 {
6671 x = CONST_VECTOR_ELT (vals, i);
6672 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6673 all_same = false;
6674 }
6675
6676 if (!all_same)
6677 return NULL_RTX;
6678
6679 /* We can load this constant by using DUP and a constant in a
6680 single ARM register. This will be cheaper than a vector
6681 load. */
6682 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6683 return gen_rtx_VEC_DUPLICATE (mode, x);
6684}
6685
6686
6687/* Generate code to load VALS, which is a PARALLEL containing only
6688 constants (for vec_init) or CONST_VECTOR, efficiently into a
6689 register. Returns an RTX to copy into the register, or NULL_RTX
6690 for a PARALLEL that can not be converted into a CONST_VECTOR. */
1df3f464 6691static rtx
6692aarch64_simd_make_constant (rtx vals)
6693{
6694 enum machine_mode mode = GET_MODE (vals);
6695 rtx const_dup;
6696 rtx const_vec = NULL_RTX;
6697 int n_elts = GET_MODE_NUNITS (mode);
6698 int n_const = 0;
6699 int i;
6700
6701 if (GET_CODE (vals) == CONST_VECTOR)
6702 const_vec = vals;
6703 else if (GET_CODE (vals) == PARALLEL)
6704 {
6705 /* A CONST_VECTOR must contain only CONST_INTs and
6706 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6707 Only store valid constants in a CONST_VECTOR. */
6708 for (i = 0; i < n_elts; ++i)
6709 {
6710 rtx x = XVECEXP (vals, 0, i);
6711 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
6712 n_const++;
6713 }
6714 if (n_const == n_elts)
6715 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6716 }
6717 else
6718 gcc_unreachable ();
6719
6720 if (const_vec != NULL_RTX
48063b9d 6721 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
6722 /* Load using MOVI/MVNI. */
6723 return const_vec;
6724 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
6725 /* Loaded using DUP. */
6726 return const_dup;
6727 else if (const_vec != NULL_RTX)
6728 /* Load from constant pool. We can not take advantage of single-cycle
6729 LD1 because we need a PC-relative addressing mode. */
6730 return const_vec;
6731 else
6732 /* A PARALLEL containing something not valid inside CONST_VECTOR.
6733 We can not construct an initializer. */
6734 return NULL_RTX;
6735}
6736
6737void
6738aarch64_expand_vector_init (rtx target, rtx vals)
6739{
6740 enum machine_mode mode = GET_MODE (target);
6741 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6742 int n_elts = GET_MODE_NUNITS (mode);
6743 int n_var = 0, one_var = -1;
6744 bool all_same = true;
6745 rtx x, mem;
6746 int i;
6747
6748 x = XVECEXP (vals, 0, 0);
6749 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6750 n_var = 1, one_var = 0;
6751
6752 for (i = 1; i < n_elts; ++i)
6753 {
6754 x = XVECEXP (vals, 0, i);
6755 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6756 ++n_var, one_var = i;
6757
6758 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6759 all_same = false;
6760 }
6761
6762 if (n_var == 0)
6763 {
6764 rtx constant = aarch64_simd_make_constant (vals);
6765 if (constant != NULL_RTX)
6766 {
6767 emit_move_insn (target, constant);
6768 return;
6769 }
6770 }
6771
6772 /* Splat a single non-constant element if we can. */
6773 if (all_same)
6774 {
6775 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
6776 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
6777 return;
6778 }
6779
6780 /* One field is non-constant. Load constant then overwrite varying
6781 field. This is more efficient than using the stack. */
6782 if (n_var == 1)
6783 {
6784 rtx copy = copy_rtx (vals);
6785 rtx index = GEN_INT (one_var);
6786 enum insn_code icode;
6787
6788 /* Load constant part of vector, substitute neighboring value for
6789 varying element. */
6790 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
6791 aarch64_expand_vector_init (target, copy);
6792
6793 /* Insert variable. */
6794 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
6795 icode = optab_handler (vec_set_optab, mode);
6796 gcc_assert (icode != CODE_FOR_nothing);
6797 emit_insn (GEN_FCN (icode) (target, x, index));
6798 return;
6799 }
6800
6801 /* Construct the vector in memory one field at a time
6802 and load the whole vector. */
6803 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6804 for (i = 0; i < n_elts; i++)
6805 emit_move_insn (adjust_address_nv (mem, inner_mode,
6806 i * GET_MODE_SIZE (inner_mode)),
6807 XVECEXP (vals, 0, i));
6808 emit_move_insn (target, mem);
6809
6810}
6811
6812static unsigned HOST_WIDE_INT
6813aarch64_shift_truncation_mask (enum machine_mode mode)
6814{
6815 return
6816 (aarch64_vector_mode_supported_p (mode)
6817 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
6818}
6819
6820#ifndef TLS_SECTION_ASM_FLAG
6821#define TLS_SECTION_ASM_FLAG 'T'
6822#endif
6823
6824void
6825aarch64_elf_asm_named_section (const char *name, unsigned int flags,
6826 tree decl ATTRIBUTE_UNUSED)
6827{
6828 char flagchars[10], *f = flagchars;
6829
6830 /* If we have already declared this section, we can use an
6831 abbreviated form to switch back to it -- unless this section is
 6832 part of a COMDAT group, in which case GAS requires the full
6833 declaration every time. */
6834 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6835 && (flags & SECTION_DECLARED))
6836 {
6837 fprintf (asm_out_file, "\t.section\t%s\n", name);
6838 return;
6839 }
6840
6841 if (!(flags & SECTION_DEBUG))
6842 *f++ = 'a';
6843 if (flags & SECTION_WRITE)
6844 *f++ = 'w';
6845 if (flags & SECTION_CODE)
6846 *f++ = 'x';
6847 if (flags & SECTION_SMALL)
6848 *f++ = 's';
6849 if (flags & SECTION_MERGE)
6850 *f++ = 'M';
6851 if (flags & SECTION_STRINGS)
6852 *f++ = 'S';
6853 if (flags & SECTION_TLS)
6854 *f++ = TLS_SECTION_ASM_FLAG;
6855 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6856 *f++ = 'G';
6857 *f = '\0';
6858
6859 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
6860
6861 if (!(flags & SECTION_NOTYPE))
6862 {
6863 const char *type;
6864 const char *format;
6865
6866 if (flags & SECTION_BSS)
6867 type = "nobits";
6868 else
6869 type = "progbits";
6870
6871#ifdef TYPE_OPERAND_FMT
6872 format = "," TYPE_OPERAND_FMT;
6873#else
6874 format = ",@%s";
6875#endif
6876
6877 fprintf (asm_out_file, format, type);
6878
6879 if (flags & SECTION_ENTSIZE)
6880 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
6881 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6882 {
6883 if (TREE_CODE (decl) == IDENTIFIER_NODE)
6884 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
6885 else
6886 fprintf (asm_out_file, ",%s,comdat",
6887 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
6888 }
6889 }
6890
6891 putc ('\n', asm_out_file);
6892}
6893
6894/* Select a format to encode pointers in exception handling data. */
6895int
6896aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
6897{
6898 int type;
6899 switch (aarch64_cmodel)
6900 {
6901 case AARCH64_CMODEL_TINY:
6902 case AARCH64_CMODEL_TINY_PIC:
6903 case AARCH64_CMODEL_SMALL:
6904 case AARCH64_CMODEL_SMALL_PIC:
6905 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
6906 for everything. */
6907 type = DW_EH_PE_sdata4;
6908 break;
6909 default:
6910 /* No assumptions here. 8-byte relocs required. */
6911 type = DW_EH_PE_sdata8;
6912 break;
6913 }
6914 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
6915}
6916
6917/* Emit load exclusive. */
6918
6919static void
6920aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
6921 rtx mem, rtx model_rtx)
6922{
6923 rtx (*gen) (rtx, rtx, rtx);
6924
6925 switch (mode)
6926 {
6927 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
6928 case HImode: gen = gen_aarch64_load_exclusivehi; break;
6929 case SImode: gen = gen_aarch64_load_exclusivesi; break;
6930 case DImode: gen = gen_aarch64_load_exclusivedi; break;
6931 default:
6932 gcc_unreachable ();
6933 }
6934
6935 emit_insn (gen (rval, mem, model_rtx));
6936}
6937
6938/* Emit store exclusive. */
6939
6940static void
6941aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
6942 rtx rval, rtx mem, rtx model_rtx)
6943{
6944 rtx (*gen) (rtx, rtx, rtx, rtx);
6945
6946 switch (mode)
6947 {
6948 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
6949 case HImode: gen = gen_aarch64_store_exclusivehi; break;
6950 case SImode: gen = gen_aarch64_store_exclusivesi; break;
6951 case DImode: gen = gen_aarch64_store_exclusivedi; break;
6952 default:
6953 gcc_unreachable ();
6954 }
6955
6956 emit_insn (gen (bval, rval, mem, model_rtx));
6957}
6958
6959/* Mark the previous jump instruction as unlikely. */
6960
6961static void
6962aarch64_emit_unlikely_jump (rtx insn)
6963{
6964 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
6965
6966 insn = emit_jump_insn (insn);
6967 add_reg_note (insn, REG_BR_PROB, very_unlikely);
6968}
6969
6970/* Expand a compare and swap pattern. */
6971
6972void
6973aarch64_expand_compare_and_swap (rtx operands[])
6974{
6975 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
6976 enum machine_mode mode, cmp_mode;
6977 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
6978
6979 bval = operands[0];
6980 rval = operands[1];
6981 mem = operands[2];
6982 oldval = operands[3];
6983 newval = operands[4];
6984 is_weak = operands[5];
6985 mod_s = operands[6];
6986 mod_f = operands[7];
6987 mode = GET_MODE (mem);
6988 cmp_mode = mode;
6989
6990 /* Normally the succ memory model must be stronger than fail, but in the
6991 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
6992 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
6993
6994 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
6995 && INTVAL (mod_s) == MEMMODEL_RELEASE)
6996 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
6997
6998 switch (mode)
6999 {
7000 case QImode:
7001 case HImode:
7002 /* For short modes, we're going to perform the comparison in SImode,
7003 so do the zero-extension now. */
7004 cmp_mode = SImode;
7005 rval = gen_reg_rtx (SImode);
7006 oldval = convert_modes (SImode, mode, oldval, true);
7007 /* Fall through. */
7008
7009 case SImode:
7010 case DImode:
7011 /* Force the value into a register if needed. */
7012 if (!aarch64_plus_operand (oldval, mode))
7013 oldval = force_reg (cmp_mode, oldval);
7014 break;
7015
7016 default:
7017 gcc_unreachable ();
7018 }
7019
7020 switch (mode)
7021 {
7022 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7023 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7024 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7025 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7026 default:
7027 gcc_unreachable ();
7028 }
7029
7030 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7031
7032 if (mode == QImode || mode == HImode)
7033 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7034
7035 x = gen_rtx_REG (CCmode, CC_REGNUM);
7036 x = gen_rtx_EQ (SImode, x, const0_rtx);
7037 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7038}
7039
7040/* Split a compare and swap pattern. */
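/* Illustrative note (not from the original sources): a source-level call
   such as

     bool ok = __atomic_compare_exchange_n (&v, &expected, desired, 0,
					    __ATOMIC_SEQ_CST,
					    __ATOMIC_SEQ_CST);

   (with v, expected and desired suitably declared; weak == 0 gives the
   strong form) reaches this point as an atomic_compare_and_swap<mode>_1
   insn and is split here into a load-exclusive / compare /
   store-exclusive sequence, wrapped in a retry loop for the strong
   variant.  */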
7041
7042void
7043aarch64_split_compare_and_swap (rtx operands[])
7044{
7045 rtx rval, mem, oldval, newval, scratch;
7046 enum machine_mode mode;
7047 bool is_weak;
7048 rtx label1, label2, x, cond;
7049
7050 rval = operands[0];
7051 mem = operands[1];
7052 oldval = operands[2];
7053 newval = operands[3];
7054 is_weak = (operands[4] != const0_rtx);
7055 scratch = operands[7];
7056 mode = GET_MODE (mem);
7057
7058 label1 = NULL_RTX;
7059 if (!is_weak)
7060 {
7061 label1 = gen_label_rtx ();
7062 emit_label (label1);
7063 }
7064 label2 = gen_label_rtx ();
7065
7066 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7067
7068 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7069 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7070 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7071 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7072 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7073
7074 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7075
7076 if (!is_weak)
7077 {
7078 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7079 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7080 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7081 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7082 }
7083 else
7084 {
7085 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7086 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7087 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7088 }
7089
7090 emit_label (label2);
7091}
7092
7093/* Split an atomic operation. */
7094
7095void
7096aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7097 rtx value, rtx model_rtx, rtx cond)
7098{
7099 enum machine_mode mode = GET_MODE (mem);
7100 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7101 rtx label, x;
7102
7103 label = gen_label_rtx ();
7104 emit_label (label);
7105
7106 if (new_out)
7107 new_out = gen_lowpart (wmode, new_out);
7108 if (old_out)
7109 old_out = gen_lowpart (wmode, old_out);
7110 else
7111 old_out = new_out;
7112 value = simplify_gen_subreg (wmode, value, mode, 0);
7113
7114 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7115
7116 switch (code)
7117 {
7118 case SET:
7119 new_out = value;
7120 break;
7121
7122 case NOT:
7123 x = gen_rtx_AND (wmode, old_out, value);
7124 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7125 x = gen_rtx_NOT (wmode, new_out);
7126 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7127 break;
7128
7129 case MINUS:
7130 if (CONST_INT_P (value))
7131 {
7132 value = GEN_INT (-INTVAL (value));
7133 code = PLUS;
7134 }
7135 /* Fall through. */
7136
7137 default:
7138 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7139 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7140 break;
7141 }
7142
7143 aarch64_emit_store_exclusive (mode, cond, mem,
7144 gen_lowpart (mode, new_out), model_rtx);
7145
7146 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7147 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7148 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7149 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7150}
7151
7152static void
7153aarch64_print_extension (void)
7154{
7155 const struct aarch64_option_extension *opt = NULL;
7156
7157 for (opt = all_extensions; opt->name != NULL; opt++)
7158 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7159 asm_fprintf (asm_out_file, "+%s", opt->name);
7160
7161 asm_fprintf (asm_out_file, "\n");
7162}
7163
7164static void
7165aarch64_start_file (void)
7166{
7167 if (selected_arch)
7168 {
7169 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7170 aarch64_print_extension ();
7171 }
43e9d192 7172 else if (selected_cpu)
7173 {
7174 asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
7175 aarch64_print_extension ();
7176 }
7177 default_file_start();
7178}
7179
7180/* Target hook for c_mode_for_suffix. */
7181static enum machine_mode
7182aarch64_c_mode_for_suffix (char suffix)
7183{
7184 if (suffix == 'q')
7185 return TFmode;
7186
7187 return VOIDmode;
7188}
7189
7190/* We can only represent floating point constants which will fit in
7191 "quarter-precision" values. These values are characterised by
 7192 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
 7193 by:
7194
7195 (-1)^s * (n/16) * 2^r
7196
7197 Where:
7198 's' is the sign bit.
7199 'n' is an integer in the range 16 <= n <= 31.
7200 'r' is an integer in the range -3 <= r <= 4. */
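/* Illustrative examples (not from the original sources): 1.0 is
   (16/16) * 2^0 and 0.75 is (24/16) * 2^-1; the largest representable
   magnitude is (31/16) * 2^4 == 31.0 and the smallest non-zero
   magnitude is (16/16) * 2^-3 == 0.125.  A value such as 0.1 has no
   encoding of this form.  */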
7201
7202/* Return true iff X can be represented by a quarter-precision
 7203 floating point immediate operand. Note, we cannot represent 0.0. */
7204bool
7205aarch64_float_const_representable_p (rtx x)
7206{
7207 /* This represents our current view of how many bits
7208 make up the mantissa. */
7209 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 7210 int exponent;
7211 unsigned HOST_WIDE_INT mantissa, mask;
7212 HOST_WIDE_INT m1, m2;
7213 REAL_VALUE_TYPE r, m;
7214
7215 if (!CONST_DOUBLE_P (x))
7216 return false;
7217
7218 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7219
7220 /* We cannot represent infinities, NaNs or +/-zero. We won't
7221 know if we have +zero until we analyse the mantissa, but we
7222 can reject the other invalid values. */
7223 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7224 || REAL_VALUE_MINUS_ZERO (r))
7225 return false;
7226
ba96cdfb 7227 /* Extract exponent. */
7228 r = real_value_abs (&r);
7229 exponent = REAL_EXP (&r);
7230
7231 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7232 highest (sign) bit, with a fixed binary point at bit point_pos.
7233 m1 holds the low part of the mantissa, m2 the high part.
7234 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7235 bits for the mantissa, this can fail (low bits will be lost). */
7236 real_ldexp (&m, &r, point_pos - exponent);
7237 REAL_VALUE_TO_INT (&m1, &m2, m);
7238
7239 /* If the low part of the mantissa has bits set we cannot represent
7240 the value. */
7241 if (m1 != 0)
7242 return false;
7243 /* We have rejected the lower HOST_WIDE_INT, so update our
7244 understanding of how many bits lie in the mantissa and
7245 look only at the high HOST_WIDE_INT. */
7246 mantissa = m2;
7247 point_pos -= HOST_BITS_PER_WIDE_INT;
7248
7249 /* We can only represent values with a mantissa of the form 1.xxxx. */
7250 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7251 if ((mantissa & mask) != 0)
7252 return false;
7253
7254 /* Having filtered unrepresentable values, we may now remove all
7255 but the highest 5 bits. */
7256 mantissa >>= point_pos - 5;
7257
7258 /* We cannot represent the value 0.0, so reject it. This is handled
7259 elsewhere. */
7260 if (mantissa == 0)
7261 return false;
7262
7263 /* Then, as bit 4 is always set, we can mask it off, leaving
7264 the mantissa in the range [0, 15]. */
7265 mantissa &= ~(1 << 4);
7266 gcc_assert (mantissa <= 15);
7267
7268 /* GCC internally does not use IEEE754-like encoding (where normalized
7269 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
7270 Our mantissa values are shifted 4 places to the left relative to
7271 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7272 by 5 places to correct for GCC's representation. */
7273 exponent = 5 - exponent;
7274
7275 return (exponent >= 0 && exponent <= 7);
7276}
7277
7278char*
81c2dfb9 7279aarch64_output_simd_mov_immediate (rtx const_vector,
7280 enum machine_mode mode,
7281 unsigned width)
7282{
3ea63f60 7283 bool is_valid;
3520f7cc 7284 static char templ[40];
7285 const char *mnemonic;
7286 unsigned int lane_count = 0;
81c2dfb9 7287 char element_char;
3520f7cc 7288
48063b9d
IB
7289 struct simd_immediate_info info;
7290
7291 /* This will return true to show const_vector is legal for use as either
 7292 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
7293 also update INFO to show how the immediate should be generated. */
81c2dfb9 7294 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
7295 gcc_assert (is_valid);
7296
81c2dfb9 7297 element_char = sizetochar (info.element_width);
7298 lane_count = width / info.element_width;
7299
7300 mode = GET_MODE_INNER (mode);
7301 if (mode == SFmode || mode == DFmode)
7302 {
7303 gcc_assert (info.shift == 0 && ! info.mvn);
7304 if (aarch64_float_const_zero_rtx_p (info.value))
7305 info.value = GEN_INT (0);
7306 else
7307 {
7308#define buf_size 20
7309 REAL_VALUE_TYPE r;
7310 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
7311 char float_buf[buf_size] = {'\0'};
7312 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
7313#undef buf_size
7314
7315 if (lane_count == 1)
7316 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
7317 else
7318 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 7319 lane_count, element_char, float_buf);
7320 return templ;
7321 }
3520f7cc 7322 }
3520f7cc 7323
48063b9d 7324 mnemonic = info.mvn ? "mvni" : "movi";
7325
7326 if (lane_count == 1)
7327 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
7328 mnemonic, UINTVAL (info.value));
7329 else if (info.shift)
7330 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
81c2dfb9 7331 ", lsl %d", mnemonic, lane_count, element_char,
48063b9d 7332 UINTVAL (info.value), info.shift);
3520f7cc 7333 else
48063b9d 7334 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
81c2dfb9 7335 mnemonic, lane_count, element_char, UINTVAL (info.value));
7336 return templ;
7337}
7338
7339char*
7340aarch64_output_scalar_simd_mov_immediate (rtx immediate,
7341 enum machine_mode mode)
7342{
7343 enum machine_mode vmode;
7344
7345 gcc_assert (!VECTOR_MODE_P (mode));
7346 vmode = aarch64_simd_container_mode (mode, 64);
7347 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
7348 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
7349}
7350
7351/* Split operands into moves from op[1] + op[2] into op[0]. */
7352
7353void
7354aarch64_split_combinev16qi (rtx operands[3])
7355{
7356 unsigned int dest = REGNO (operands[0]);
7357 unsigned int src1 = REGNO (operands[1]);
7358 unsigned int src2 = REGNO (operands[2]);
7359 enum machine_mode halfmode = GET_MODE (operands[1]);
7360 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7361 rtx destlo, desthi;
7362
7363 gcc_assert (halfmode == V16QImode);
7364
7365 if (src1 == dest && src2 == dest + halfregs)
7366 {
7367 /* No-op move. Can't split to nothing; emit something. */
7368 emit_note (NOTE_INSN_DELETED);
7369 return;
7370 }
7371
7372 /* Preserve register attributes for variable tracking. */
7373 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7374 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7375 GET_MODE_SIZE (halfmode));
7376
7377 /* Special case of reversed high/low parts. */
7378 if (reg_overlap_mentioned_p (operands[2], destlo)
7379 && reg_overlap_mentioned_p (operands[1], desthi))
7380 {
7381 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7382 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7383 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7384 }
7385 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7386 {
7387 /* Try to avoid unnecessary moves if part of the result
7388 is in the right place already. */
7389 if (src1 != dest)
7390 emit_move_insn (destlo, operands[1]);
7391 if (src2 != dest + halfregs)
7392 emit_move_insn (desthi, operands[2]);
7393 }
7394 else
7395 {
7396 if (src2 != dest + halfregs)
7397 emit_move_insn (desthi, operands[2]);
7398 if (src1 != dest)
7399 emit_move_insn (destlo, operands[1]);
7400 }
7401}
7402
7403/* vec_perm support. */
7404
7405#define MAX_VECT_LEN 16
7406
7407struct expand_vec_perm_d
7408{
7409 rtx target, op0, op1;
7410 unsigned char perm[MAX_VECT_LEN];
7411 enum machine_mode vmode;
7412 unsigned char nelt;
7413 bool one_vector_p;
7414 bool testing_p;
7415};
7416
7417/* Generate a variable permutation. */
7418
7419static void
7420aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7421{
7422 enum machine_mode vmode = GET_MODE (target);
7423 bool one_vector_p = rtx_equal_p (op0, op1);
7424
7425 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7426 gcc_checking_assert (GET_MODE (op0) == vmode);
7427 gcc_checking_assert (GET_MODE (op1) == vmode);
7428 gcc_checking_assert (GET_MODE (sel) == vmode);
7429 gcc_checking_assert (TARGET_SIMD);
7430
7431 if (one_vector_p)
7432 {
7433 if (vmode == V8QImode)
7434 {
7435 /* Expand the argument to a V16QI mode by duplicating it. */
7436 rtx pair = gen_reg_rtx (V16QImode);
7437 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7438 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7439 }
7440 else
7441 {
7442 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7443 }
7444 }
7445 else
7446 {
7447 rtx pair;
7448
7449 if (vmode == V8QImode)
7450 {
7451 pair = gen_reg_rtx (V16QImode);
7452 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7453 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7454 }
7455 else
7456 {
7457 pair = gen_reg_rtx (OImode);
7458 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7459 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7460 }
7461 }
7462}
7463
7464void
7465aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7466{
7467 enum machine_mode vmode = GET_MODE (target);
7468 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7469 bool one_vector_p = rtx_equal_p (op0, op1);
7470 rtx rmask[MAX_VECT_LEN], mask;
7471
7472 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7473
7474 /* The TBL instruction does not use a modulo index, so we must take care
7475 of that ourselves. */
7476 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7477 for (i = 0; i < nelt; ++i)
7478 rmask[i] = mask;
7479 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7480 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7481
7482 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7483}
7484
7485/* Recognize patterns suitable for the TRN instructions. */
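/* For example (little-endian, illustrative): on V4SImode the selector
   {0, 4, 2, 6} is matched as TRN1 and {1, 5, 3, 7} as TRN2, where
   indices 0-3 refer to op0 and 4-7 to op1.  */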
7486static bool
7487aarch64_evpc_trn (struct expand_vec_perm_d *d)
7488{
7489 unsigned int i, odd, mask, nelt = d->nelt;
7490 rtx out, in0, in1, x;
7491 rtx (*gen) (rtx, rtx, rtx);
7492 enum machine_mode vmode = d->vmode;
7493
7494 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7495 return false;
7496
7497 /* Note that these are little-endian tests.
7498 We correct for big-endian later. */
7499 if (d->perm[0] == 0)
7500 odd = 0;
7501 else if (d->perm[0] == 1)
7502 odd = 1;
7503 else
7504 return false;
7505 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7506
7507 for (i = 0; i < nelt; i += 2)
7508 {
7509 if (d->perm[i] != i + odd)
7510 return false;
7511 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7512 return false;
7513 }
7514
7515 /* Success! */
7516 if (d->testing_p)
7517 return true;
7518
7519 in0 = d->op0;
7520 in1 = d->op1;
7521 if (BYTES_BIG_ENDIAN)
7522 {
7523 x = in0, in0 = in1, in1 = x;
7524 odd = !odd;
7525 }
7526 out = d->target;
7527
7528 if (odd)
7529 {
7530 switch (vmode)
7531 {
7532 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7533 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7534 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7535 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7536 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7537 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7538 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7539 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7540 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7541 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7542 default:
7543 return false;
7544 }
7545 }
7546 else
7547 {
7548 switch (vmode)
7549 {
7550 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7551 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7552 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7553 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7554 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7555 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7556 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7557 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7558 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7559 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7560 default:
7561 return false;
7562 }
7563 }
7564
7565 emit_insn (gen (out, in0, in1));
7566 return true;
7567}
7568
7569/* Recognize patterns suitable for the UZP instructions. */
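/* For example (little-endian, illustrative): on V4SImode the selector
   {0, 2, 4, 6} is matched as UZP1 and {1, 3, 5, 7} as UZP2.  */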
7570static bool
7571aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7572{
7573 unsigned int i, odd, mask, nelt = d->nelt;
7574 rtx out, in0, in1, x;
7575 rtx (*gen) (rtx, rtx, rtx);
7576 enum machine_mode vmode = d->vmode;
7577
7578 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7579 return false;
7580
7581 /* Note that these are little-endian tests.
7582 We correct for big-endian later. */
7583 if (d->perm[0] == 0)
7584 odd = 0;
7585 else if (d->perm[0] == 1)
7586 odd = 1;
7587 else
7588 return false;
7589 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7590
7591 for (i = 0; i < nelt; i++)
7592 {
7593 unsigned elt = (i * 2 + odd) & mask;
7594 if (d->perm[i] != elt)
7595 return false;
7596 }
7597
7598 /* Success! */
7599 if (d->testing_p)
7600 return true;
7601
7602 in0 = d->op0;
7603 in1 = d->op1;
7604 if (BYTES_BIG_ENDIAN)
7605 {
7606 x = in0, in0 = in1, in1 = x;
7607 odd = !odd;
7608 }
7609 out = d->target;
7610
7611 if (odd)
7612 {
7613 switch (vmode)
7614 {
7615 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7616 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7617 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7618 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7619 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7620 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7621 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7622 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7623 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7624 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7625 default:
7626 return false;
7627 }
7628 }
7629 else
7630 {
7631 switch (vmode)
7632 {
7633 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7634 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7635 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7636 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7637 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7638 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7639 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7640 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7641 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7642 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7643 default:
7644 return false;
7645 }
7646 }
7647
7648 emit_insn (gen (out, in0, in1));
7649 return true;
7650}
7651
7652/* Recognize patterns suitable for the ZIP instructions. */
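/* For example (little-endian, illustrative): on V4SImode the selector
   {0, 4, 1, 5} is matched as ZIP1 and {2, 6, 3, 7} as ZIP2.  */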
7653static bool
7654aarch64_evpc_zip (struct expand_vec_perm_d *d)
7655{
7656 unsigned int i, high, mask, nelt = d->nelt;
7657 rtx out, in0, in1, x;
7658 rtx (*gen) (rtx, rtx, rtx);
7659 enum machine_mode vmode = d->vmode;
7660
7661 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7662 return false;
7663
7664 /* Note that these are little-endian tests.
7665 We correct for big-endian later. */
7666 high = nelt / 2;
7667 if (d->perm[0] == high)
7668 /* Do Nothing. */
7669 ;
7670 else if (d->perm[0] == 0)
7671 high = 0;
7672 else
7673 return false;
7674 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7675
7676 for (i = 0; i < nelt / 2; i++)
7677 {
7678 unsigned elt = (i + high) & mask;
7679 if (d->perm[i * 2] != elt)
7680 return false;
7681 elt = (elt + nelt) & mask;
7682 if (d->perm[i * 2 + 1] != elt)
7683 return false;
7684 }
7685
7686 /* Success! */
7687 if (d->testing_p)
7688 return true;
7689
7690 in0 = d->op0;
7691 in1 = d->op1;
7692 if (BYTES_BIG_ENDIAN)
7693 {
7694 x = in0, in0 = in1, in1 = x;
7695 high = !high;
7696 }
7697 out = d->target;
7698
7699 if (high)
7700 {
7701 switch (vmode)
7702 {
7703 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7704 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7705 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7706 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
7707 case V4SImode: gen = gen_aarch64_zip2v4si; break;
7708 case V2SImode: gen = gen_aarch64_zip2v2si; break;
7709 case V2DImode: gen = gen_aarch64_zip2v2di; break;
7710 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
7711 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
7712 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
7713 default:
7714 return false;
7715 }
7716 }
7717 else
7718 {
7719 switch (vmode)
7720 {
7721 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
7722 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
7723 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
7724 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
7725 case V4SImode: gen = gen_aarch64_zip1v4si; break;
7726 case V2SImode: gen = gen_aarch64_zip1v2si; break;
7727 case V2DImode: gen = gen_aarch64_zip1v2di; break;
7728 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
7729 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
7730 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
7731 default:
7732 return false;
7733 }
7734 }
7735
7736 emit_insn (gen (out, in0, in1));
7737 return true;
7738}
7739
7740static bool
7741aarch64_evpc_tbl (struct expand_vec_perm_d *d)
7742{
7743 rtx rperm[MAX_VECT_LEN], sel;
7744 enum machine_mode vmode = d->vmode;
7745 unsigned int i, nelt = d->nelt;
7746
7747 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
7748 numbering of elements for big-endian, we must reverse the order. */
7749 if (BYTES_BIG_ENDIAN)
7750 return false;
7751
7752 if (d->testing_p)
7753 return true;
7754
7755 /* Generic code will try constant permutation twice: once with the
7756 original mode and again with the elements lowered to QImode.
7757 So wait for that retry rather than expanding the selector ourselves. */
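  /* (For instance, a V4SI permutation rejected here is retried by the
     generic code as a V16QI permutation in which each word index has been
     expanded into the four byte indices it covers.)  */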
7758 if (vmode != V8QImode && vmode != V16QImode)
7759 return false;
7760
7761 for (i = 0; i < nelt; ++i)
7762 rperm[i] = GEN_INT (d->perm[i]);
7763 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
7764 sel = force_reg (vmode, sel);
7765
7766 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
7767 return true;
7768}
7769
7770static bool
7771aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
7772{
7773 /* The pattern matching functions above are written to look for a small
7774 number to begin the sequence (0, 1, N/2). If we begin with an index
7775 from the second operand, we can swap the operands. */
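  /* (For example, for V4SI the selector {6, 2, 7, 3} becomes {2, 6, 3, 7}
     with op0 and op1 exchanged, which the ZIP2 matcher can then
     recognize.)  */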
7776 if (d->perm[0] >= d->nelt)
7777 {
7778 unsigned i, nelt = d->nelt;
7779 rtx x;
7780
7781 for (i = 0; i < nelt; ++i)
7782 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
7783
7784 x = d->op0;
7785 d->op0 = d->op1;
7786 d->op1 = x;
7787 }
7788
7789 if (TARGET_SIMD)
7790 {
7791 if (aarch64_evpc_zip (d))
7792 return true;
7793 else if (aarch64_evpc_uzp (d))
7794 return true;
7795 else if (aarch64_evpc_trn (d))
7796 return true;
7797 return aarch64_evpc_tbl (d);
7798 }
7799 return false;
7800}
7801
7802/* Expand a vec_perm_const pattern. */
7803
7804bool
7805aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
7806{
7807 struct expand_vec_perm_d d;
7808 int i, nelt, which;
7809
7810 d.target = target;
7811 d.op0 = op0;
7812 d.op1 = op1;
7813
7814 d.vmode = GET_MODE (target);
7815 gcc_assert (VECTOR_MODE_P (d.vmode));
7816 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
7817 d.testing_p = false;
7818
7819 for (i = which = 0; i < nelt; ++i)
7820 {
7821 rtx e = XVECEXP (sel, 0, i);
7822 int ei = INTVAL (e) & (2 * nelt - 1);
7823 which |= (ei < nelt ? 1 : 2);
7824 d.perm[i] = ei;
7825 }
7826
7827 switch (which)
7828 {
7829 default:
7830 gcc_unreachable ();
7831
7832 case 3:
7833 d.one_vector_p = false;
7834 if (!rtx_equal_p (op0, op1))
7835 break;
7836
7837 /* The elements of PERM do not suggest that only the first operand
7838 is used, but both operands are identical. Allow easier matching
7839 of the permutation by folding the permutation into the single
7840 input vector. */
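      /* (For example, with nelt == 4 and identical operands, the selector
         {0, 5, 2, 7} is folded to {0, 1, 2, 3} on the single input.)  */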
7841 /* Fall Through. */
7842 case 2:
7843 for (i = 0; i < nelt; ++i)
7844 d.perm[i] &= nelt - 1;
7845 d.op0 = op1;
7846 d.one_vector_p = true;
7847 break;
7848
7849 case 1:
7850 d.op1 = op0;
7851 d.one_vector_p = true;
7852 break;
7853 }
7854
7855 return aarch64_expand_vec_perm_const_1 (&d);
7856}
7857
7858static bool
7859aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
7860 const unsigned char *sel)
7861{
7862 struct expand_vec_perm_d d;
7863 unsigned int i, nelt, which;
7864 bool ret;
7865
7866 d.vmode = vmode;
7867 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
7868 d.testing_p = true;
7869 memcpy (d.perm, sel, nelt);
7870
7871 /* Calculate whether all elements are in one vector. */
7872 for (i = which = 0; i < nelt; ++i)
7873 {
7874 unsigned char e = d.perm[i];
7875 gcc_assert (e < 2 * nelt);
7876 which |= (e < nelt ? 1 : 2);
7877 }
7878
7879 /* If all elements are from the second vector, reindex as if from the
7880 first vector. */
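  /* (For example, with nelt == 4 the selector {4, 6, 5, 7} uses only the
     second vector and is rewritten here as {0, 2, 1, 3}.)  */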
7881 if (which == 2)
7882 for (i = 0; i < nelt; ++i)
7883 d.perm[i] -= nelt;
7884
7885 /* Check whether the mask can be applied to a single vector. */
7886 d.one_vector_p = (which != 3);
7887
7888 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
7889 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
7890 if (!d.one_vector_p)
7891 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
7892
7893 start_sequence ();
7894 ret = aarch64_expand_vec_perm_const_1 (&d);
7895 end_sequence ();
7896
7897 return ret;
7898}
7899
7900#undef TARGET_ADDRESS_COST
7901#define TARGET_ADDRESS_COST aarch64_address_cost
7902
7903/* This hook determines whether unnamed bitfields affect the alignment
7904 of the containing structure. The hook returns true if the structure
7905 should inherit the alignment requirements of an unnamed bitfield's
7906 type. */
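/* For instance (illustrative example): with this hook returning true, a
   structure such as "struct { char c; int : 4; }" inherits the alignment
   of the unnamed bitfield's int type.  */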
7907#undef TARGET_ALIGN_ANON_BITFIELD
7908#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
7909
7910#undef TARGET_ASM_ALIGNED_DI_OP
7911#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
7912
7913#undef TARGET_ASM_ALIGNED_HI_OP
7914#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
7915
7916#undef TARGET_ASM_ALIGNED_SI_OP
7917#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
7918
7919#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7920#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
7921 hook_bool_const_tree_hwi_hwi_const_tree_true
7922
7923#undef TARGET_ASM_FILE_START
7924#define TARGET_ASM_FILE_START aarch64_start_file
7925
7926#undef TARGET_ASM_OUTPUT_MI_THUNK
7927#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
7928
7929#undef TARGET_ASM_SELECT_RTX_SECTION
7930#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
7931
7932#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
7933#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
7934
7935#undef TARGET_BUILD_BUILTIN_VA_LIST
7936#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
7937
7938#undef TARGET_CALLEE_COPIES
7939#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
7940
7941#undef TARGET_CAN_ELIMINATE
7942#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
7943
7944#undef TARGET_CANNOT_FORCE_CONST_MEM
7945#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
7946
7947#undef TARGET_CONDITIONAL_REGISTER_USAGE
7948#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
7949
7950/* Only the least significant bit is used for initialization guard
7951 variables. */
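/* For example (illustrative): the check for whether a function-local static
   object has already been constructed tests only the least significant bit
   of the guard variable rather than its whole first byte.  */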
7952#undef TARGET_CXX_GUARD_MASK_BIT
7953#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
7954
7955#undef TARGET_C_MODE_FOR_SUFFIX
7956#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
7957
7958#ifdef TARGET_BIG_ENDIAN_DEFAULT
7959#undef TARGET_DEFAULT_TARGET_FLAGS
7960#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
7961#endif
7962
7963#undef TARGET_CLASS_MAX_NREGS
7964#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
7965
7966#undef TARGET_BUILTIN_DECL
7967#define TARGET_BUILTIN_DECL aarch64_builtin_decl
7968
7969#undef TARGET_EXPAND_BUILTIN
7970#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
7971
7972#undef TARGET_EXPAND_BUILTIN_VA_START
7973#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
7974
7975#undef TARGET_FOLD_BUILTIN
7976#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
7977
7978#undef TARGET_FUNCTION_ARG
7979#define TARGET_FUNCTION_ARG aarch64_function_arg
7980
7981#undef TARGET_FUNCTION_ARG_ADVANCE
7982#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
7983
7984#undef TARGET_FUNCTION_ARG_BOUNDARY
7985#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
7986
7987#undef TARGET_FUNCTION_OK_FOR_SIBCALL
7988#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
7989
7990#undef TARGET_FUNCTION_VALUE
7991#define TARGET_FUNCTION_VALUE aarch64_function_value
7992
7993#undef TARGET_FUNCTION_VALUE_REGNO_P
7994#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
7995
7996#undef TARGET_FRAME_POINTER_REQUIRED
7997#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
7998
7999#undef TARGET_GIMPLE_FOLD_BUILTIN
8000#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
8001
8002#undef TARGET_GIMPLIFY_VA_ARG_EXPR
8003#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
8004
8005#undef TARGET_INIT_BUILTINS
8006#define TARGET_INIT_BUILTINS aarch64_init_builtins
8007
8008#undef TARGET_LEGITIMATE_ADDRESS_P
8009#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
8010
8011#undef TARGET_LEGITIMATE_CONSTANT_P
8012#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
8013
8014#undef TARGET_LIBGCC_CMP_RETURN_MODE
8015#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
8016
8017#undef TARGET_MANGLE_TYPE
8018#define TARGET_MANGLE_TYPE aarch64_mangle_type
8019
8020#undef TARGET_MEMORY_MOVE_COST
8021#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
8022
8023#undef TARGET_MUST_PASS_IN_STACK
8024#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8025
8026/* This target hook should return true if accesses to volatile bitfields
8027 should use the narrowest mode possible. It should return false if these
8028 accesses should use the bitfield container type. */
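/* For example (illustrative): with this hook returning false, a load of the
   field in "struct { volatile int x : 8; }" is performed as an access of the
   int container width rather than as a single-byte access.  */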
8029#undef TARGET_NARROW_VOLATILE_BITFIELD
8030#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
8031
8032#undef TARGET_OPTION_OVERRIDE
8033#define TARGET_OPTION_OVERRIDE aarch64_override_options
8034
8035#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
8036#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
8037 aarch64_override_options_after_change
8038
8039#undef TARGET_PASS_BY_REFERENCE
8040#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
8041
8042#undef TARGET_PREFERRED_RELOAD_CLASS
8043#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
8044
8045#undef TARGET_SECONDARY_RELOAD
8046#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
8047
8048#undef TARGET_SHIFT_TRUNCATION_MASK
8049#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
8050
8051#undef TARGET_SETUP_INCOMING_VARARGS
8052#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
8053
8054#undef TARGET_STRUCT_VALUE_RTX
8055#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
8056
8057#undef TARGET_REGISTER_MOVE_COST
8058#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
8059
8060#undef TARGET_RETURN_IN_MEMORY
8061#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
8062
8063#undef TARGET_RETURN_IN_MSB
8064#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
8065
8066#undef TARGET_RTX_COSTS
8067#define TARGET_RTX_COSTS aarch64_rtx_costs
8068
8069#undef TARGET_TRAMPOLINE_INIT
8070#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
8071
8072#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
8073#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
8074
8075#undef TARGET_VECTOR_MODE_SUPPORTED_P
8076#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
8077
8078#undef TARGET_ARRAY_MODE_SUPPORTED_P
8079#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
8080
8081#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
8082#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
8083
8084#undef TARGET_VECTORIZE_BUILTINS
8085#define TARGET_VECTORIZE_BUILTINS
8086
8087#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
8088#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
8089 aarch64_builtin_vectorized_function
8090
8091#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
8092#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
8093 aarch64_autovectorize_vector_sizes
8094
8095/* Section anchor support. */
8096
8097#undef TARGET_MIN_ANCHOR_OFFSET
8098#define TARGET_MIN_ANCHOR_OFFSET -256
8099
8100/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
8101 byte offset; we can do much more for larger data types, but have no way
8102 to determine the size of the access. We assume accesses are aligned. */
8103#undef TARGET_MAX_ANCHOR_OFFSET
8104#define TARGET_MAX_ANCHOR_OFFSET 4095
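/* (These bounds mirror the AArch64 immediate addressing modes: the unscaled
   9-bit signed form covers offsets -256..255, and the scaled 12-bit unsigned
   form covers byte offsets 0..4095.)  */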
8105
8106#undef TARGET_VECTOR_ALIGNMENT
8107#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
8108
8109#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
8110#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
8111 aarch64_simd_vector_alignment_reachable
8112
8113/* vec_perm support. */
8114
8115#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
8116#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
8117 aarch64_vectorize_vec_perm_const_ok
8118
8119
8120#undef TARGET_FIXED_CONDITION_CODE_REGS
8121#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
8122
8123struct gcc_target targetm = TARGET_INITIALIZER;
8124
8125#include "gt-aarch64.h"