]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/aarch64/aarch64.c
Merge with trunk.
[thirdparty/gcc.git] / gcc / config / aarch64 / aarch64.c
1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2013 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "regs.h"
30 #include "df.h"
31 #include "hard-reg-set.h"
32 #include "output.h"
33 #include "expr.h"
34 #include "reload.h"
35 #include "toplev.h"
36 #include "target.h"
37 #include "target-def.h"
38 #include "targhooks.h"
39 #include "ggc.h"
40 #include "function.h"
41 #include "tm_p.h"
42 #include "recog.h"
43 #include "langhooks.h"
44 #include "diagnostic-core.h"
45 #include "gimple.h"
46 #include "optabs.h"
47 #include "dwarf2.h"
48 #include "cfgloop.h"
49 #include "tree-vectorizer.h"
50
51 /* Defined for convenience. */
52 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
53
54 /* Classifies an address.
55
56 ADDRESS_REG_IMM
57 A simple base register plus immediate offset.
58
59 ADDRESS_REG_WB
60 A base register indexed by immediate offset with writeback.
61
62 ADDRESS_REG_REG
63 A base register indexed by (optionally scaled) register.
64
65 ADDRESS_REG_UXTW
66 A base register indexed by (optionally scaled) zero-extended register.
67
68 ADDRESS_REG_SXTW
69 A base register indexed by (optionally scaled) sign-extended register.
70
71 ADDRESS_LO_SUM
72 A LO_SUM rtx with a base register and "LO12" symbol relocation.
73
74 ADDRESS_SYMBOLIC:
75 A constant symbolic address, in pc-relative literal pool. */
76
77 enum aarch64_address_type {
78 ADDRESS_REG_IMM,
79 ADDRESS_REG_WB,
80 ADDRESS_REG_REG,
81 ADDRESS_REG_UXTW,
82 ADDRESS_REG_SXTW,
83 ADDRESS_LO_SUM,
84 ADDRESS_SYMBOLIC
85 };
86
87 struct aarch64_address_info {
88 enum aarch64_address_type type;
89 rtx base;
90 rtx offset;
91 int shift;
92 enum aarch64_symbol_type symbol_type;
93 };
94
95 struct simd_immediate_info
96 {
97 rtx value;
98 int shift;
99 int element_width;
100 bool mvn;
101 bool msl;
102 };
103
104 /* The current code model. */
105 enum aarch64_code_model aarch64_cmodel;
106
107 #ifdef HAVE_AS_TLS
108 #undef TARGET_HAVE_TLS
109 #define TARGET_HAVE_TLS 1
110 #endif
111
112 static bool aarch64_lra_p (void);
113 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
114 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
115 const_tree,
116 enum machine_mode *, int *,
117 bool *);
118 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
119 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
120 static void aarch64_override_options_after_change (void);
121 static bool aarch64_vector_mode_supported_p (enum machine_mode);
122 static unsigned bit_count (unsigned HOST_WIDE_INT);
123 static bool aarch64_const_vec_all_same_int_p (rtx,
124 HOST_WIDE_INT, HOST_WIDE_INT);
125
126 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
127 const unsigned char *sel);
128
129 /* The processor for which instructions should be scheduled. */
130 enum aarch64_processor aarch64_tune = generic;
131
132 /* The current tuning set. */
133 const struct tune_params *aarch64_tune_params;
134
135 /* Mask to specify which instructions we are allowed to generate. */
136 unsigned long aarch64_isa_flags = 0;
137
138 /* Mask to specify which instruction scheduling options should be used. */
139 unsigned long aarch64_tune_flags = 0;
140
141 /* Tuning parameters. */
142
143 #if HAVE_DESIGNATED_INITIALIZERS
144 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
145 #else
146 #define NAMED_PARAM(NAME, VAL) (VAL)
147 #endif
148
149 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
150 __extension__
151 #endif
152 static const struct cpu_rtx_cost_table generic_rtx_cost_table =
153 {
154 NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
155 NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
156 NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
157 NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
158 NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
159 NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
160 NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
161 NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
162 NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
163 NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
164 NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
165 NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
166 };
167
168 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
169 __extension__
170 #endif
171 static const struct cpu_addrcost_table generic_addrcost_table =
172 {
173 NAMED_PARAM (pre_modify, 0),
174 NAMED_PARAM (post_modify, 0),
175 NAMED_PARAM (register_offset, 0),
176 NAMED_PARAM (register_extend, 0),
177 NAMED_PARAM (imm_offset, 0)
178 };
179
180 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
181 __extension__
182 #endif
183 static const struct cpu_regmove_cost generic_regmove_cost =
184 {
185 NAMED_PARAM (GP2GP, 1),
186 NAMED_PARAM (GP2FP, 2),
187 NAMED_PARAM (FP2GP, 2),
188 /* We currently do not provide direct support for TFmode Q->Q move.
189 Therefore we need to raise the cost above 2 in order to have
190 reload handle the situation. */
191 NAMED_PARAM (FP2FP, 4)
192 };
193
194 /* Generic costs for vector insn classes. */
195 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
196 __extension__
197 #endif
198 static const struct cpu_vector_cost generic_vector_cost =
199 {
200 NAMED_PARAM (scalar_stmt_cost, 1),
201 NAMED_PARAM (scalar_load_cost, 1),
202 NAMED_PARAM (scalar_store_cost, 1),
203 NAMED_PARAM (vec_stmt_cost, 1),
204 NAMED_PARAM (vec_to_scalar_cost, 1),
205 NAMED_PARAM (scalar_to_vec_cost, 1),
206 NAMED_PARAM (vec_align_load_cost, 1),
207 NAMED_PARAM (vec_unalign_load_cost, 1),
208 NAMED_PARAM (vec_unalign_store_cost, 1),
209 NAMED_PARAM (vec_store_cost, 1),
210 NAMED_PARAM (cond_taken_branch_cost, 3),
211 NAMED_PARAM (cond_not_taken_branch_cost, 1)
212 };
213
214 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
215 __extension__
216 #endif
217 static const struct tune_params generic_tunings =
218 {
219 &generic_rtx_cost_table,
220 &generic_addrcost_table,
221 &generic_regmove_cost,
222 &generic_vector_cost,
223 NAMED_PARAM (memmov_cost, 4)
224 };
225
226 /* A processor implementing AArch64. */
227 struct processor
228 {
229 const char *const name;
230 enum aarch64_processor core;
231 const char *arch;
232 const unsigned long flags;
233 const struct tune_params *const tune;
234 };
235
236 /* Processor cores implementing AArch64. */
237 static const struct processor all_cores[] =
238 {
239 #define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
240 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
241 #include "aarch64-cores.def"
242 #undef AARCH64_CORE
243 {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
244 {NULL, aarch64_none, NULL, 0, NULL}
245 };
246
247 /* Architectures implementing AArch64. */
248 static const struct processor all_architectures[] =
249 {
250 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
251 {NAME, CORE, #ARCH, FLAGS, NULL},
252 #include "aarch64-arches.def"
253 #undef AARCH64_ARCH
254 {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
255 {NULL, aarch64_none, NULL, 0, NULL}
256 };
257
258 /* Target specification. These are populated as commandline arguments
259 are processed, or NULL if not specified. */
260 static const struct processor *selected_arch;
261 static const struct processor *selected_cpu;
262 static const struct processor *selected_tune;
263
264 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
265
266 /* An ISA extension in the co-processor and main instruction set space. */
267 struct aarch64_option_extension
268 {
269 const char *const name;
270 const unsigned long flags_on;
271 const unsigned long flags_off;
272 };
273
274 /* ISA extensions in AArch64. */
275 static const struct aarch64_option_extension all_extensions[] =
276 {
277 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
278 {NAME, FLAGS_ON, FLAGS_OFF},
279 #include "aarch64-option-extensions.def"
280 #undef AARCH64_OPT_EXTENSION
281 {NULL, 0, 0}
282 };
283
284 /* Used to track the size of an address when generating a pre/post
285 increment address. */
286 static enum machine_mode aarch64_memory_reference_mode;
287
288 /* Used to force GTY into this file. */
289 static GTY(()) int gty_dummy;
290
291 /* A table of valid AArch64 "bitmask immediate" values for
292 logical instructions. */
293
294 #define AARCH64_NUM_BITMASKS 5334
295 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
296
297 /* Did we set flag_omit_frame_pointer just so
298 aarch64_frame_pointer_required would be called? */
299 static bool faked_omit_frame_pointer;
300
301 typedef enum aarch64_cond_code
302 {
303 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
304 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
305 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
306 }
307 aarch64_cc;
308
309 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
310
311 /* The condition codes of the processor, and the inverse function. */
312 static const char * const aarch64_condition_codes[] =
313 {
314 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
315 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
316 };
317
318 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
319 unsigned
320 aarch64_dbx_register_number (unsigned regno)
321 {
322 if (GP_REGNUM_P (regno))
323 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
324 else if (regno == SP_REGNUM)
325 return AARCH64_DWARF_SP;
326 else if (FP_REGNUM_P (regno))
327 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
328
329 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
330 equivalent DWARF register. */
331 return DWARF_FRAME_REGISTERS;
332 }
333
334 /* Return TRUE if MODE is any of the large INT modes. */
335 static bool
336 aarch64_vect_struct_mode_p (enum machine_mode mode)
337 {
338 return mode == OImode || mode == CImode || mode == XImode;
339 }
340
341 /* Return TRUE if MODE is any of the vector modes. */
342 static bool
343 aarch64_vector_mode_p (enum machine_mode mode)
344 {
345 return aarch64_vector_mode_supported_p (mode)
346 || aarch64_vect_struct_mode_p (mode);
347 }
348
349 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
350 static bool
351 aarch64_array_mode_supported_p (enum machine_mode mode,
352 unsigned HOST_WIDE_INT nelems)
353 {
354 if (TARGET_SIMD
355 && AARCH64_VALID_SIMD_QREG_MODE (mode)
356 && (nelems >= 2 && nelems <= 4))
357 return true;
358
359 return false;
360 }
361
362 /* Implement HARD_REGNO_NREGS. */
363
364 int
365 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
366 {
367 switch (aarch64_regno_regclass (regno))
368 {
369 case FP_REGS:
370 case FP_LO_REGS:
371 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
372 default:
373 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
374 }
375 gcc_unreachable ();
376 }
377
378 /* Implement HARD_REGNO_MODE_OK. */
379
380 int
381 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
382 {
383 if (GET_MODE_CLASS (mode) == MODE_CC)
384 return regno == CC_REGNUM;
385
386 if (regno == SP_REGNUM)
387 /* The purpose of comparing with ptr_mode is to support the
388 global register variable associated with the stack pointer
389 register via the syntax of asm ("wsp") in ILP32. */
390 return mode == Pmode || mode == ptr_mode;
391
392 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
393 return mode == Pmode;
394
395 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
396 return 1;
397
398 if (FP_REGNUM_P (regno))
399 {
400 if (aarch64_vect_struct_mode_p (mode))
401 return
402 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
403 else
404 return 1;
405 }
406
407 return 0;
408 }
409
410 /* Return true if calls to DECL should be treated as
411 long-calls (ie called via a register). */
412 static bool
413 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
414 {
415 return false;
416 }
417
418 /* Return true if calls to symbol-ref SYM should be treated as
419 long-calls (ie called via a register). */
420 bool
421 aarch64_is_long_call_p (rtx sym)
422 {
423 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
424 }
425
426 /* Return true if the offsets to a zero/sign-extract operation
427 represent an expression that matches an extend operation. The
428 operands represent the paramters from
429
430 (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
431 bool
432 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
433 rtx extract_imm)
434 {
435 HOST_WIDE_INT mult_val, extract_val;
436
437 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
438 return false;
439
440 mult_val = INTVAL (mult_imm);
441 extract_val = INTVAL (extract_imm);
442
443 if (extract_val > 8
444 && extract_val < GET_MODE_BITSIZE (mode)
445 && exact_log2 (extract_val & ~7) > 0
446 && (extract_val & 7) <= 4
447 && mult_val == (1 << (extract_val & 7)))
448 return true;
449
450 return false;
451 }
452
453 /* Emit an insn that's a simple single-set. Both the operands must be
454 known to be valid. */
455 inline static rtx
456 emit_set_insn (rtx x, rtx y)
457 {
458 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
459 }
460
461 /* X and Y are two things to compare using CODE. Emit the compare insn and
462 return the rtx for register 0 in the proper mode. */
463 rtx
464 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
465 {
466 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
467 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
468
469 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
470 return cc_reg;
471 }
472
473 /* Build the SYMBOL_REF for __tls_get_addr. */
474
475 static GTY(()) rtx tls_get_addr_libfunc;
476
477 rtx
478 aarch64_tls_get_addr (void)
479 {
480 if (!tls_get_addr_libfunc)
481 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
482 return tls_get_addr_libfunc;
483 }
484
485 /* Return the TLS model to use for ADDR. */
486
487 static enum tls_model
488 tls_symbolic_operand_type (rtx addr)
489 {
490 enum tls_model tls_kind = TLS_MODEL_NONE;
491 rtx sym, addend;
492
493 if (GET_CODE (addr) == CONST)
494 {
495 split_const (addr, &sym, &addend);
496 if (GET_CODE (sym) == SYMBOL_REF)
497 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
498 }
499 else if (GET_CODE (addr) == SYMBOL_REF)
500 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
501
502 return tls_kind;
503 }
504
505 /* We'll allow lo_sum's in addresses in our legitimate addresses
506 so that combine would take care of combining addresses where
507 necessary, but for generation purposes, we'll generate the address
508 as :
509 RTL Absolute
510 tmp = hi (symbol_ref); adrp x1, foo
511 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
512 nop
513
514 PIC TLS
515 adrp x1, :got:foo adrp tmp, :tlsgd:foo
516 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
517 bl __tls_get_addr
518 nop
519
520 Load TLS symbol, depending on TLS mechanism and TLS access model.
521
522 Global Dynamic - Traditional TLS:
523 adrp tmp, :tlsgd:imm
524 add dest, tmp, #:tlsgd_lo12:imm
525 bl __tls_get_addr
526
527 Global Dynamic - TLS Descriptors:
528 adrp dest, :tlsdesc:imm
529 ldr tmp, [dest, #:tlsdesc_lo12:imm]
530 add dest, dest, #:tlsdesc_lo12:imm
531 blr tmp
532 mrs tp, tpidr_el0
533 add dest, dest, tp
534
535 Initial Exec:
536 mrs tp, tpidr_el0
537 adrp tmp, :gottprel:imm
538 ldr dest, [tmp, #:gottprel_lo12:imm]
539 add dest, dest, tp
540
541 Local Exec:
542 mrs tp, tpidr_el0
543 add t0, tp, #:tprel_hi12:imm
544 add t0, #:tprel_lo12_nc:imm
545 */
546
547 static void
548 aarch64_load_symref_appropriately (rtx dest, rtx imm,
549 enum aarch64_symbol_type type)
550 {
551 switch (type)
552 {
553 case SYMBOL_SMALL_ABSOLUTE:
554 {
555 /* In ILP32, the mode of dest can be either SImode or DImode. */
556 rtx tmp_reg = dest;
557 enum machine_mode mode = GET_MODE (dest);
558
559 gcc_assert (mode == Pmode || mode == ptr_mode);
560
561 if (can_create_pseudo_p ())
562 tmp_reg = gen_reg_rtx (mode);
563
564 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
565 emit_insn (gen_add_losym (dest, tmp_reg, imm));
566 return;
567 }
568
569 case SYMBOL_TINY_ABSOLUTE:
570 emit_insn (gen_rtx_SET (Pmode, dest, imm));
571 return;
572
573 case SYMBOL_SMALL_GOT:
574 {
575 /* In ILP32, the mode of dest can be either SImode or DImode,
576 while the got entry is always of SImode size. The mode of
577 dest depends on how dest is used: if dest is assigned to a
578 pointer (e.g. in the memory), it has SImode; it may have
579 DImode if dest is dereferenced to access the memeory.
580 This is why we have to handle three different ldr_got_small
581 patterns here (two patterns for ILP32). */
582 rtx tmp_reg = dest;
583 enum machine_mode mode = GET_MODE (dest);
584
585 if (can_create_pseudo_p ())
586 tmp_reg = gen_reg_rtx (mode);
587
588 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
589 if (mode == ptr_mode)
590 {
591 if (mode == DImode)
592 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
593 else
594 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
595 }
596 else
597 {
598 gcc_assert (mode == Pmode);
599 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
600 }
601
602 return;
603 }
604
605 case SYMBOL_SMALL_TLSGD:
606 {
607 rtx insns;
608 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
609
610 start_sequence ();
611 emit_call_insn (gen_tlsgd_small (result, imm));
612 insns = get_insns ();
613 end_sequence ();
614
615 RTL_CONST_CALL_P (insns) = 1;
616 emit_libcall_block (insns, dest, result, imm);
617 return;
618 }
619
620 case SYMBOL_SMALL_TLSDESC:
621 {
622 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
623 rtx tp;
624
625 emit_insn (gen_tlsdesc_small (imm));
626 tp = aarch64_load_tp (NULL);
627 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
628 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
629 return;
630 }
631
632 case SYMBOL_SMALL_GOTTPREL:
633 {
634 rtx tmp_reg = gen_reg_rtx (Pmode);
635 rtx tp = aarch64_load_tp (NULL);
636 emit_insn (gen_tlsie_small (tmp_reg, imm));
637 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
638 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
639 return;
640 }
641
642 case SYMBOL_SMALL_TPREL:
643 {
644 rtx tp = aarch64_load_tp (NULL);
645 emit_insn (gen_tlsle_small (dest, tp, imm));
646 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
647 return;
648 }
649
650 case SYMBOL_TINY_GOT:
651 emit_insn (gen_ldr_got_tiny (dest, imm));
652 return;
653
654 default:
655 gcc_unreachable ();
656 }
657 }
658
659 /* Emit a move from SRC to DEST. Assume that the move expanders can
660 handle all moves if !can_create_pseudo_p (). The distinction is
661 important because, unlike emit_move_insn, the move expanders know
662 how to force Pmode objects into the constant pool even when the
663 constant pool address is not itself legitimate. */
664 static rtx
665 aarch64_emit_move (rtx dest, rtx src)
666 {
667 return (can_create_pseudo_p ()
668 ? emit_move_insn (dest, src)
669 : emit_move_insn_1 (dest, src));
670 }
671
672 void
673 aarch64_split_128bit_move (rtx dst, rtx src)
674 {
675 rtx low_dst;
676
677 enum machine_mode src_mode = GET_MODE (src);
678 enum machine_mode dst_mode = GET_MODE (dst);
679 int src_regno = REGNO (src);
680 int dst_regno = REGNO (dst);
681
682 gcc_assert (dst_mode == TImode || dst_mode == TFmode);
683
684 if (REG_P (dst) && REG_P (src))
685 {
686 gcc_assert (src_mode == TImode || src_mode == TFmode);
687
688 /* Handle r -> w, w -> r. */
689 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
690 {
691 switch (src_mode) {
692 case TImode:
693 emit_insn
694 (gen_aarch64_movtilow_di (dst, gen_lowpart (word_mode, src)));
695 emit_insn
696 (gen_aarch64_movtihigh_di (dst, gen_highpart (word_mode, src)));
697 return;
698 case TFmode:
699 emit_insn
700 (gen_aarch64_movtflow_di (dst, gen_lowpart (word_mode, src)));
701 emit_insn
702 (gen_aarch64_movtfhigh_di (dst, gen_highpart (word_mode, src)));
703 return;
704 default:
705 gcc_unreachable ();
706 }
707 }
708 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
709 {
710 switch (src_mode) {
711 case TImode:
712 emit_insn
713 (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst), src));
714 emit_insn
715 (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst), src));
716 return;
717 case TFmode:
718 emit_insn
719 (gen_aarch64_movdi_tflow (gen_lowpart (word_mode, dst), src));
720 emit_insn
721 (gen_aarch64_movdi_tfhigh (gen_highpart (word_mode, dst), src));
722 return;
723 default:
724 gcc_unreachable ();
725 }
726 }
727 /* Fall through to r -> r cases. */
728 }
729
730 switch (dst_mode) {
731 case TImode:
732 low_dst = gen_lowpart (word_mode, dst);
733 if (REG_P (low_dst)
734 && reg_overlap_mentioned_p (low_dst, src))
735 {
736 aarch64_emit_move (gen_highpart (word_mode, dst),
737 gen_highpart_mode (word_mode, TImode, src));
738 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
739 }
740 else
741 {
742 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
743 aarch64_emit_move (gen_highpart (word_mode, dst),
744 gen_highpart_mode (word_mode, TImode, src));
745 }
746 return;
747 case TFmode:
748 emit_move_insn (gen_rtx_REG (DFmode, dst_regno),
749 gen_rtx_REG (DFmode, src_regno));
750 emit_move_insn (gen_rtx_REG (DFmode, dst_regno + 1),
751 gen_rtx_REG (DFmode, src_regno + 1));
752 return;
753 default:
754 gcc_unreachable ();
755 }
756 }
757
758 bool
759 aarch64_split_128bit_move_p (rtx dst, rtx src)
760 {
761 return (! REG_P (src)
762 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
763 }
764
765 /* Split a complex SIMD combine. */
766
767 void
768 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
769 {
770 enum machine_mode src_mode = GET_MODE (src1);
771 enum machine_mode dst_mode = GET_MODE (dst);
772
773 gcc_assert (VECTOR_MODE_P (dst_mode));
774
775 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
776 {
777 rtx (*gen) (rtx, rtx, rtx);
778
779 switch (src_mode)
780 {
781 case V8QImode:
782 gen = gen_aarch64_simd_combinev8qi;
783 break;
784 case V4HImode:
785 gen = gen_aarch64_simd_combinev4hi;
786 break;
787 case V2SImode:
788 gen = gen_aarch64_simd_combinev2si;
789 break;
790 case V2SFmode:
791 gen = gen_aarch64_simd_combinev2sf;
792 break;
793 case DImode:
794 gen = gen_aarch64_simd_combinedi;
795 break;
796 case DFmode:
797 gen = gen_aarch64_simd_combinedf;
798 break;
799 default:
800 gcc_unreachable ();
801 }
802
803 emit_insn (gen (dst, src1, src2));
804 return;
805 }
806 }
807
808 /* Split a complex SIMD move. */
809
810 void
811 aarch64_split_simd_move (rtx dst, rtx src)
812 {
813 enum machine_mode src_mode = GET_MODE (src);
814 enum machine_mode dst_mode = GET_MODE (dst);
815
816 gcc_assert (VECTOR_MODE_P (dst_mode));
817
818 if (REG_P (dst) && REG_P (src))
819 {
820 rtx (*gen) (rtx, rtx);
821
822 gcc_assert (VECTOR_MODE_P (src_mode));
823
824 switch (src_mode)
825 {
826 case V16QImode:
827 gen = gen_aarch64_split_simd_movv16qi;
828 break;
829 case V8HImode:
830 gen = gen_aarch64_split_simd_movv8hi;
831 break;
832 case V4SImode:
833 gen = gen_aarch64_split_simd_movv4si;
834 break;
835 case V2DImode:
836 gen = gen_aarch64_split_simd_movv2di;
837 break;
838 case V4SFmode:
839 gen = gen_aarch64_split_simd_movv4sf;
840 break;
841 case V2DFmode:
842 gen = gen_aarch64_split_simd_movv2df;
843 break;
844 default:
845 gcc_unreachable ();
846 }
847
848 emit_insn (gen (dst, src));
849 return;
850 }
851 }
852
853 static rtx
854 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
855 {
856 if (can_create_pseudo_p ())
857 return force_reg (mode, value);
858 else
859 {
860 x = aarch64_emit_move (x, value);
861 return x;
862 }
863 }
864
865
866 static rtx
867 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
868 {
869 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
870 {
871 rtx high;
872 /* Load the full offset into a register. This
873 might be improvable in the future. */
874 high = GEN_INT (offset);
875 offset = 0;
876 high = aarch64_force_temporary (mode, temp, high);
877 reg = aarch64_force_temporary (mode, temp,
878 gen_rtx_PLUS (mode, high, reg));
879 }
880 return plus_constant (mode, reg, offset);
881 }
882
883 void
884 aarch64_expand_mov_immediate (rtx dest, rtx imm)
885 {
886 enum machine_mode mode = GET_MODE (dest);
887 unsigned HOST_WIDE_INT mask;
888 int i;
889 bool first;
890 unsigned HOST_WIDE_INT val;
891 bool subtargets;
892 rtx subtarget;
893 int one_match, zero_match;
894
895 gcc_assert (mode == SImode || mode == DImode);
896
897 /* Check on what type of symbol it is. */
898 if (GET_CODE (imm) == SYMBOL_REF
899 || GET_CODE (imm) == LABEL_REF
900 || GET_CODE (imm) == CONST)
901 {
902 rtx mem, base, offset;
903 enum aarch64_symbol_type sty;
904
905 /* If we have (const (plus symbol offset)), separate out the offset
906 before we start classifying the symbol. */
907 split_const (imm, &base, &offset);
908
909 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
910 switch (sty)
911 {
912 case SYMBOL_FORCE_TO_MEM:
913 if (offset != const0_rtx
914 && targetm.cannot_force_const_mem (mode, imm))
915 {
916 gcc_assert(can_create_pseudo_p ());
917 base = aarch64_force_temporary (mode, dest, base);
918 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
919 aarch64_emit_move (dest, base);
920 return;
921 }
922 mem = force_const_mem (ptr_mode, imm);
923 gcc_assert (mem);
924 if (mode != ptr_mode)
925 mem = gen_rtx_ZERO_EXTEND (mode, mem);
926 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
927 return;
928
929 case SYMBOL_SMALL_TLSGD:
930 case SYMBOL_SMALL_TLSDESC:
931 case SYMBOL_SMALL_GOTTPREL:
932 case SYMBOL_SMALL_GOT:
933 case SYMBOL_TINY_GOT:
934 if (offset != const0_rtx)
935 {
936 gcc_assert(can_create_pseudo_p ());
937 base = aarch64_force_temporary (mode, dest, base);
938 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
939 aarch64_emit_move (dest, base);
940 return;
941 }
942 /* FALLTHRU */
943
944 case SYMBOL_SMALL_TPREL:
945 case SYMBOL_SMALL_ABSOLUTE:
946 case SYMBOL_TINY_ABSOLUTE:
947 aarch64_load_symref_appropriately (dest, imm, sty);
948 return;
949
950 default:
951 gcc_unreachable ();
952 }
953 }
954
955 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
956 {
957 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
958 return;
959 }
960
961 if (!CONST_INT_P (imm))
962 {
963 if (GET_CODE (imm) == HIGH)
964 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
965 else
966 {
967 rtx mem = force_const_mem (mode, imm);
968 gcc_assert (mem);
969 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
970 }
971
972 return;
973 }
974
975 if (mode == SImode)
976 {
977 /* We know we can't do this in 1 insn, and we must be able to do it
978 in two; so don't mess around looking for sequences that don't buy
979 us anything. */
980 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
981 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
982 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
983 return;
984 }
985
986 /* Remaining cases are all for DImode. */
987
988 val = INTVAL (imm);
989 subtargets = optimize && can_create_pseudo_p ();
990
991 one_match = 0;
992 zero_match = 0;
993 mask = 0xffff;
994
995 for (i = 0; i < 64; i += 16, mask <<= 16)
996 {
997 if ((val & mask) == 0)
998 zero_match++;
999 else if ((val & mask) == mask)
1000 one_match++;
1001 }
1002
1003 if (one_match == 2)
1004 {
1005 mask = 0xffff;
1006 for (i = 0; i < 64; i += 16, mask <<= 16)
1007 {
1008 if ((val & mask) != mask)
1009 {
1010 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1011 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1012 GEN_INT ((val >> i) & 0xffff)));
1013 return;
1014 }
1015 }
1016 gcc_unreachable ();
1017 }
1018
1019 if (zero_match == 2)
1020 goto simple_sequence;
1021
1022 mask = 0x0ffff0000UL;
1023 for (i = 16; i < 64; i += 16, mask <<= 16)
1024 {
1025 HOST_WIDE_INT comp = mask & ~(mask - 1);
1026
1027 if (aarch64_uimm12_shift (val - (val & mask)))
1028 {
1029 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1030
1031 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1032 emit_insn (gen_adddi3 (dest, subtarget,
1033 GEN_INT (val - (val & mask))));
1034 return;
1035 }
1036 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1037 {
1038 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1039
1040 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1041 GEN_INT ((val + comp) & mask)));
1042 emit_insn (gen_adddi3 (dest, subtarget,
1043 GEN_INT (val - ((val + comp) & mask))));
1044 return;
1045 }
1046 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1047 {
1048 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1049
1050 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1051 GEN_INT ((val - comp) | ~mask)));
1052 emit_insn (gen_adddi3 (dest, subtarget,
1053 GEN_INT (val - ((val - comp) | ~mask))));
1054 return;
1055 }
1056 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1057 {
1058 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1059
1060 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1061 GEN_INT (val | ~mask)));
1062 emit_insn (gen_adddi3 (dest, subtarget,
1063 GEN_INT (val - (val | ~mask))));
1064 return;
1065 }
1066 }
1067
1068 /* See if we can do it by arithmetically combining two
1069 immediates. */
1070 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1071 {
1072 int j;
1073 mask = 0xffff;
1074
1075 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1076 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1077 {
1078 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1079 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1080 GEN_INT (aarch64_bitmasks[i])));
1081 emit_insn (gen_adddi3 (dest, subtarget,
1082 GEN_INT (val - aarch64_bitmasks[i])));
1083 return;
1084 }
1085
1086 for (j = 0; j < 64; j += 16, mask <<= 16)
1087 {
1088 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1089 {
1090 emit_insn (gen_rtx_SET (VOIDmode, dest,
1091 GEN_INT (aarch64_bitmasks[i])));
1092 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1093 GEN_INT ((val >> j) & 0xffff)));
1094 return;
1095 }
1096 }
1097 }
1098
1099 /* See if we can do it by logically combining two immediates. */
1100 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1101 {
1102 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1103 {
1104 int j;
1105
1106 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1107 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1108 {
1109 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1110 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1111 GEN_INT (aarch64_bitmasks[i])));
1112 emit_insn (gen_iordi3 (dest, subtarget,
1113 GEN_INT (aarch64_bitmasks[j])));
1114 return;
1115 }
1116 }
1117 else if ((val & aarch64_bitmasks[i]) == val)
1118 {
1119 int j;
1120
1121 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1122 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1123 {
1124
1125 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1126 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1127 GEN_INT (aarch64_bitmasks[j])));
1128 emit_insn (gen_anddi3 (dest, subtarget,
1129 GEN_INT (aarch64_bitmasks[i])));
1130 return;
1131 }
1132 }
1133 }
1134
1135 simple_sequence:
1136 first = true;
1137 mask = 0xffff;
1138 for (i = 0; i < 64; i += 16, mask <<= 16)
1139 {
1140 if ((val & mask) != 0)
1141 {
1142 if (first)
1143 {
1144 emit_insn (gen_rtx_SET (VOIDmode, dest,
1145 GEN_INT (val & mask)));
1146 first = false;
1147 }
1148 else
1149 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1150 GEN_INT ((val >> i) & 0xffff)));
1151 }
1152 }
1153 }
1154
1155 static bool
1156 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1157 {
1158 /* Indirect calls are not currently supported. */
1159 if (decl == NULL)
1160 return false;
1161
1162 /* Cannot tail-call to long-calls, since these are outside of the
1163 range of a branch instruction (we could handle this if we added
1164 support for indirect tail-calls. */
1165 if (aarch64_decl_is_long_call_p (decl))
1166 return false;
1167
1168 return true;
1169 }
1170
1171 /* Implement TARGET_PASS_BY_REFERENCE. */
1172
1173 static bool
1174 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1175 enum machine_mode mode,
1176 const_tree type,
1177 bool named ATTRIBUTE_UNUSED)
1178 {
1179 HOST_WIDE_INT size;
1180 enum machine_mode dummymode;
1181 int nregs;
1182
1183 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1184 size = (mode == BLKmode && type)
1185 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1186
1187 if (type)
1188 {
1189 /* Arrays always passed by reference. */
1190 if (TREE_CODE (type) == ARRAY_TYPE)
1191 return true;
1192 /* Other aggregates based on their size. */
1193 if (AGGREGATE_TYPE_P (type))
1194 size = int_size_in_bytes (type);
1195 }
1196
1197 /* Variable sized arguments are always returned by reference. */
1198 if (size < 0)
1199 return true;
1200
1201 /* Can this be a candidate to be passed in fp/simd register(s)? */
1202 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1203 &dummymode, &nregs,
1204 NULL))
1205 return false;
1206
1207 /* Arguments which are variable sized or larger than 2 registers are
1208 passed by reference unless they are a homogenous floating point
1209 aggregate. */
1210 return size > 2 * UNITS_PER_WORD;
1211 }
1212
1213 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1214 static bool
1215 aarch64_return_in_msb (const_tree valtype)
1216 {
1217 enum machine_mode dummy_mode;
1218 int dummy_int;
1219
1220 /* Never happens in little-endian mode. */
1221 if (!BYTES_BIG_ENDIAN)
1222 return false;
1223
1224 /* Only composite types smaller than or equal to 16 bytes can
1225 be potentially returned in registers. */
1226 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1227 || int_size_in_bytes (valtype) <= 0
1228 || int_size_in_bytes (valtype) > 16)
1229 return false;
1230
1231 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1232 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1233 is always passed/returned in the least significant bits of fp/simd
1234 register(s). */
1235 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1236 &dummy_mode, &dummy_int, NULL))
1237 return false;
1238
1239 return true;
1240 }
1241
1242 /* Implement TARGET_FUNCTION_VALUE.
1243 Define how to find the value returned by a function. */
1244
1245 static rtx
1246 aarch64_function_value (const_tree type, const_tree func,
1247 bool outgoing ATTRIBUTE_UNUSED)
1248 {
1249 enum machine_mode mode;
1250 int unsignedp;
1251 int count;
1252 enum machine_mode ag_mode;
1253
1254 mode = TYPE_MODE (type);
1255 if (INTEGRAL_TYPE_P (type))
1256 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1257
1258 if (aarch64_return_in_msb (type))
1259 {
1260 HOST_WIDE_INT size = int_size_in_bytes (type);
1261
1262 if (size % UNITS_PER_WORD != 0)
1263 {
1264 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1265 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1266 }
1267 }
1268
1269 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1270 &ag_mode, &count, NULL))
1271 {
1272 if (!aarch64_composite_type_p (type, mode))
1273 {
1274 gcc_assert (count == 1 && mode == ag_mode);
1275 return gen_rtx_REG (mode, V0_REGNUM);
1276 }
1277 else
1278 {
1279 int i;
1280 rtx par;
1281
1282 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1283 for (i = 0; i < count; i++)
1284 {
1285 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1286 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1287 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1288 XVECEXP (par, 0, i) = tmp;
1289 }
1290 return par;
1291 }
1292 }
1293 else
1294 return gen_rtx_REG (mode, R0_REGNUM);
1295 }
1296
1297 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1298 Return true if REGNO is the number of a hard register in which the values
1299 of called function may come back. */
1300
1301 static bool
1302 aarch64_function_value_regno_p (const unsigned int regno)
1303 {
1304 /* Maximum of 16 bytes can be returned in the general registers. Examples
1305 of 16-byte return values are: 128-bit integers and 16-byte small
1306 structures (excluding homogeneous floating-point aggregates). */
1307 if (regno == R0_REGNUM || regno == R1_REGNUM)
1308 return true;
1309
1310 /* Up to four fp/simd registers can return a function value, e.g. a
1311 homogeneous floating-point aggregate having four members. */
1312 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1313 return !TARGET_GENERAL_REGS_ONLY;
1314
1315 return false;
1316 }
1317
1318 /* Implement TARGET_RETURN_IN_MEMORY.
1319
1320 If the type T of the result of a function is such that
1321 void func (T arg)
1322 would require that arg be passed as a value in a register (or set of
1323 registers) according to the parameter passing rules, then the result
1324 is returned in the same registers as would be used for such an
1325 argument. */
1326
1327 static bool
1328 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1329 {
1330 HOST_WIDE_INT size;
1331 enum machine_mode ag_mode;
1332 int count;
1333
1334 if (!AGGREGATE_TYPE_P (type)
1335 && TREE_CODE (type) != COMPLEX_TYPE
1336 && TREE_CODE (type) != VECTOR_TYPE)
1337 /* Simple scalar types always returned in registers. */
1338 return false;
1339
1340 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1341 type,
1342 &ag_mode,
1343 &count,
1344 NULL))
1345 return false;
1346
1347 /* Types larger than 2 registers returned in memory. */
1348 size = int_size_in_bytes (type);
1349 return (size < 0 || size > 2 * UNITS_PER_WORD);
1350 }
1351
1352 static bool
1353 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1354 const_tree type, int *nregs)
1355 {
1356 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1357 return aarch64_vfp_is_call_or_return_candidate (mode,
1358 type,
1359 &pcum->aapcs_vfp_rmode,
1360 nregs,
1361 NULL);
1362 }
1363
1364 /* Given MODE and TYPE of a function argument, return the alignment in
1365 bits. The idea is to suppress any stronger alignment requested by
1366 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1367 This is a helper function for local use only. */
1368
1369 static unsigned int
1370 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1371 {
1372 unsigned int alignment;
1373
1374 if (type)
1375 {
1376 if (!integer_zerop (TYPE_SIZE (type)))
1377 {
1378 if (TYPE_MODE (type) == mode)
1379 alignment = TYPE_ALIGN (type);
1380 else
1381 alignment = GET_MODE_ALIGNMENT (mode);
1382 }
1383 else
1384 alignment = 0;
1385 }
1386 else
1387 alignment = GET_MODE_ALIGNMENT (mode);
1388
1389 return alignment;
1390 }
1391
1392 /* Layout a function argument according to the AAPCS64 rules. The rule
1393 numbers refer to the rule numbers in the AAPCS64. */
1394
1395 static void
1396 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1397 const_tree type,
1398 bool named ATTRIBUTE_UNUSED)
1399 {
1400 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1401 int ncrn, nvrn, nregs;
1402 bool allocate_ncrn, allocate_nvrn;
1403
1404 /* We need to do this once per argument. */
1405 if (pcum->aapcs_arg_processed)
1406 return;
1407
1408 pcum->aapcs_arg_processed = true;
1409
1410 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1411 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1412 mode,
1413 type,
1414 &nregs);
1415
1416 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1417 The following code thus handles passing by SIMD/FP registers first. */
1418
1419 nvrn = pcum->aapcs_nvrn;
1420
1421 /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
1422 and homogenous short-vector aggregates (HVA). */
1423 if (allocate_nvrn)
1424 {
1425 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1426 {
1427 pcum->aapcs_nextnvrn = nvrn + nregs;
1428 if (!aarch64_composite_type_p (type, mode))
1429 {
1430 gcc_assert (nregs == 1);
1431 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1432 }
1433 else
1434 {
1435 rtx par;
1436 int i;
1437 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1438 for (i = 0; i < nregs; i++)
1439 {
1440 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1441 V0_REGNUM + nvrn + i);
1442 tmp = gen_rtx_EXPR_LIST
1443 (VOIDmode, tmp,
1444 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1445 XVECEXP (par, 0, i) = tmp;
1446 }
1447 pcum->aapcs_reg = par;
1448 }
1449 return;
1450 }
1451 else
1452 {
1453 /* C.3 NSRN is set to 8. */
1454 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1455 goto on_stack;
1456 }
1457 }
1458
1459 ncrn = pcum->aapcs_ncrn;
1460 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1461 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1462
1463
1464 /* C6 - C9. though the sign and zero extension semantics are
1465 handled elsewhere. This is the case where the argument fits
1466 entirely general registers. */
1467 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1468 {
1469 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1470
1471 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1472
1473 /* C.8 if the argument has an alignment of 16 then the NGRN is
1474 rounded up to the next even number. */
1475 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1476 {
1477 ++ncrn;
1478 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1479 }
1480 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1481 A reg is still generated for it, but the caller should be smart
1482 enough not to use it. */
1483 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1484 {
1485 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1486 }
1487 else
1488 {
1489 rtx par;
1490 int i;
1491
1492 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1493 for (i = 0; i < nregs; i++)
1494 {
1495 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1496 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1497 GEN_INT (i * UNITS_PER_WORD));
1498 XVECEXP (par, 0, i) = tmp;
1499 }
1500 pcum->aapcs_reg = par;
1501 }
1502
1503 pcum->aapcs_nextncrn = ncrn + nregs;
1504 return;
1505 }
1506
1507 /* C.11 */
1508 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1509
1510 /* The argument is passed on stack; record the needed number of words for
1511 this argument (we can re-use NREGS) and align the total size if
1512 necessary. */
1513 on_stack:
1514 pcum->aapcs_stack_words = nregs;
1515 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1516 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1517 16 / UNITS_PER_WORD) + 1;
1518 return;
1519 }
1520
1521 /* Implement TARGET_FUNCTION_ARG. */
1522
1523 static rtx
1524 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1525 const_tree type, bool named)
1526 {
1527 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1528 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1529
1530 if (mode == VOIDmode)
1531 return NULL_RTX;
1532
1533 aarch64_layout_arg (pcum_v, mode, type, named);
1534 return pcum->aapcs_reg;
1535 }
1536
1537 void
1538 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1539 const_tree fntype ATTRIBUTE_UNUSED,
1540 rtx libname ATTRIBUTE_UNUSED,
1541 const_tree fndecl ATTRIBUTE_UNUSED,
1542 unsigned n_named ATTRIBUTE_UNUSED)
1543 {
1544 pcum->aapcs_ncrn = 0;
1545 pcum->aapcs_nvrn = 0;
1546 pcum->aapcs_nextncrn = 0;
1547 pcum->aapcs_nextnvrn = 0;
1548 pcum->pcs_variant = ARM_PCS_AAPCS64;
1549 pcum->aapcs_reg = NULL_RTX;
1550 pcum->aapcs_arg_processed = false;
1551 pcum->aapcs_stack_words = 0;
1552 pcum->aapcs_stack_size = 0;
1553
1554 return;
1555 }
1556
1557 static void
1558 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1559 enum machine_mode mode,
1560 const_tree type,
1561 bool named)
1562 {
1563 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1564 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1565 {
1566 aarch64_layout_arg (pcum_v, mode, type, named);
1567 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1568 != (pcum->aapcs_stack_words != 0));
1569 pcum->aapcs_arg_processed = false;
1570 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1571 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1572 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1573 pcum->aapcs_stack_words = 0;
1574 pcum->aapcs_reg = NULL_RTX;
1575 }
1576 }
1577
1578 bool
1579 aarch64_function_arg_regno_p (unsigned regno)
1580 {
1581 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1582 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1583 }
1584
1585 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1586 PARM_BOUNDARY bits of alignment, but will be given anything up
1587 to STACK_BOUNDARY bits if the type requires it. This makes sure
1588 that both before and after the layout of each argument, the Next
1589 Stacked Argument Address (NSAA) will have a minimum alignment of
1590 8 bytes. */
1591
1592 static unsigned int
1593 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1594 {
1595 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1596
1597 if (alignment < PARM_BOUNDARY)
1598 alignment = PARM_BOUNDARY;
1599 if (alignment > STACK_BOUNDARY)
1600 alignment = STACK_BOUNDARY;
1601 return alignment;
1602 }
1603
1604 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1605
1606 Return true if an argument passed on the stack should be padded upwards,
1607 i.e. if the least-significant byte of the stack slot has useful data.
1608
1609 Small aggregate types are placed in the lowest memory address.
1610
1611 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1612
1613 bool
1614 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1615 {
1616 /* On little-endian targets, the least significant byte of every stack
1617 argument is passed at the lowest byte address of the stack slot. */
1618 if (!BYTES_BIG_ENDIAN)
1619 return true;
1620
1621 /* Otherwise, integral, floating-point and pointer types are padded downward:
1622 the least significant byte of a stack argument is passed at the highest
1623 byte address of the stack slot. */
1624 if (type
1625 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1626 || POINTER_TYPE_P (type))
1627 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1628 return false;
1629
1630 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1631 return true;
1632 }
1633
1634 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1635
1636 It specifies padding for the last (may also be the only)
1637 element of a block move between registers and memory. If
1638 assuming the block is in the memory, padding upward means that
1639 the last element is padded after its highest significant byte,
1640 while in downward padding, the last element is padded at the
1641 its least significant byte side.
1642
1643 Small aggregates and small complex types are always padded
1644 upwards.
1645
1646 We don't need to worry about homogeneous floating-point or
1647 short-vector aggregates; their move is not affected by the
1648 padding direction determined here. Regardless of endianness,
1649 each element of such an aggregate is put in the least
1650 significant bits of a fp/simd register.
1651
1652 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1653 register has useful data, and return the opposite if the most
1654 significant byte does. */
1655
1656 bool
1657 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1658 bool first ATTRIBUTE_UNUSED)
1659 {
1660
1661 /* Small composite types are always padded upward. */
1662 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1663 {
1664 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1665 : GET_MODE_SIZE (mode));
1666 if (size < 2 * UNITS_PER_WORD)
1667 return true;
1668 }
1669
1670 /* Otherwise, use the default padding. */
1671 return !BYTES_BIG_ENDIAN;
1672 }
1673
1674 static enum machine_mode
1675 aarch64_libgcc_cmp_return_mode (void)
1676 {
1677 return SImode;
1678 }
1679
1680 static bool
1681 aarch64_frame_pointer_required (void)
1682 {
1683 /* If the function contains dynamic stack allocations, we need to
1684 use the frame pointer to access the static parts of the frame. */
1685 if (cfun->calls_alloca)
1686 return true;
1687
1688 /* We may have turned flag_omit_frame_pointer on in order to have this
1689 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1690 and we'll check it here.
1691 If we really did set flag_omit_frame_pointer normally, then we return false
1692 (no frame pointer required) in all cases. */
1693
1694 if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1695 return false;
1696 else if (flag_omit_leaf_frame_pointer)
1697 return !crtl->is_leaf;
1698 return true;
1699 }
1700
1701 /* Mark the registers that need to be saved by the callee and calculate
1702 the size of the callee-saved registers area and frame record (both FP
1703 and LR may be omitted). */
1704 static void
1705 aarch64_layout_frame (void)
1706 {
1707 HOST_WIDE_INT offset = 0;
1708 int regno;
1709
1710 if (reload_completed && cfun->machine->frame.laid_out)
1711 return;
1712
1713 cfun->machine->frame.fp_lr_offset = 0;
1714
1715 /* First mark all the registers that really need to be saved... */
1716 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1717 cfun->machine->frame.reg_offset[regno] = -1;
1718
1719 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1720 cfun->machine->frame.reg_offset[regno] = -1;
1721
1722 /* ... that includes the eh data registers (if needed)... */
1723 if (crtl->calls_eh_return)
1724 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1725 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1726
1727 /* ... and any callee saved register that dataflow says is live. */
1728 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1729 if (df_regs_ever_live_p (regno)
1730 && !call_used_regs[regno])
1731 cfun->machine->frame.reg_offset[regno] = 0;
1732
1733 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1734 if (df_regs_ever_live_p (regno)
1735 && !call_used_regs[regno])
1736 cfun->machine->frame.reg_offset[regno] = 0;
1737
1738 if (frame_pointer_needed)
1739 {
1740 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1741 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1742 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1743 }
1744
1745 /* Now assign stack slots for them. */
1746 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1747 if (cfun->machine->frame.reg_offset[regno] != -1)
1748 {
1749 cfun->machine->frame.reg_offset[regno] = offset;
1750 offset += UNITS_PER_WORD;
1751 }
1752
1753 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1754 if (cfun->machine->frame.reg_offset[regno] != -1)
1755 {
1756 cfun->machine->frame.reg_offset[regno] = offset;
1757 offset += UNITS_PER_WORD;
1758 }
1759
1760 if (frame_pointer_needed)
1761 {
1762 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1763 offset += UNITS_PER_WORD;
1764 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1765 }
1766
1767 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1768 {
1769 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1770 offset += UNITS_PER_WORD;
1771 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1772 }
1773
1774 cfun->machine->frame.padding0 =
1775 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1776 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1777
1778 cfun->machine->frame.saved_regs_size = offset;
1779 cfun->machine->frame.laid_out = true;
1780 }
1781
1782 /* Make the last instruction frame-related and note that it performs
1783 the operation described by FRAME_PATTERN. */
1784
1785 static void
1786 aarch64_set_frame_expr (rtx frame_pattern)
1787 {
1788 rtx insn;
1789
1790 insn = get_last_insn ();
1791 RTX_FRAME_RELATED_P (insn) = 1;
1792 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1793 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1794 frame_pattern,
1795 REG_NOTES (insn));
1796 }
1797
1798 static bool
1799 aarch64_register_saved_on_entry (int regno)
1800 {
1801 return cfun->machine->frame.reg_offset[regno] != -1;
1802 }
1803
1804
1805 static void
1806 aarch64_save_or_restore_fprs (int start_offset, int increment,
1807 bool restore, rtx base_rtx)
1808
1809 {
1810 unsigned regno;
1811 unsigned regno2;
1812 rtx insn;
1813 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1814
1815
1816 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1817 {
1818 if (aarch64_register_saved_on_entry (regno))
1819 {
1820 rtx mem;
1821 mem = gen_mem_ref (DFmode,
1822 plus_constant (Pmode,
1823 base_rtx,
1824 start_offset));
1825
1826 for (regno2 = regno + 1;
1827 regno2 <= V31_REGNUM
1828 && !aarch64_register_saved_on_entry (regno2);
1829 regno2++)
1830 {
1831 /* Empty loop. */
1832 }
1833 if (regno2 <= V31_REGNUM &&
1834 aarch64_register_saved_on_entry (regno2))
1835 {
1836 rtx mem2;
1837 /* Next highest register to be saved. */
1838 mem2 = gen_mem_ref (DFmode,
1839 plus_constant
1840 (Pmode,
1841 base_rtx,
1842 start_offset + increment));
1843 if (restore == false)
1844 {
1845 insn = emit_insn
1846 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1847 mem2, gen_rtx_REG (DFmode, regno2)));
1848
1849 }
1850 else
1851 {
1852 insn = emit_insn
1853 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1854 gen_rtx_REG (DFmode, regno2), mem2));
1855
1856 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno));
1857 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno2));
1858 }
1859
1860 /* The first part of a frame-related parallel insn
1861 is always assumed to be relevant to the frame
1862 calculations; subsequent parts, are only
1863 frame-related if explicitly marked. */
1864 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1865 1)) = 1;
1866 regno = regno2;
1867 start_offset += increment * 2;
1868 }
1869 else
1870 {
1871 if (restore == false)
1872 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1873 else
1874 {
1875 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1876 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1877 }
1878 start_offset += increment;
1879 }
1880 RTX_FRAME_RELATED_P (insn) = 1;
1881 }
1882 }
1883
1884 }
1885
1886
1887 /* offset from the stack pointer of where the saves and
1888 restore's have to happen. */
1889 static void
1890 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1891 bool restore)
1892 {
1893 rtx insn;
1894 rtx base_rtx = stack_pointer_rtx;
1895 HOST_WIDE_INT start_offset = offset;
1896 HOST_WIDE_INT increment = UNITS_PER_WORD;
1897 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1898 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1899 unsigned regno;
1900 unsigned regno2;
1901
1902 for (regno = R0_REGNUM; regno <= limit; regno++)
1903 {
1904 if (aarch64_register_saved_on_entry (regno))
1905 {
1906 rtx mem;
1907 mem = gen_mem_ref (Pmode,
1908 plus_constant (Pmode,
1909 base_rtx,
1910 start_offset));
1911
1912 for (regno2 = regno + 1;
1913 regno2 <= limit
1914 && !aarch64_register_saved_on_entry (regno2);
1915 regno2++)
1916 {
1917 /* Empty loop. */
1918 }
1919 if (regno2 <= limit &&
1920 aarch64_register_saved_on_entry (regno2))
1921 {
1922 rtx mem2;
1923 /* Next highest register to be saved. */
1924 mem2 = gen_mem_ref (Pmode,
1925 plus_constant
1926 (Pmode,
1927 base_rtx,
1928 start_offset + increment));
1929 if (restore == false)
1930 {
1931 insn = emit_insn
1932 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1933 mem2, gen_rtx_REG (DImode, regno2)));
1934
1935 }
1936 else
1937 {
1938 insn = emit_insn
1939 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1940 gen_rtx_REG (DImode, regno2), mem2));
1941
1942 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1943 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1944 }
1945
1946 /* The first part of a frame-related parallel insn
1947 is always assumed to be relevant to the frame
1948 calculations; subsequent parts, are only
1949 frame-related if explicitly marked. */
1950 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1951 1)) = 1;
1952 regno = regno2;
1953 start_offset += increment * 2;
1954 }
1955 else
1956 {
1957 if (restore == false)
1958 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1959 else
1960 {
1961 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1962 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1963 }
1964 start_offset += increment;
1965 }
1966 RTX_FRAME_RELATED_P (insn) = 1;
1967 }
1968 }
1969
1970 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1971
1972 }
1973
1974 /* AArch64 stack frames generated by this compiler look like:
1975
1976 +-------------------------------+
1977 | |
1978 | incoming stack arguments |
1979 | |
1980 +-------------------------------+ <-- arg_pointer_rtx
1981 | |
1982 | callee-allocated save area |
1983 | for register varargs |
1984 | |
1985 +-------------------------------+
1986 | |
1987 | local variables |
1988 | |
1989 +-------------------------------+ <-- frame_pointer_rtx
1990 | |
1991 | callee-saved registers |
1992 | |
1993 +-------------------------------+
1994 | LR' |
1995 +-------------------------------+
1996 | FP' |
1997 P +-------------------------------+ <-- hard_frame_pointer_rtx
1998 | dynamic allocation |
1999 +-------------------------------+
2000 | |
2001 | outgoing stack arguments |
2002 | |
2003 +-------------------------------+ <-- stack_pointer_rtx
2004
2005 Dynamic stack allocations such as alloca insert data at point P.
2006 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2007 hard_frame_pointer_rtx unchanged. */
2008
2009 /* Generate the prologue instructions for entry into a function.
2010 Establish the stack frame by decreasing the stack pointer with a
2011 properly calculated size and, if necessary, create a frame record
2012 filled with the values of LR and previous frame pointer. The
2013 current FP is also set up is it is in use. */
2014
2015 void
2016 aarch64_expand_prologue (void)
2017 {
2018 /* sub sp, sp, #<frame_size>
2019 stp {fp, lr}, [sp, #<frame_size> - 16]
2020 add fp, sp, #<frame_size> - hardfp_offset
2021 stp {cs_reg}, [fp, #-16] etc.
2022
2023 sub sp, sp, <final_adjustment_if_any>
2024 */
2025 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2026 HOST_WIDE_INT frame_size, offset;
2027 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2028 rtx insn;
2029
2030 aarch64_layout_frame ();
2031 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2032 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2033 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2034 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2035 + crtl->outgoing_args_size);
2036 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2037 STACK_BOUNDARY / BITS_PER_UNIT);
2038
2039 if (flag_stack_usage_info)
2040 current_function_static_stack_size = frame_size;
2041
2042 fp_offset = (offset
2043 - original_frame_size
2044 - cfun->machine->frame.saved_regs_size);
2045
2046 /* Store pairs and load pairs have a range only -512 to 504. */
2047 if (offset >= 512)
2048 {
2049 /* When the frame has a large size, an initial decrease is done on
2050 the stack pointer to jump over the callee-allocated save area for
2051 register varargs, the local variable area and/or the callee-saved
2052 register area. This will allow the pre-index write-back
2053 store pair instructions to be used for setting up the stack frame
2054 efficiently. */
2055 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2056 if (offset >= 512)
2057 offset = cfun->machine->frame.saved_regs_size;
2058
2059 frame_size -= (offset + crtl->outgoing_args_size);
2060 fp_offset = 0;
2061
2062 if (frame_size >= 0x1000000)
2063 {
2064 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2065 emit_move_insn (op0, GEN_INT (-frame_size));
2066 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2067 aarch64_set_frame_expr (gen_rtx_SET
2068 (Pmode, stack_pointer_rtx,
2069 plus_constant (Pmode,
2070 stack_pointer_rtx,
2071 -frame_size)));
2072 }
2073 else if (frame_size > 0)
2074 {
2075 if ((frame_size & 0xfff) != frame_size)
2076 {
2077 insn = emit_insn (gen_add2_insn
2078 (stack_pointer_rtx,
2079 GEN_INT (-(frame_size
2080 & ~(HOST_WIDE_INT)0xfff))));
2081 RTX_FRAME_RELATED_P (insn) = 1;
2082 }
2083 if ((frame_size & 0xfff) != 0)
2084 {
2085 insn = emit_insn (gen_add2_insn
2086 (stack_pointer_rtx,
2087 GEN_INT (-(frame_size
2088 & (HOST_WIDE_INT)0xfff))));
2089 RTX_FRAME_RELATED_P (insn) = 1;
2090 }
2091 }
2092 }
2093 else
2094 frame_size = -1;
2095
2096 if (offset > 0)
2097 {
2098 /* Save the frame pointer and lr if the frame pointer is needed
2099 first. Make the frame pointer point to the location of the
2100 old frame pointer on the stack. */
2101 if (frame_pointer_needed)
2102 {
2103 rtx mem_fp, mem_lr;
2104
2105 if (fp_offset)
2106 {
2107 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2108 GEN_INT (-offset)));
2109 RTX_FRAME_RELATED_P (insn) = 1;
2110 aarch64_set_frame_expr (gen_rtx_SET
2111 (Pmode, stack_pointer_rtx,
2112 gen_rtx_MINUS (Pmode,
2113 stack_pointer_rtx,
2114 GEN_INT (offset))));
2115 mem_fp = gen_frame_mem (DImode,
2116 plus_constant (Pmode,
2117 stack_pointer_rtx,
2118 fp_offset));
2119 mem_lr = gen_frame_mem (DImode,
2120 plus_constant (Pmode,
2121 stack_pointer_rtx,
2122 fp_offset
2123 + UNITS_PER_WORD));
2124 insn = emit_insn (gen_store_pairdi (mem_fp,
2125 hard_frame_pointer_rtx,
2126 mem_lr,
2127 gen_rtx_REG (DImode,
2128 LR_REGNUM)));
2129 }
2130 else
2131 {
2132 insn = emit_insn (gen_storewb_pairdi_di
2133 (stack_pointer_rtx, stack_pointer_rtx,
2134 hard_frame_pointer_rtx,
2135 gen_rtx_REG (DImode, LR_REGNUM),
2136 GEN_INT (-offset),
2137 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2138 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2139 }
2140
2141 /* The first part of a frame-related parallel insn is always
2142 assumed to be relevant to the frame calculations;
2143 subsequent parts, are only frame-related if explicitly
2144 marked. */
2145 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2146 RTX_FRAME_RELATED_P (insn) = 1;
2147
2148 /* Set up frame pointer to point to the location of the
2149 previous frame pointer on the stack. */
2150 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2151 stack_pointer_rtx,
2152 GEN_INT (fp_offset)));
2153 aarch64_set_frame_expr (gen_rtx_SET
2154 (Pmode, hard_frame_pointer_rtx,
2155 plus_constant (Pmode,
2156 stack_pointer_rtx,
2157 fp_offset)));
2158 RTX_FRAME_RELATED_P (insn) = 1;
2159 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2160 hard_frame_pointer_rtx));
2161 }
2162 else
2163 {
2164 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2165 GEN_INT (-offset)));
2166 RTX_FRAME_RELATED_P (insn) = 1;
2167 }
2168
2169 aarch64_save_or_restore_callee_save_registers
2170 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2171 }
2172
2173 /* when offset >= 512,
2174 sub sp, sp, #<outgoing_args_size> */
2175 if (frame_size > -1)
2176 {
2177 if (crtl->outgoing_args_size > 0)
2178 {
2179 insn = emit_insn (gen_add2_insn
2180 (stack_pointer_rtx,
2181 GEN_INT (- crtl->outgoing_args_size)));
2182 RTX_FRAME_RELATED_P (insn) = 1;
2183 }
2184 }
2185 }
2186
2187 /* Generate the epilogue instructions for returning from a function. */
2188 void
2189 aarch64_expand_epilogue (bool for_sibcall)
2190 {
2191 HOST_WIDE_INT original_frame_size, frame_size, offset;
2192 HOST_WIDE_INT fp_offset;
2193 rtx insn;
2194 rtx cfa_reg;
2195
2196 aarch64_layout_frame ();
2197 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2198 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2199 + crtl->outgoing_args_size);
2200 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2201 STACK_BOUNDARY / BITS_PER_UNIT);
2202
2203 fp_offset = (offset
2204 - original_frame_size
2205 - cfun->machine->frame.saved_regs_size);
2206
2207 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2208
2209 /* Store pairs and load pairs have a range only -512 to 504. */
2210 if (offset >= 512)
2211 {
2212 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2213 if (offset >= 512)
2214 offset = cfun->machine->frame.saved_regs_size;
2215
2216 frame_size -= (offset + crtl->outgoing_args_size);
2217 fp_offset = 0;
2218 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2219 {
2220 insn = emit_insn (gen_add2_insn
2221 (stack_pointer_rtx,
2222 GEN_INT (crtl->outgoing_args_size)));
2223 RTX_FRAME_RELATED_P (insn) = 1;
2224 }
2225 }
2226 else
2227 frame_size = -1;
2228
2229 /* If there were outgoing arguments or we've done dynamic stack
2230 allocation, then restore the stack pointer from the frame
2231 pointer. This is at most one insn and more efficient than using
2232 GCC's internal mechanism. */
2233 if (frame_pointer_needed
2234 && (crtl->outgoing_args_size || cfun->calls_alloca))
2235 {
2236 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2237 hard_frame_pointer_rtx,
2238 GEN_INT (- fp_offset)));
2239 RTX_FRAME_RELATED_P (insn) = 1;
2240 /* As SP is set to (FP - fp_offset), according to the rules in
2241 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2242 from the value of SP from now on. */
2243 cfa_reg = stack_pointer_rtx;
2244 }
2245
2246 aarch64_save_or_restore_callee_save_registers
2247 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2248
2249 /* Restore the frame pointer and lr if the frame pointer is needed. */
2250 if (offset > 0)
2251 {
2252 if (frame_pointer_needed)
2253 {
2254 rtx mem_fp, mem_lr;
2255
2256 if (fp_offset)
2257 {
2258 mem_fp = gen_frame_mem (DImode,
2259 plus_constant (Pmode,
2260 stack_pointer_rtx,
2261 fp_offset));
2262 mem_lr = gen_frame_mem (DImode,
2263 plus_constant (Pmode,
2264 stack_pointer_rtx,
2265 fp_offset
2266 + UNITS_PER_WORD));
2267 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2268 mem_fp,
2269 gen_rtx_REG (DImode,
2270 LR_REGNUM),
2271 mem_lr));
2272 }
2273 else
2274 {
2275 insn = emit_insn (gen_loadwb_pairdi_di
2276 (stack_pointer_rtx,
2277 stack_pointer_rtx,
2278 hard_frame_pointer_rtx,
2279 gen_rtx_REG (DImode, LR_REGNUM),
2280 GEN_INT (offset),
2281 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2282 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2283 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2284 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2285 plus_constant (Pmode, cfa_reg,
2286 offset))));
2287 }
2288
2289 /* The first part of a frame-related parallel insn
2290 is always assumed to be relevant to the frame
2291 calculations; subsequent parts, are only
2292 frame-related if explicitly marked. */
2293 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2294 RTX_FRAME_RELATED_P (insn) = 1;
2295 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2296 add_reg_note (insn, REG_CFA_RESTORE,
2297 gen_rtx_REG (DImode, LR_REGNUM));
2298
2299 if (fp_offset)
2300 {
2301 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2302 GEN_INT (offset)));
2303 RTX_FRAME_RELATED_P (insn) = 1;
2304 }
2305 }
2306 else
2307 {
2308 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2309 GEN_INT (offset)));
2310 RTX_FRAME_RELATED_P (insn) = 1;
2311 }
2312 }
2313
2314 /* Stack adjustment for exception handler. */
2315 if (crtl->calls_eh_return)
2316 {
2317 /* We need to unwind the stack by the offset computed by
2318 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2319 based on SP. Ideally we would update the SP and define the
2320 CFA along the lines of:
2321
2322 SP = SP + EH_RETURN_STACKADJ_RTX
2323 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2324
2325 However the dwarf emitter only understands a constant
2326 register offset.
2327
2328 The solution chosen here is to use the otherwise unused IP0
2329 as a temporary register to hold the current SP value. The
2330 CFA is described using IP0 then SP is modified. */
2331
2332 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2333
2334 insn = emit_move_insn (ip0, stack_pointer_rtx);
2335 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2336 RTX_FRAME_RELATED_P (insn) = 1;
2337
2338 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2339
2340 /* Ensure the assignment to IP0 does not get optimized away. */
2341 emit_use (ip0);
2342 }
2343
2344 if (frame_size > -1)
2345 {
2346 if (frame_size >= 0x1000000)
2347 {
2348 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2349 emit_move_insn (op0, GEN_INT (frame_size));
2350 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2351 aarch64_set_frame_expr (gen_rtx_SET
2352 (Pmode, stack_pointer_rtx,
2353 plus_constant (Pmode,
2354 stack_pointer_rtx,
2355 frame_size)));
2356 }
2357 else if (frame_size > 0)
2358 {
2359 if ((frame_size & 0xfff) != 0)
2360 {
2361 insn = emit_insn (gen_add2_insn
2362 (stack_pointer_rtx,
2363 GEN_INT ((frame_size
2364 & (HOST_WIDE_INT) 0xfff))));
2365 RTX_FRAME_RELATED_P (insn) = 1;
2366 }
2367 if ((frame_size & 0xfff) != frame_size)
2368 {
2369 insn = emit_insn (gen_add2_insn
2370 (stack_pointer_rtx,
2371 GEN_INT ((frame_size
2372 & ~ (HOST_WIDE_INT) 0xfff))));
2373 RTX_FRAME_RELATED_P (insn) = 1;
2374 }
2375 }
2376
2377 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2378 plus_constant (Pmode,
2379 stack_pointer_rtx,
2380 offset)));
2381 }
2382
2383 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2384 if (!for_sibcall)
2385 emit_jump_insn (ret_rtx);
2386 }
2387
2388 /* Return the place to copy the exception unwinding return address to.
2389 This will probably be a stack slot, but could (in theory be the
2390 return register). */
2391 rtx
2392 aarch64_final_eh_return_addr (void)
2393 {
2394 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2395 aarch64_layout_frame ();
2396 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2397 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2398 + crtl->outgoing_args_size);
2399 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2400 STACK_BOUNDARY / BITS_PER_UNIT);
2401 fp_offset = offset
2402 - original_frame_size
2403 - cfun->machine->frame.saved_regs_size;
2404
2405 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2406 return gen_rtx_REG (DImode, LR_REGNUM);
2407
2408 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2409 result in a store to save LR introduced by builtin_eh_return () being
2410 incorrectly deleted because the alias is not detected.
2411 So in the calculation of the address to copy the exception unwinding
2412 return address to, we note 2 cases.
2413 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2414 we return a SP-relative location since all the addresses are SP-relative
2415 in this case. This prevents the store from being optimized away.
2416 If the fp_offset is not 0, then the addresses will be FP-relative and
2417 therefore we return a FP-relative location. */
2418
2419 if (frame_pointer_needed)
2420 {
2421 if (fp_offset)
2422 return gen_frame_mem (DImode,
2423 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2424 else
2425 return gen_frame_mem (DImode,
2426 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2427 }
2428
2429 /* If FP is not needed, we calculate the location of LR, which would be
2430 at the top of the saved registers block. */
2431
2432 return gen_frame_mem (DImode,
2433 plus_constant (Pmode,
2434 stack_pointer_rtx,
2435 fp_offset
2436 + cfun->machine->frame.saved_regs_size
2437 - 2 * UNITS_PER_WORD));
2438 }
2439
2440 /* Output code to build up a constant in a register. */
2441 static void
2442 aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2443 {
2444 if (aarch64_bitmask_imm (val, DImode))
2445 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2446 else
2447 {
2448 int i;
2449 int ncount = 0;
2450 int zcount = 0;
2451 HOST_WIDE_INT valp = val >> 16;
2452 HOST_WIDE_INT valm;
2453 HOST_WIDE_INT tval;
2454
2455 for (i = 16; i < 64; i += 16)
2456 {
2457 valm = (valp & 0xffff);
2458
2459 if (valm != 0)
2460 ++ zcount;
2461
2462 if (valm != 0xffff)
2463 ++ ncount;
2464
2465 valp >>= 16;
2466 }
2467
2468 /* zcount contains the number of additional MOVK instructions
2469 required if the constant is built up with an initial MOVZ instruction,
2470 while ncount is the number of MOVK instructions required if starting
2471 with a MOVN instruction. Choose the sequence that yields the fewest
2472 number of instructions, preferring MOVZ instructions when they are both
2473 the same. */
2474 if (ncount < zcount)
2475 {
2476 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2477 GEN_INT ((~val) & 0xffff));
2478 tval = 0xffff;
2479 }
2480 else
2481 {
2482 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2483 GEN_INT (val & 0xffff));
2484 tval = 0;
2485 }
2486
2487 val >>= 16;
2488
2489 for (i = 16; i < 64; i += 16)
2490 {
2491 if ((val & 0xffff) != tval)
2492 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2493 GEN_INT (i), GEN_INT (val & 0xffff)));
2494 val >>= 16;
2495 }
2496 }
2497 }
2498
2499 static void
2500 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2501 {
2502 HOST_WIDE_INT mdelta = delta;
2503 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2504 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2505
2506 if (mdelta < 0)
2507 mdelta = -mdelta;
2508
2509 if (mdelta >= 4096 * 4096)
2510 {
2511 aarch64_build_constant (scratchreg, delta);
2512 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2513 }
2514 else if (mdelta > 0)
2515 {
2516 if (mdelta >= 4096)
2517 {
2518 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2519 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2520 if (delta < 0)
2521 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2522 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2523 else
2524 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2525 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2526 }
2527 if (mdelta % 4096 != 0)
2528 {
2529 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2530 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2531 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2532 }
2533 }
2534 }
2535
2536 /* Output code to add DELTA to the first argument, and then jump
2537 to FUNCTION. Used for C++ multiple inheritance. */
2538 static void
2539 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2540 HOST_WIDE_INT delta,
2541 HOST_WIDE_INT vcall_offset,
2542 tree function)
2543 {
2544 /* The this pointer is always in x0. Note that this differs from
2545 Arm where the this pointer maybe bumped to r1 if r0 is required
2546 to return a pointer to an aggregate. On AArch64 a result value
2547 pointer will be in x8. */
2548 int this_regno = R0_REGNUM;
2549 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2550
2551 reload_completed = 1;
2552 emit_note (NOTE_INSN_PROLOGUE_END);
2553
2554 if (vcall_offset == 0)
2555 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2556 else
2557 {
2558 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2559
2560 this_rtx = gen_rtx_REG (Pmode, this_regno);
2561 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2562 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2563
2564 addr = this_rtx;
2565 if (delta != 0)
2566 {
2567 if (delta >= -256 && delta < 256)
2568 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2569 plus_constant (Pmode, this_rtx, delta));
2570 else
2571 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2572 }
2573
2574 if (Pmode == ptr_mode)
2575 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2576 else
2577 aarch64_emit_move (temp0,
2578 gen_rtx_ZERO_EXTEND (Pmode,
2579 gen_rtx_MEM (ptr_mode, addr)));
2580
2581 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2582 addr = plus_constant (Pmode, temp0, vcall_offset);
2583 else
2584 {
2585 aarch64_build_constant (IP1_REGNUM, vcall_offset);
2586 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2587 }
2588
2589 if (Pmode == ptr_mode)
2590 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2591 else
2592 aarch64_emit_move (temp1,
2593 gen_rtx_SIGN_EXTEND (Pmode,
2594 gen_rtx_MEM (ptr_mode, addr)));
2595
2596 emit_insn (gen_add2_insn (this_rtx, temp1));
2597 }
2598
2599 /* Generate a tail call to the target function. */
2600 if (!TREE_USED (function))
2601 {
2602 assemble_external (function);
2603 TREE_USED (function) = 1;
2604 }
2605 funexp = XEXP (DECL_RTL (function), 0);
2606 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2607 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2608 SIBLING_CALL_P (insn) = 1;
2609
2610 insn = get_insns ();
2611 shorten_branches (insn);
2612 final_start_function (insn, file, 1);
2613 final (insn, file, 1);
2614 final_end_function ();
2615
2616 /* Stop pretending to be a post-reload pass. */
2617 reload_completed = 0;
2618 }
2619
2620 static int
2621 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2622 {
2623 if (GET_CODE (*x) == SYMBOL_REF)
2624 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2625
2626 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2627 TLS offsets, not real symbol references. */
2628 if (GET_CODE (*x) == UNSPEC
2629 && XINT (*x, 1) == UNSPEC_TLS)
2630 return -1;
2631
2632 return 0;
2633 }
2634
2635 static bool
2636 aarch64_tls_referenced_p (rtx x)
2637 {
2638 if (!TARGET_HAVE_TLS)
2639 return false;
2640
2641 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2642 }
2643
2644
2645 static int
2646 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2647 {
2648 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2649 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2650
2651 if (*imm1 < *imm2)
2652 return -1;
2653 if (*imm1 > *imm2)
2654 return +1;
2655 return 0;
2656 }
2657
2658
2659 static void
2660 aarch64_build_bitmask_table (void)
2661 {
2662 unsigned HOST_WIDE_INT mask, imm;
2663 unsigned int log_e, e, s, r;
2664 unsigned int nimms = 0;
2665
2666 for (log_e = 1; log_e <= 6; log_e++)
2667 {
2668 e = 1 << log_e;
2669 if (e == 64)
2670 mask = ~(HOST_WIDE_INT) 0;
2671 else
2672 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2673 for (s = 1; s < e; s++)
2674 {
2675 for (r = 0; r < e; r++)
2676 {
2677 /* set s consecutive bits to 1 (s < 64) */
2678 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2679 /* rotate right by r */
2680 if (r != 0)
2681 imm = ((imm >> r) | (imm << (e - r))) & mask;
2682 /* replicate the constant depending on SIMD size */
2683 switch (log_e) {
2684 case 1: imm |= (imm << 2);
2685 case 2: imm |= (imm << 4);
2686 case 3: imm |= (imm << 8);
2687 case 4: imm |= (imm << 16);
2688 case 5: imm |= (imm << 32);
2689 case 6:
2690 break;
2691 default:
2692 gcc_unreachable ();
2693 }
2694 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2695 aarch64_bitmasks[nimms++] = imm;
2696 }
2697 }
2698 }
2699
2700 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2701 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2702 aarch64_bitmasks_cmp);
2703 }
2704
2705
2706 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2707 a left shift of 0 or 12 bits. */
2708 bool
2709 aarch64_uimm12_shift (HOST_WIDE_INT val)
2710 {
2711 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2712 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2713 );
2714 }
2715
2716
2717 /* Return true if val is an immediate that can be loaded into a
2718 register by a MOVZ instruction. */
2719 static bool
2720 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2721 {
2722 if (GET_MODE_SIZE (mode) > 4)
2723 {
2724 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2725 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2726 return 1;
2727 }
2728 else
2729 {
2730 /* Ignore sign extension. */
2731 val &= (HOST_WIDE_INT) 0xffffffff;
2732 }
2733 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2734 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2735 }
2736
2737
2738 /* Return true if val is a valid bitmask immediate. */
2739 bool
2740 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2741 {
2742 if (GET_MODE_SIZE (mode) < 8)
2743 {
2744 /* Replicate bit pattern. */
2745 val &= (HOST_WIDE_INT) 0xffffffff;
2746 val |= val << 32;
2747 }
2748 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2749 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2750 }
2751
2752
2753 /* Return true if val is an immediate that can be loaded into a
2754 register in a single instruction. */
2755 bool
2756 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2757 {
2758 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2759 return 1;
2760 return aarch64_bitmask_imm (val, mode);
2761 }
2762
2763 static bool
2764 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2765 {
2766 rtx base, offset;
2767
2768 if (GET_CODE (x) == HIGH)
2769 return true;
2770
2771 split_const (x, &base, &offset);
2772 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2773 {
2774 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2775 != SYMBOL_FORCE_TO_MEM)
2776 return true;
2777 else
2778 /* Avoid generating a 64-bit relocation in ILP32; leave
2779 to aarch64_expand_mov_immediate to handle it properly. */
2780 return mode != ptr_mode;
2781 }
2782
2783 return aarch64_tls_referenced_p (x);
2784 }
2785
2786 /* Return true if register REGNO is a valid index register.
2787 STRICT_P is true if REG_OK_STRICT is in effect. */
2788
2789 bool
2790 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2791 {
2792 if (!HARD_REGISTER_NUM_P (regno))
2793 {
2794 if (!strict_p)
2795 return true;
2796
2797 if (!reg_renumber)
2798 return false;
2799
2800 regno = reg_renumber[regno];
2801 }
2802 return GP_REGNUM_P (regno);
2803 }
2804
2805 /* Return true if register REGNO is a valid base register for mode MODE.
2806 STRICT_P is true if REG_OK_STRICT is in effect. */
2807
2808 bool
2809 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2810 {
2811 if (!HARD_REGISTER_NUM_P (regno))
2812 {
2813 if (!strict_p)
2814 return true;
2815
2816 if (!reg_renumber)
2817 return false;
2818
2819 regno = reg_renumber[regno];
2820 }
2821
2822 /* The fake registers will be eliminated to either the stack or
2823 hard frame pointer, both of which are usually valid base registers.
2824 Reload deals with the cases where the eliminated form isn't valid. */
2825 return (GP_REGNUM_P (regno)
2826 || regno == SP_REGNUM
2827 || regno == FRAME_POINTER_REGNUM
2828 || regno == ARG_POINTER_REGNUM);
2829 }
2830
2831 /* Return true if X is a valid base register for mode MODE.
2832 STRICT_P is true if REG_OK_STRICT is in effect. */
2833
2834 static bool
2835 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2836 {
2837 if (!strict_p && GET_CODE (x) == SUBREG)
2838 x = SUBREG_REG (x);
2839
2840 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2841 }
2842
2843 /* Return true if address offset is a valid index. If it is, fill in INFO
2844 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2845
2846 static bool
2847 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2848 enum machine_mode mode, bool strict_p)
2849 {
2850 enum aarch64_address_type type;
2851 rtx index;
2852 int shift;
2853
2854 /* (reg:P) */
2855 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2856 && GET_MODE (x) == Pmode)
2857 {
2858 type = ADDRESS_REG_REG;
2859 index = x;
2860 shift = 0;
2861 }
2862 /* (sign_extend:DI (reg:SI)) */
2863 else if ((GET_CODE (x) == SIGN_EXTEND
2864 || GET_CODE (x) == ZERO_EXTEND)
2865 && GET_MODE (x) == DImode
2866 && GET_MODE (XEXP (x, 0)) == SImode)
2867 {
2868 type = (GET_CODE (x) == SIGN_EXTEND)
2869 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2870 index = XEXP (x, 0);
2871 shift = 0;
2872 }
2873 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2874 else if (GET_CODE (x) == MULT
2875 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2876 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2877 && GET_MODE (XEXP (x, 0)) == DImode
2878 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2879 && CONST_INT_P (XEXP (x, 1)))
2880 {
2881 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2882 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2883 index = XEXP (XEXP (x, 0), 0);
2884 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2885 }
2886 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2887 else if (GET_CODE (x) == ASHIFT
2888 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2889 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2890 && GET_MODE (XEXP (x, 0)) == DImode
2891 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2892 && CONST_INT_P (XEXP (x, 1)))
2893 {
2894 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2895 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2896 index = XEXP (XEXP (x, 0), 0);
2897 shift = INTVAL (XEXP (x, 1));
2898 }
2899 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2900 else if ((GET_CODE (x) == SIGN_EXTRACT
2901 || GET_CODE (x) == ZERO_EXTRACT)
2902 && GET_MODE (x) == DImode
2903 && GET_CODE (XEXP (x, 0)) == MULT
2904 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2905 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2906 {
2907 type = (GET_CODE (x) == SIGN_EXTRACT)
2908 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2909 index = XEXP (XEXP (x, 0), 0);
2910 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2911 if (INTVAL (XEXP (x, 1)) != 32 + shift
2912 || INTVAL (XEXP (x, 2)) != 0)
2913 shift = -1;
2914 }
2915 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2916 (const_int 0xffffffff<<shift)) */
2917 else if (GET_CODE (x) == AND
2918 && GET_MODE (x) == DImode
2919 && GET_CODE (XEXP (x, 0)) == MULT
2920 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2921 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2922 && CONST_INT_P (XEXP (x, 1)))
2923 {
2924 type = ADDRESS_REG_UXTW;
2925 index = XEXP (XEXP (x, 0), 0);
2926 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2927 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2928 shift = -1;
2929 }
2930 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2931 else if ((GET_CODE (x) == SIGN_EXTRACT
2932 || GET_CODE (x) == ZERO_EXTRACT)
2933 && GET_MODE (x) == DImode
2934 && GET_CODE (XEXP (x, 0)) == ASHIFT
2935 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2936 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2937 {
2938 type = (GET_CODE (x) == SIGN_EXTRACT)
2939 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2940 index = XEXP (XEXP (x, 0), 0);
2941 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2942 if (INTVAL (XEXP (x, 1)) != 32 + shift
2943 || INTVAL (XEXP (x, 2)) != 0)
2944 shift = -1;
2945 }
2946 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2947 (const_int 0xffffffff<<shift)) */
2948 else if (GET_CODE (x) == AND
2949 && GET_MODE (x) == DImode
2950 && GET_CODE (XEXP (x, 0)) == ASHIFT
2951 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2952 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2953 && CONST_INT_P (XEXP (x, 1)))
2954 {
2955 type = ADDRESS_REG_UXTW;
2956 index = XEXP (XEXP (x, 0), 0);
2957 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2958 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2959 shift = -1;
2960 }
2961 /* (mult:P (reg:P) (const_int scale)) */
2962 else if (GET_CODE (x) == MULT
2963 && GET_MODE (x) == Pmode
2964 && GET_MODE (XEXP (x, 0)) == Pmode
2965 && CONST_INT_P (XEXP (x, 1)))
2966 {
2967 type = ADDRESS_REG_REG;
2968 index = XEXP (x, 0);
2969 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2970 }
2971 /* (ashift:P (reg:P) (const_int shift)) */
2972 else if (GET_CODE (x) == ASHIFT
2973 && GET_MODE (x) == Pmode
2974 && GET_MODE (XEXP (x, 0)) == Pmode
2975 && CONST_INT_P (XEXP (x, 1)))
2976 {
2977 type = ADDRESS_REG_REG;
2978 index = XEXP (x, 0);
2979 shift = INTVAL (XEXP (x, 1));
2980 }
2981 else
2982 return false;
2983
2984 if (GET_CODE (index) == SUBREG)
2985 index = SUBREG_REG (index);
2986
2987 if ((shift == 0 ||
2988 (shift > 0 && shift <= 3
2989 && (1 << shift) == GET_MODE_SIZE (mode)))
2990 && REG_P (index)
2991 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2992 {
2993 info->type = type;
2994 info->offset = index;
2995 info->shift = shift;
2996 return true;
2997 }
2998
2999 return false;
3000 }
3001
3002 static inline bool
3003 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3004 {
3005 return (offset >= -64 * GET_MODE_SIZE (mode)
3006 && offset < 64 * GET_MODE_SIZE (mode)
3007 && offset % GET_MODE_SIZE (mode) == 0);
3008 }
3009
3010 static inline bool
3011 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3012 HOST_WIDE_INT offset)
3013 {
3014 return offset >= -256 && offset < 256;
3015 }
3016
3017 static inline bool
3018 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3019 {
3020 return (offset >= 0
3021 && offset < 4096 * GET_MODE_SIZE (mode)
3022 && offset % GET_MODE_SIZE (mode) == 0);
3023 }
3024
3025 /* Return true if X is a valid address for machine mode MODE. If it is,
3026 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3027 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3028
3029 static bool
3030 aarch64_classify_address (struct aarch64_address_info *info,
3031 rtx x, enum machine_mode mode,
3032 RTX_CODE outer_code, bool strict_p)
3033 {
3034 enum rtx_code code = GET_CODE (x);
3035 rtx op0, op1;
3036 bool allow_reg_index_p =
3037 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
3038
3039 /* Don't support anything other than POST_INC or REG addressing for
3040 AdvSIMD. */
3041 if (aarch64_vector_mode_p (mode)
3042 && (code != POST_INC && code != REG))
3043 return false;
3044
3045 switch (code)
3046 {
3047 case REG:
3048 case SUBREG:
3049 info->type = ADDRESS_REG_IMM;
3050 info->base = x;
3051 info->offset = const0_rtx;
3052 return aarch64_base_register_rtx_p (x, strict_p);
3053
3054 case PLUS:
3055 op0 = XEXP (x, 0);
3056 op1 = XEXP (x, 1);
3057 if (GET_MODE_SIZE (mode) != 0
3058 && CONST_INT_P (op1)
3059 && aarch64_base_register_rtx_p (op0, strict_p))
3060 {
3061 HOST_WIDE_INT offset = INTVAL (op1);
3062
3063 info->type = ADDRESS_REG_IMM;
3064 info->base = op0;
3065 info->offset = op1;
3066
3067 /* TImode and TFmode values are allowed in both pairs of X
3068 registers and individual Q registers. The available
3069 address modes are:
3070 X,X: 7-bit signed scaled offset
3071 Q: 9-bit signed offset
3072 We conservatively require an offset representable in either mode.
3073 */
3074 if (mode == TImode || mode == TFmode)
3075 return (offset_7bit_signed_scaled_p (mode, offset)
3076 && offset_9bit_signed_unscaled_p (mode, offset));
3077
3078 if (outer_code == PARALLEL)
3079 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3080 && offset_7bit_signed_scaled_p (mode, offset));
3081 else
3082 return (offset_9bit_signed_unscaled_p (mode, offset)
3083 || offset_12bit_unsigned_scaled_p (mode, offset));
3084 }
3085
3086 if (allow_reg_index_p)
3087 {
3088 /* Look for base + (scaled/extended) index register. */
3089 if (aarch64_base_register_rtx_p (op0, strict_p)
3090 && aarch64_classify_index (info, op1, mode, strict_p))
3091 {
3092 info->base = op0;
3093 return true;
3094 }
3095 if (aarch64_base_register_rtx_p (op1, strict_p)
3096 && aarch64_classify_index (info, op0, mode, strict_p))
3097 {
3098 info->base = op1;
3099 return true;
3100 }
3101 }
3102
3103 return false;
3104
3105 case POST_INC:
3106 case POST_DEC:
3107 case PRE_INC:
3108 case PRE_DEC:
3109 info->type = ADDRESS_REG_WB;
3110 info->base = XEXP (x, 0);
3111 info->offset = NULL_RTX;
3112 return aarch64_base_register_rtx_p (info->base, strict_p);
3113
3114 case POST_MODIFY:
3115 case PRE_MODIFY:
3116 info->type = ADDRESS_REG_WB;
3117 info->base = XEXP (x, 0);
3118 if (GET_CODE (XEXP (x, 1)) == PLUS
3119 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3120 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3121 && aarch64_base_register_rtx_p (info->base, strict_p))
3122 {
3123 HOST_WIDE_INT offset;
3124 info->offset = XEXP (XEXP (x, 1), 1);
3125 offset = INTVAL (info->offset);
3126
3127 /* TImode and TFmode values are allowed in both pairs of X
3128 registers and individual Q registers. The available
3129 address modes are:
3130 X,X: 7-bit signed scaled offset
3131 Q: 9-bit signed offset
3132 We conservatively require an offset representable in either mode.
3133 */
3134 if (mode == TImode || mode == TFmode)
3135 return (offset_7bit_signed_scaled_p (mode, offset)
3136 && offset_9bit_signed_unscaled_p (mode, offset));
3137
3138 if (outer_code == PARALLEL)
3139 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3140 && offset_7bit_signed_scaled_p (mode, offset));
3141 else
3142 return offset_9bit_signed_unscaled_p (mode, offset);
3143 }
3144 return false;
3145
3146 case CONST:
3147 case SYMBOL_REF:
3148 case LABEL_REF:
3149 /* load literal: pc-relative constant pool entry. Only supported
3150 for SI mode or larger. */
3151 info->type = ADDRESS_SYMBOLIC;
3152 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3153 {
3154 rtx sym, addend;
3155
3156 split_const (x, &sym, &addend);
3157 return (GET_CODE (sym) == LABEL_REF
3158 || (GET_CODE (sym) == SYMBOL_REF
3159 && CONSTANT_POOL_ADDRESS_P (sym)));
3160 }
3161 return false;
3162
3163 case LO_SUM:
3164 info->type = ADDRESS_LO_SUM;
3165 info->base = XEXP (x, 0);
3166 info->offset = XEXP (x, 1);
3167 if (allow_reg_index_p
3168 && aarch64_base_register_rtx_p (info->base, strict_p))
3169 {
3170 rtx sym, offs;
3171 split_const (info->offset, &sym, &offs);
3172 if (GET_CODE (sym) == SYMBOL_REF
3173 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3174 == SYMBOL_SMALL_ABSOLUTE))
3175 {
3176 /* The symbol and offset must be aligned to the access size. */
3177 unsigned int align;
3178 unsigned int ref_size;
3179
3180 if (CONSTANT_POOL_ADDRESS_P (sym))
3181 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3182 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3183 {
3184 tree exp = SYMBOL_REF_DECL (sym);
3185 align = TYPE_ALIGN (TREE_TYPE (exp));
3186 align = CONSTANT_ALIGNMENT (exp, align);
3187 }
3188 else if (SYMBOL_REF_DECL (sym))
3189 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3190 else
3191 align = BITS_PER_UNIT;
3192
3193 ref_size = GET_MODE_SIZE (mode);
3194 if (ref_size == 0)
3195 ref_size = GET_MODE_SIZE (DImode);
3196
3197 return ((INTVAL (offs) & (ref_size - 1)) == 0
3198 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3199 }
3200 }
3201 return false;
3202
3203 default:
3204 return false;
3205 }
3206 }
3207
3208 bool
3209 aarch64_symbolic_address_p (rtx x)
3210 {
3211 rtx offset;
3212
3213 split_const (x, &x, &offset);
3214 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3215 }
3216
3217 /* Classify the base of symbolic expression X, given that X appears in
3218 context CONTEXT. */
3219
3220 enum aarch64_symbol_type
3221 aarch64_classify_symbolic_expression (rtx x,
3222 enum aarch64_symbol_context context)
3223 {
3224 rtx offset;
3225
3226 split_const (x, &x, &offset);
3227 return aarch64_classify_symbol (x, context);
3228 }
3229
3230
3231 /* Return TRUE if X is a legitimate address for accessing memory in
3232 mode MODE. */
3233 static bool
3234 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3235 {
3236 struct aarch64_address_info addr;
3237
3238 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3239 }
3240
3241 /* Return TRUE if X is a legitimate address for accessing memory in
3242 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3243 pair operation. */
3244 bool
3245 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3246 RTX_CODE outer_code, bool strict_p)
3247 {
3248 struct aarch64_address_info addr;
3249
3250 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3251 }
3252
3253 /* Return TRUE if rtx X is immediate constant 0.0 */
3254 bool
3255 aarch64_float_const_zero_rtx_p (rtx x)
3256 {
3257 REAL_VALUE_TYPE r;
3258
3259 if (GET_MODE (x) == VOIDmode)
3260 return false;
3261
3262 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3263 if (REAL_VALUE_MINUS_ZERO (r))
3264 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3265 return REAL_VALUES_EQUAL (r, dconst0);
3266 }
3267
3268 /* Return the fixed registers used for condition codes. */
3269
3270 static bool
3271 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3272 {
3273 *p1 = CC_REGNUM;
3274 *p2 = INVALID_REGNUM;
3275 return true;
3276 }
3277
3278 enum machine_mode
3279 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3280 {
3281 /* All floating point compares return CCFP if it is an equality
3282 comparison, and CCFPE otherwise. */
3283 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3284 {
3285 switch (code)
3286 {
3287 case EQ:
3288 case NE:
3289 case UNORDERED:
3290 case ORDERED:
3291 case UNLT:
3292 case UNLE:
3293 case UNGT:
3294 case UNGE:
3295 case UNEQ:
3296 case LTGT:
3297 return CCFPmode;
3298
3299 case LT:
3300 case LE:
3301 case GT:
3302 case GE:
3303 return CCFPEmode;
3304
3305 default:
3306 gcc_unreachable ();
3307 }
3308 }
3309
3310 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3311 && y == const0_rtx
3312 && (code == EQ || code == NE || code == LT || code == GE)
3313 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3314 || GET_CODE (x) == NEG))
3315 return CC_NZmode;
3316
3317 /* A compare with a shifted or negated operand. Because of canonicalization,
3318 the comparison will have to be swapped when we emit the assembly
3319 code. */
3320 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3321 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3322 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3323 || GET_CODE (x) == LSHIFTRT
3324 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND
3325 || GET_CODE (x) == NEG))
3326 return CC_SWPmode;
3327
3328 /* A compare of a mode narrower than SI mode against zero can be done
3329 by extending the value in the comparison. */
3330 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3331 && y == const0_rtx)
3332 /* Only use sign-extension if we really need it. */
3333 return ((code == GT || code == GE || code == LE || code == LT)
3334 ? CC_SESWPmode : CC_ZESWPmode);
3335
3336 /* For everything else, return CCmode. */
3337 return CCmode;
3338 }
3339
3340 static unsigned
3341 aarch64_get_condition_code (rtx x)
3342 {
3343 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3344 enum rtx_code comp_code = GET_CODE (x);
3345
3346 if (GET_MODE_CLASS (mode) != MODE_CC)
3347 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3348
3349 switch (mode)
3350 {
3351 case CCFPmode:
3352 case CCFPEmode:
3353 switch (comp_code)
3354 {
3355 case GE: return AARCH64_GE;
3356 case GT: return AARCH64_GT;
3357 case LE: return AARCH64_LS;
3358 case LT: return AARCH64_MI;
3359 case NE: return AARCH64_NE;
3360 case EQ: return AARCH64_EQ;
3361 case ORDERED: return AARCH64_VC;
3362 case UNORDERED: return AARCH64_VS;
3363 case UNLT: return AARCH64_LT;
3364 case UNLE: return AARCH64_LE;
3365 case UNGT: return AARCH64_HI;
3366 case UNGE: return AARCH64_PL;
3367 default: gcc_unreachable ();
3368 }
3369 break;
3370
3371 case CCmode:
3372 switch (comp_code)
3373 {
3374 case NE: return AARCH64_NE;
3375 case EQ: return AARCH64_EQ;
3376 case GE: return AARCH64_GE;
3377 case GT: return AARCH64_GT;
3378 case LE: return AARCH64_LE;
3379 case LT: return AARCH64_LT;
3380 case GEU: return AARCH64_CS;
3381 case GTU: return AARCH64_HI;
3382 case LEU: return AARCH64_LS;
3383 case LTU: return AARCH64_CC;
3384 default: gcc_unreachable ();
3385 }
3386 break;
3387
3388 case CC_SWPmode:
3389 case CC_ZESWPmode:
3390 case CC_SESWPmode:
3391 switch (comp_code)
3392 {
3393 case NE: return AARCH64_NE;
3394 case EQ: return AARCH64_EQ;
3395 case GE: return AARCH64_LE;
3396 case GT: return AARCH64_LT;
3397 case LE: return AARCH64_GE;
3398 case LT: return AARCH64_GT;
3399 case GEU: return AARCH64_LS;
3400 case GTU: return AARCH64_CC;
3401 case LEU: return AARCH64_CS;
3402 case LTU: return AARCH64_HI;
3403 default: gcc_unreachable ();
3404 }
3405 break;
3406
3407 case CC_NZmode:
3408 switch (comp_code)
3409 {
3410 case NE: return AARCH64_NE;
3411 case EQ: return AARCH64_EQ;
3412 case GE: return AARCH64_PL;
3413 case LT: return AARCH64_MI;
3414 default: gcc_unreachable ();
3415 }
3416 break;
3417
3418 default:
3419 gcc_unreachable ();
3420 break;
3421 }
3422 }
3423
3424 static unsigned
3425 bit_count (unsigned HOST_WIDE_INT value)
3426 {
3427 unsigned count = 0;
3428
3429 while (value)
3430 {
3431 count++;
3432 value &= value - 1;
3433 }
3434
3435 return count;
3436 }
3437
3438 void
3439 aarch64_print_operand (FILE *f, rtx x, char code)
3440 {
3441 switch (code)
3442 {
3443 /* An integer or symbol address without a preceding # sign. */
3444 case 'c':
3445 switch (GET_CODE (x))
3446 {
3447 case CONST_INT:
3448 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3449 break;
3450
3451 case SYMBOL_REF:
3452 output_addr_const (f, x);
3453 break;
3454
3455 case CONST:
3456 if (GET_CODE (XEXP (x, 0)) == PLUS
3457 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3458 {
3459 output_addr_const (f, x);
3460 break;
3461 }
3462 /* Fall through. */
3463
3464 default:
3465 output_operand_lossage ("Unsupported operand for code '%c'", code);
3466 }
3467 break;
3468
3469 case 'e':
3470 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3471 {
3472 int n;
3473
3474 if (GET_CODE (x) != CONST_INT
3475 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3476 {
3477 output_operand_lossage ("invalid operand for '%%%c'", code);
3478 return;
3479 }
3480
3481 switch (n)
3482 {
3483 case 3:
3484 fputc ('b', f);
3485 break;
3486 case 4:
3487 fputc ('h', f);
3488 break;
3489 case 5:
3490 fputc ('w', f);
3491 break;
3492 default:
3493 output_operand_lossage ("invalid operand for '%%%c'", code);
3494 return;
3495 }
3496 }
3497 break;
3498
3499 case 'p':
3500 {
3501 int n;
3502
3503 /* Print N such that 2^N == X. */
3504 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3505 {
3506 output_operand_lossage ("invalid operand for '%%%c'", code);
3507 return;
3508 }
3509
3510 asm_fprintf (f, "%d", n);
3511 }
3512 break;
3513
3514 case 'P':
3515 /* Print the number of non-zero bits in X (a const_int). */
3516 if (GET_CODE (x) != CONST_INT)
3517 {
3518 output_operand_lossage ("invalid operand for '%%%c'", code);
3519 return;
3520 }
3521
3522 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3523 break;
3524
3525 case 'H':
3526 /* Print the higher numbered register of a pair (TImode) of regs. */
3527 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3528 {
3529 output_operand_lossage ("invalid operand for '%%%c'", code);
3530 return;
3531 }
3532
3533 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3534 break;
3535
3536 case 'm':
3537 /* Print a condition (eq, ne, etc). */
3538
3539 /* CONST_TRUE_RTX means always -- that's the default. */
3540 if (x == const_true_rtx)
3541 return;
3542
3543 if (!COMPARISON_P (x))
3544 {
3545 output_operand_lossage ("invalid operand for '%%%c'", code);
3546 return;
3547 }
3548
3549 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3550 break;
3551
3552 case 'M':
3553 /* Print the inverse of a condition (eq <-> ne, etc). */
3554
3555 /* CONST_TRUE_RTX means never -- that's the default. */
3556 if (x == const_true_rtx)
3557 {
3558 fputs ("nv", f);
3559 return;
3560 }
3561
3562 if (!COMPARISON_P (x))
3563 {
3564 output_operand_lossage ("invalid operand for '%%%c'", code);
3565 return;
3566 }
3567
3568 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3569 (aarch64_get_condition_code (x))], f);
3570 break;
3571
3572 case 'b':
3573 case 'h':
3574 case 's':
3575 case 'd':
3576 case 'q':
3577 /* Print a scalar FP/SIMD register name. */
3578 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3579 {
3580 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3581 return;
3582 }
3583 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3584 break;
3585
3586 case 'S':
3587 case 'T':
3588 case 'U':
3589 case 'V':
3590 /* Print the first FP/SIMD register name in a list. */
3591 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3592 {
3593 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3594 return;
3595 }
3596 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3597 break;
3598
3599 case 'X':
3600 /* Print bottom 16 bits of integer constant in hex. */
3601 if (GET_CODE (x) != CONST_INT)
3602 {
3603 output_operand_lossage ("invalid operand for '%%%c'", code);
3604 return;
3605 }
3606 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3607 break;
3608
3609 case 'w':
3610 case 'x':
3611 /* Print a general register name or the zero register (32-bit or
3612 64-bit). */
3613 if (x == const0_rtx
3614 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3615 {
3616 asm_fprintf (f, "%czr", code);
3617 break;
3618 }
3619
3620 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3621 {
3622 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3623 break;
3624 }
3625
3626 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3627 {
3628 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3629 break;
3630 }
3631
3632 /* Fall through */
3633
3634 case 0:
3635 /* Print a normal operand, if it's a general register, then we
3636 assume DImode. */
3637 if (x == NULL)
3638 {
3639 output_operand_lossage ("missing operand");
3640 return;
3641 }
3642
3643 switch (GET_CODE (x))
3644 {
3645 case REG:
3646 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3647 break;
3648
3649 case MEM:
3650 aarch64_memory_reference_mode = GET_MODE (x);
3651 output_address (XEXP (x, 0));
3652 break;
3653
3654 case LABEL_REF:
3655 case SYMBOL_REF:
3656 output_addr_const (asm_out_file, x);
3657 break;
3658
3659 case CONST_INT:
3660 asm_fprintf (f, "%wd", INTVAL (x));
3661 break;
3662
3663 case CONST_VECTOR:
3664 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3665 {
3666 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3667 HOST_WIDE_INT_MIN,
3668 HOST_WIDE_INT_MAX));
3669 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3670 }
3671 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3672 {
3673 fputc ('0', f);
3674 }
3675 else
3676 gcc_unreachable ();
3677 break;
3678
3679 case CONST_DOUBLE:
3680 /* CONST_DOUBLE can represent a double-width integer.
3681 In this case, the mode of x is VOIDmode. */
3682 if (GET_MODE (x) == VOIDmode)
3683 ; /* Do Nothing. */
3684 else if (aarch64_float_const_zero_rtx_p (x))
3685 {
3686 fputc ('0', f);
3687 break;
3688 }
3689 else if (aarch64_float_const_representable_p (x))
3690 {
3691 #define buf_size 20
3692 char float_buf[buf_size] = {'\0'};
3693 REAL_VALUE_TYPE r;
3694 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3695 real_to_decimal_for_mode (float_buf, &r,
3696 buf_size, buf_size,
3697 1, GET_MODE (x));
3698 asm_fprintf (asm_out_file, "%s", float_buf);
3699 break;
3700 #undef buf_size
3701 }
3702 output_operand_lossage ("invalid constant");
3703 return;
3704 default:
3705 output_operand_lossage ("invalid operand");
3706 return;
3707 }
3708 break;
3709
3710 case 'A':
3711 if (GET_CODE (x) == HIGH)
3712 x = XEXP (x, 0);
3713
3714 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3715 {
3716 case SYMBOL_SMALL_GOT:
3717 asm_fprintf (asm_out_file, ":got:");
3718 break;
3719
3720 case SYMBOL_SMALL_TLSGD:
3721 asm_fprintf (asm_out_file, ":tlsgd:");
3722 break;
3723
3724 case SYMBOL_SMALL_TLSDESC:
3725 asm_fprintf (asm_out_file, ":tlsdesc:");
3726 break;
3727
3728 case SYMBOL_SMALL_GOTTPREL:
3729 asm_fprintf (asm_out_file, ":gottprel:");
3730 break;
3731
3732 case SYMBOL_SMALL_TPREL:
3733 asm_fprintf (asm_out_file, ":tprel:");
3734 break;
3735
3736 case SYMBOL_TINY_GOT:
3737 gcc_unreachable ();
3738 break;
3739
3740 default:
3741 break;
3742 }
3743 output_addr_const (asm_out_file, x);
3744 break;
3745
3746 case 'L':
3747 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3748 {
3749 case SYMBOL_SMALL_GOT:
3750 asm_fprintf (asm_out_file, ":lo12:");
3751 break;
3752
3753 case SYMBOL_SMALL_TLSGD:
3754 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3755 break;
3756
3757 case SYMBOL_SMALL_TLSDESC:
3758 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3759 break;
3760
3761 case SYMBOL_SMALL_GOTTPREL:
3762 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3763 break;
3764
3765 case SYMBOL_SMALL_TPREL:
3766 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3767 break;
3768
3769 case SYMBOL_TINY_GOT:
3770 asm_fprintf (asm_out_file, ":got:");
3771 break;
3772
3773 default:
3774 break;
3775 }
3776 output_addr_const (asm_out_file, x);
3777 break;
3778
3779 case 'G':
3780
3781 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3782 {
3783 case SYMBOL_SMALL_TPREL:
3784 asm_fprintf (asm_out_file, ":tprel_hi12:");
3785 break;
3786 default:
3787 break;
3788 }
3789 output_addr_const (asm_out_file, x);
3790 break;
3791
3792 default:
3793 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3794 return;
3795 }
3796 }
3797
3798 void
3799 aarch64_print_operand_address (FILE *f, rtx x)
3800 {
3801 struct aarch64_address_info addr;
3802
3803 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3804 MEM, true))
3805 switch (addr.type)
3806 {
3807 case ADDRESS_REG_IMM:
3808 if (addr.offset == const0_rtx)
3809 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3810 else
3811 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3812 INTVAL (addr.offset));
3813 return;
3814
3815 case ADDRESS_REG_REG:
3816 if (addr.shift == 0)
3817 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3818 reg_names [REGNO (addr.offset)]);
3819 else
3820 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3821 reg_names [REGNO (addr.offset)], addr.shift);
3822 return;
3823
3824 case ADDRESS_REG_UXTW:
3825 if (addr.shift == 0)
3826 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3827 REGNO (addr.offset) - R0_REGNUM);
3828 else
3829 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3830 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3831 return;
3832
3833 case ADDRESS_REG_SXTW:
3834 if (addr.shift == 0)
3835 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3836 REGNO (addr.offset) - R0_REGNUM);
3837 else
3838 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3839 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3840 return;
3841
3842 case ADDRESS_REG_WB:
3843 switch (GET_CODE (x))
3844 {
3845 case PRE_INC:
3846 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3847 GET_MODE_SIZE (aarch64_memory_reference_mode));
3848 return;
3849 case POST_INC:
3850 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3851 GET_MODE_SIZE (aarch64_memory_reference_mode));
3852 return;
3853 case PRE_DEC:
3854 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3855 GET_MODE_SIZE (aarch64_memory_reference_mode));
3856 return;
3857 case POST_DEC:
3858 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3859 GET_MODE_SIZE (aarch64_memory_reference_mode));
3860 return;
3861 case PRE_MODIFY:
3862 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3863 INTVAL (addr.offset));
3864 return;
3865 case POST_MODIFY:
3866 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3867 INTVAL (addr.offset));
3868 return;
3869 default:
3870 break;
3871 }
3872 break;
3873
3874 case ADDRESS_LO_SUM:
3875 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3876 output_addr_const (f, addr.offset);
3877 asm_fprintf (f, "]");
3878 return;
3879
3880 case ADDRESS_SYMBOLIC:
3881 break;
3882 }
3883
3884 output_addr_const (f, x);
3885 }
3886
3887 bool
3888 aarch64_label_mentioned_p (rtx x)
3889 {
3890 const char *fmt;
3891 int i;
3892
3893 if (GET_CODE (x) == LABEL_REF)
3894 return true;
3895
3896 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3897 referencing instruction, but they are constant offsets, not
3898 symbols. */
3899 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3900 return false;
3901
3902 fmt = GET_RTX_FORMAT (GET_CODE (x));
3903 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3904 {
3905 if (fmt[i] == 'E')
3906 {
3907 int j;
3908
3909 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3910 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3911 return 1;
3912 }
3913 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3914 return 1;
3915 }
3916
3917 return 0;
3918 }
3919
3920 /* Implement REGNO_REG_CLASS. */
3921
3922 enum reg_class
3923 aarch64_regno_regclass (unsigned regno)
3924 {
3925 if (GP_REGNUM_P (regno))
3926 return CORE_REGS;
3927
3928 if (regno == SP_REGNUM)
3929 return STACK_REG;
3930
3931 if (regno == FRAME_POINTER_REGNUM
3932 || regno == ARG_POINTER_REGNUM)
3933 return POINTER_REGS;
3934
3935 if (FP_REGNUM_P (regno))
3936 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3937
3938 return NO_REGS;
3939 }
3940
3941 /* Try a machine-dependent way of reloading an illegitimate address
3942 operand. If we find one, push the reload and return the new rtx. */
3943
3944 rtx
3945 aarch64_legitimize_reload_address (rtx *x_p,
3946 enum machine_mode mode,
3947 int opnum, int type,
3948 int ind_levels ATTRIBUTE_UNUSED)
3949 {
3950 rtx x = *x_p;
3951
3952 /* Do not allow mem (plus (reg, const)) if vector mode. */
3953 if (aarch64_vector_mode_p (mode)
3954 && GET_CODE (x) == PLUS
3955 && REG_P (XEXP (x, 0))
3956 && CONST_INT_P (XEXP (x, 1)))
3957 {
3958 rtx orig_rtx = x;
3959 x = copy_rtx (x);
3960 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3961 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3962 opnum, (enum reload_type) type);
3963 return x;
3964 }
3965
3966 /* We must recognize output that we have already generated ourselves. */
3967 if (GET_CODE (x) == PLUS
3968 && GET_CODE (XEXP (x, 0)) == PLUS
3969 && REG_P (XEXP (XEXP (x, 0), 0))
3970 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3971 && CONST_INT_P (XEXP (x, 1)))
3972 {
3973 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3974 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3975 opnum, (enum reload_type) type);
3976 return x;
3977 }
3978
3979 /* We wish to handle large displacements off a base register by splitting
3980 the addend across an add and the mem insn. This can cut the number of
3981 extra insns needed from 3 to 1. It is only useful for load/store of a
3982 single register with 12 bit offset field. */
3983 if (GET_CODE (x) == PLUS
3984 && REG_P (XEXP (x, 0))
3985 && CONST_INT_P (XEXP (x, 1))
3986 && HARD_REGISTER_P (XEXP (x, 0))
3987 && mode != TImode
3988 && mode != TFmode
3989 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
3990 {
3991 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3992 HOST_WIDE_INT low = val & 0xfff;
3993 HOST_WIDE_INT high = val - low;
3994 HOST_WIDE_INT offs;
3995 rtx cst;
3996 enum machine_mode xmode = GET_MODE (x);
3997
3998 /* In ILP32, xmode can be either DImode or SImode. */
3999 gcc_assert (xmode == DImode || xmode == SImode);
4000
4001 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4002 BLKmode alignment. */
4003 if (GET_MODE_SIZE (mode) == 0)
4004 return NULL_RTX;
4005
4006 offs = low % GET_MODE_SIZE (mode);
4007
4008 /* Align misaligned offset by adjusting high part to compensate. */
4009 if (offs != 0)
4010 {
4011 if (aarch64_uimm12_shift (high + offs))
4012 {
4013 /* Align down. */
4014 low = low - offs;
4015 high = high + offs;
4016 }
4017 else
4018 {
4019 /* Align up. */
4020 offs = GET_MODE_SIZE (mode) - offs;
4021 low = low + offs;
4022 high = high + (low & 0x1000) - offs;
4023 low &= 0xfff;
4024 }
4025 }
4026
4027 /* Check for overflow. */
4028 if (high + low != val)
4029 return NULL_RTX;
4030
4031 cst = GEN_INT (high);
4032 if (!aarch64_uimm12_shift (high))
4033 cst = force_const_mem (xmode, cst);
4034
4035 /* Reload high part into base reg, leaving the low part
4036 in the mem instruction.
4037 Note that replacing this gen_rtx_PLUS with plus_constant is
4038 wrong in this case because we rely on the
4039 (plus (plus reg c1) c2) structure being preserved so that
4040 XEXP (*p, 0) in push_reload below uses the correct term. */
4041 x = gen_rtx_PLUS (xmode,
4042 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4043 GEN_INT (low));
4044
4045 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4046 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4047 opnum, (enum reload_type) type);
4048 return x;
4049 }
4050
4051 return NULL_RTX;
4052 }
4053
4054
4055 static reg_class_t
4056 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4057 reg_class_t rclass,
4058 enum machine_mode mode,
4059 secondary_reload_info *sri)
4060 {
4061 /* Without the TARGET_SIMD instructions we cannot move a Q register
4062 to a Q register directly. We need a scratch. */
4063 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4064 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4065 && reg_class_subset_p (rclass, FP_REGS))
4066 {
4067 if (mode == TFmode)
4068 sri->icode = CODE_FOR_aarch64_reload_movtf;
4069 else if (mode == TImode)
4070 sri->icode = CODE_FOR_aarch64_reload_movti;
4071 return NO_REGS;
4072 }
4073
4074 /* A TFmode or TImode memory access should be handled via an FP_REGS
4075 because AArch64 has richer addressing modes for LDR/STR instructions
4076 than LDP/STP instructions. */
4077 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4078 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4079 return FP_REGS;
4080
4081 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4082 return CORE_REGS;
4083
4084 return NO_REGS;
4085 }
4086
4087 static bool
4088 aarch64_can_eliminate (const int from, const int to)
4089 {
4090 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4091 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4092
4093 if (frame_pointer_needed)
4094 {
4095 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4096 return true;
4097 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4098 return false;
4099 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4100 && !cfun->calls_alloca)
4101 return true;
4102 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4103 return true;
4104 return false;
4105 }
4106 else
4107 {
4108 /* If we decided that we didn't need a leaf frame pointer but then used
4109 LR in the function, then we'll want a frame pointer after all, so
4110 prevent this elimination to ensure a frame pointer is used.
4111
4112 NOTE: the original value of flag_omit_frame_pointer gets trashed
4113 IFF flag_omit_leaf_frame_pointer is true, so we check the value
4114 of faked_omit_frame_pointer here (which is true when we always
4115 wish to keep non-leaf frame pointers but only wish to keep leaf frame
4116 pointers when LR is clobbered). */
4117 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4118 && df_regs_ever_live_p (LR_REGNUM)
4119 && faked_omit_frame_pointer)
4120 return false;
4121 }
4122
4123 return true;
4124 }
4125
4126 HOST_WIDE_INT
4127 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4128 {
4129 HOST_WIDE_INT frame_size;
4130 HOST_WIDE_INT offset;
4131
4132 aarch64_layout_frame ();
4133 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4134 + crtl->outgoing_args_size
4135 + cfun->machine->saved_varargs_size);
4136
4137 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4138 offset = frame_size;
4139
4140 if (to == HARD_FRAME_POINTER_REGNUM)
4141 {
4142 if (from == ARG_POINTER_REGNUM)
4143 return offset - crtl->outgoing_args_size;
4144
4145 if (from == FRAME_POINTER_REGNUM)
4146 return cfun->machine->frame.saved_regs_size;
4147 }
4148
4149 if (to == STACK_POINTER_REGNUM)
4150 {
4151 if (from == FRAME_POINTER_REGNUM)
4152 {
4153 HOST_WIDE_INT elim = crtl->outgoing_args_size
4154 + cfun->machine->frame.saved_regs_size
4155 - cfun->machine->frame.fp_lr_offset;
4156 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4157 return elim;
4158 }
4159 }
4160
4161 return offset;
4162 }
4163
4164
4165 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4166 previous frame. */
4167
4168 rtx
4169 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4170 {
4171 if (count != 0)
4172 return const0_rtx;
4173 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4174 }
4175
4176
4177 static void
4178 aarch64_asm_trampoline_template (FILE *f)
4179 {
4180 if (TARGET_ILP32)
4181 {
4182 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4183 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4184 }
4185 else
4186 {
4187 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4188 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4189 }
4190 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4191 assemble_aligned_integer (4, const0_rtx);
4192 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4193 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4194 }
4195
4196 static void
4197 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4198 {
4199 rtx fnaddr, mem, a_tramp;
4200 const int tramp_code_sz = 16;
4201
4202 /* Don't need to copy the trailing D-words, we fill those in below. */
4203 emit_block_move (m_tramp, assemble_trampoline_template (),
4204 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4205 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4206 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4207 if (GET_MODE (fnaddr) != ptr_mode)
4208 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4209 emit_move_insn (mem, fnaddr);
4210
4211 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4212 emit_move_insn (mem, chain_value);
4213
4214 /* XXX We should really define a "clear_cache" pattern and use
4215 gen_clear_cache(). */
4216 a_tramp = XEXP (m_tramp, 0);
4217 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4218 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4219 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4220 ptr_mode);
4221 }
4222
4223 static unsigned char
4224 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4225 {
4226 switch (regclass)
4227 {
4228 case CORE_REGS:
4229 case POINTER_REGS:
4230 case GENERAL_REGS:
4231 case ALL_REGS:
4232 case FP_REGS:
4233 case FP_LO_REGS:
4234 return
4235 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4236 (GET_MODE_SIZE (mode) + 7) / 8;
4237 case STACK_REG:
4238 return 1;
4239
4240 case NO_REGS:
4241 return 0;
4242
4243 default:
4244 break;
4245 }
4246 gcc_unreachable ();
4247 }
4248
4249 static reg_class_t
4250 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4251 {
4252 if (regclass == POINTER_REGS)
4253 return GENERAL_REGS;
4254
4255 if (regclass == STACK_REG)
4256 {
4257 if (REG_P(x)
4258 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4259 return regclass;
4260
4261 return NO_REGS;
4262 }
4263
4264 /* If it's an integer immediate that MOVI can't handle, then
4265 FP_REGS is not an option, so we return NO_REGS instead. */
4266 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4267 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4268 return NO_REGS;
4269
4270 /* Register eliminiation can result in a request for
4271 SP+constant->FP_REGS. We cannot support such operations which
4272 use SP as source and an FP_REG as destination, so reject out
4273 right now. */
4274 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4275 {
4276 rtx lhs = XEXP (x, 0);
4277
4278 /* Look through a possible SUBREG introduced by ILP32. */
4279 if (GET_CODE (lhs) == SUBREG)
4280 lhs = SUBREG_REG (lhs);
4281
4282 gcc_assert (REG_P (lhs));
4283 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4284 POINTER_REGS));
4285 return NO_REGS;
4286 }
4287
4288 return regclass;
4289 }
4290
4291 void
4292 aarch64_asm_output_labelref (FILE* f, const char *name)
4293 {
4294 asm_fprintf (f, "%U%s", name);
4295 }
4296
4297 static void
4298 aarch64_elf_asm_constructor (rtx symbol, int priority)
4299 {
4300 if (priority == DEFAULT_INIT_PRIORITY)
4301 default_ctor_section_asm_out_constructor (symbol, priority);
4302 else
4303 {
4304 section *s;
4305 char buf[18];
4306 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4307 s = get_section (buf, SECTION_WRITE, NULL);
4308 switch_to_section (s);
4309 assemble_align (POINTER_SIZE);
4310 assemble_aligned_integer (POINTER_BYTES, symbol);
4311 }
4312 }
4313
4314 static void
4315 aarch64_elf_asm_destructor (rtx symbol, int priority)
4316 {
4317 if (priority == DEFAULT_INIT_PRIORITY)
4318 default_dtor_section_asm_out_destructor (symbol, priority);
4319 else
4320 {
4321 section *s;
4322 char buf[18];
4323 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4324 s = get_section (buf, SECTION_WRITE, NULL);
4325 switch_to_section (s);
4326 assemble_align (POINTER_SIZE);
4327 assemble_aligned_integer (POINTER_BYTES, symbol);
4328 }
4329 }
4330
4331 const char*
4332 aarch64_output_casesi (rtx *operands)
4333 {
4334 char buf[100];
4335 char label[100];
4336 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4337 int index;
4338 static const char *const patterns[4][2] =
4339 {
4340 {
4341 "ldrb\t%w3, [%0,%w1,uxtw]",
4342 "add\t%3, %4, %w3, sxtb #2"
4343 },
4344 {
4345 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4346 "add\t%3, %4, %w3, sxth #2"
4347 },
4348 {
4349 "ldr\t%w3, [%0,%w1,uxtw #2]",
4350 "add\t%3, %4, %w3, sxtw #2"
4351 },
4352 /* We assume that DImode is only generated when not optimizing and
4353 that we don't really need 64-bit address offsets. That would
4354 imply an object file with 8GB of code in a single function! */
4355 {
4356 "ldr\t%w3, [%0,%w1,uxtw #2]",
4357 "add\t%3, %4, %w3, sxtw #2"
4358 }
4359 };
4360
4361 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4362
4363 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4364
4365 gcc_assert (index >= 0 && index <= 3);
4366
4367 /* Need to implement table size reduction, by chaning the code below. */
4368 output_asm_insn (patterns[index][0], operands);
4369 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4370 snprintf (buf, sizeof (buf),
4371 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4372 output_asm_insn (buf, operands);
4373 output_asm_insn (patterns[index][1], operands);
4374 output_asm_insn ("br\t%3", operands);
4375 assemble_label (asm_out_file, label);
4376 return "";
4377 }
4378
4379
4380 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4381 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4382 operator. */
4383
4384 int
4385 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4386 {
4387 if (shift >= 0 && shift <= 3)
4388 {
4389 int size;
4390 for (size = 8; size <= 32; size *= 2)
4391 {
4392 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4393 if (mask == bits << shift)
4394 return size;
4395 }
4396 }
4397 return 0;
4398 }
4399
4400 static bool
4401 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4402 const_rtx x ATTRIBUTE_UNUSED)
4403 {
4404 /* We can't use blocks for constants when we're using a per-function
4405 constant pool. */
4406 return false;
4407 }
4408
4409 static section *
4410 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4411 rtx x ATTRIBUTE_UNUSED,
4412 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4413 {
4414 /* Force all constant pool entries into the current function section. */
4415 return function_section (current_function_decl);
4416 }
4417
4418
4419 /* Costs. */
4420
4421 /* Helper function for rtx cost calculation. Strip a shift expression
4422 from X. Returns the inner operand if successful, or the original
4423 expression on failure. */
4424 static rtx
4425 aarch64_strip_shift (rtx x)
4426 {
4427 rtx op = x;
4428
4429 if ((GET_CODE (op) == ASHIFT
4430 || GET_CODE (op) == ASHIFTRT
4431 || GET_CODE (op) == LSHIFTRT)
4432 && CONST_INT_P (XEXP (op, 1)))
4433 return XEXP (op, 0);
4434
4435 if (GET_CODE (op) == MULT
4436 && CONST_INT_P (XEXP (op, 1))
4437 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4438 return XEXP (op, 0);
4439
4440 return x;
4441 }
4442
4443 /* Helper function for rtx cost calculation. Strip a shift or extend
4444 expression from X. Returns the inner operand if successful, or the
4445 original expression on failure. We deal with a number of possible
4446 canonicalization variations here. */
4447 static rtx
4448 aarch64_strip_shift_or_extend (rtx x)
4449 {
4450 rtx op = x;
4451
4452 /* Zero and sign extraction of a widened value. */
4453 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4454 && XEXP (op, 2) == const0_rtx
4455 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4456 XEXP (op, 1)))
4457 return XEXP (XEXP (op, 0), 0);
4458
4459 /* It can also be represented (for zero-extend) as an AND with an
4460 immediate. */
4461 if (GET_CODE (op) == AND
4462 && GET_CODE (XEXP (op, 0)) == MULT
4463 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4464 && CONST_INT_P (XEXP (op, 1))
4465 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4466 INTVAL (XEXP (op, 1))) != 0)
4467 return XEXP (XEXP (op, 0), 0);
4468
4469 /* Now handle extended register, as this may also have an optional
4470 left shift by 1..4. */
4471 if (GET_CODE (op) == ASHIFT
4472 && CONST_INT_P (XEXP (op, 1))
4473 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4474 op = XEXP (op, 0);
4475
4476 if (GET_CODE (op) == ZERO_EXTEND
4477 || GET_CODE (op) == SIGN_EXTEND)
4478 op = XEXP (op, 0);
4479
4480 if (op != x)
4481 return op;
4482
4483 return aarch64_strip_shift (x);
4484 }
4485
4486 /* Calculate the cost of calculating X, storing it in *COST. Result
4487 is true if the total cost of the operation has now been calculated. */
4488 static bool
4489 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4490 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4491 {
4492 rtx op0, op1;
4493 const struct cpu_rtx_cost_table *extra_cost
4494 = aarch64_tune_params->insn_extra_cost;
4495
4496 switch (code)
4497 {
4498 case SET:
4499 op0 = SET_DEST (x);
4500 op1 = SET_SRC (x);
4501
4502 switch (GET_CODE (op0))
4503 {
4504 case MEM:
4505 if (speed)
4506 *cost += extra_cost->memory_store;
4507
4508 if (op1 != const0_rtx)
4509 *cost += rtx_cost (op1, SET, 1, speed);
4510 return true;
4511
4512 case SUBREG:
4513 if (! REG_P (SUBREG_REG (op0)))
4514 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4515 /* Fall through. */
4516 case REG:
4517 /* Cost is just the cost of the RHS of the set. */
4518 *cost += rtx_cost (op1, SET, 1, true);
4519 return true;
4520
4521 case ZERO_EXTRACT: /* Bit-field insertion. */
4522 case SIGN_EXTRACT:
4523 /* Strip any redundant widening of the RHS to meet the width of
4524 the target. */
4525 if (GET_CODE (op1) == SUBREG)
4526 op1 = SUBREG_REG (op1);
4527 if ((GET_CODE (op1) == ZERO_EXTEND
4528 || GET_CODE (op1) == SIGN_EXTEND)
4529 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4530 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4531 >= INTVAL (XEXP (op0, 1))))
4532 op1 = XEXP (op1, 0);
4533 *cost += rtx_cost (op1, SET, 1, speed);
4534 return true;
4535
4536 default:
4537 break;
4538 }
4539 return false;
4540
4541 case MEM:
4542 if (speed)
4543 *cost += extra_cost->memory_load;
4544
4545 return true;
4546
4547 case NEG:
4548 op0 = CONST0_RTX (GET_MODE (x));
4549 op1 = XEXP (x, 0);
4550 goto cost_minus;
4551
4552 case COMPARE:
4553 op0 = XEXP (x, 0);
4554 op1 = XEXP (x, 1);
4555
4556 if (op1 == const0_rtx
4557 && GET_CODE (op0) == AND)
4558 {
4559 x = op0;
4560 goto cost_logic;
4561 }
4562
4563 /* Comparisons can work if the order is swapped.
4564 Canonicalization puts the more complex operation first, but
4565 we want it in op1. */
4566 if (! (REG_P (op0)
4567 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4568 {
4569 op0 = XEXP (x, 1);
4570 op1 = XEXP (x, 0);
4571 }
4572 goto cost_minus;
4573
4574 case MINUS:
4575 op0 = XEXP (x, 0);
4576 op1 = XEXP (x, 1);
4577
4578 cost_minus:
4579 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4580 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4581 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4582 {
4583 if (op0 != const0_rtx)
4584 *cost += rtx_cost (op0, MINUS, 0, speed);
4585
4586 if (CONST_INT_P (op1))
4587 {
4588 if (!aarch64_uimm12_shift (INTVAL (op1)))
4589 *cost += rtx_cost (op1, MINUS, 1, speed);
4590 }
4591 else
4592 {
4593 op1 = aarch64_strip_shift_or_extend (op1);
4594 *cost += rtx_cost (op1, MINUS, 1, speed);
4595 }
4596 return true;
4597 }
4598
4599 return false;
4600
4601 case PLUS:
4602 op0 = XEXP (x, 0);
4603 op1 = XEXP (x, 1);
4604
4605 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4606 {
4607 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4608 {
4609 *cost += rtx_cost (op0, PLUS, 0, speed);
4610 }
4611 else
4612 {
4613 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4614
4615 if (new_op0 == op0
4616 && GET_CODE (op0) == MULT)
4617 {
4618 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4619 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4620 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4621 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4622 {
4623 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4624 speed)
4625 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4626 speed)
4627 + rtx_cost (op1, PLUS, 1, speed));
4628 if (speed)
4629 *cost += extra_cost->int_multiply_extend_add;
4630 return true;
4631 }
4632 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4633 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4634 + rtx_cost (op1, PLUS, 1, speed));
4635
4636 if (speed)
4637 *cost += extra_cost->int_multiply_add;
4638 }
4639
4640 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4641 + rtx_cost (op1, PLUS, 1, speed));
4642 }
4643 return true;
4644 }
4645
4646 return false;
4647
4648 case IOR:
4649 case XOR:
4650 case AND:
4651 cost_logic:
4652 op0 = XEXP (x, 0);
4653 op1 = XEXP (x, 1);
4654
4655 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4656 {
4657 if (CONST_INT_P (op1)
4658 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4659 {
4660 *cost += rtx_cost (op0, AND, 0, speed);
4661 }
4662 else
4663 {
4664 if (GET_CODE (op0) == NOT)
4665 op0 = XEXP (op0, 0);
4666 op0 = aarch64_strip_shift (op0);
4667 *cost += (rtx_cost (op0, AND, 0, speed)
4668 + rtx_cost (op1, AND, 1, speed));
4669 }
4670 return true;
4671 }
4672 return false;
4673
4674 case ZERO_EXTEND:
4675 if ((GET_MODE (x) == DImode
4676 && GET_MODE (XEXP (x, 0)) == SImode)
4677 || GET_CODE (XEXP (x, 0)) == MEM)
4678 {
4679 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4680 return true;
4681 }
4682 return false;
4683
4684 case SIGN_EXTEND:
4685 if (GET_CODE (XEXP (x, 0)) == MEM)
4686 {
4687 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4688 return true;
4689 }
4690 return false;
4691
4692 case ROTATE:
4693 if (!CONST_INT_P (XEXP (x, 1)))
4694 *cost += COSTS_N_INSNS (2);
4695 /* Fall through. */
4696 case ROTATERT:
4697 case LSHIFTRT:
4698 case ASHIFT:
4699 case ASHIFTRT:
4700
4701 /* Shifting by a register often takes an extra cycle. */
4702 if (speed && !CONST_INT_P (XEXP (x, 1)))
4703 *cost += extra_cost->register_shift;
4704
4705 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4706 return true;
4707
4708 case HIGH:
4709 if (!CONSTANT_P (XEXP (x, 0)))
4710 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4711 return true;
4712
4713 case LO_SUM:
4714 if (!CONSTANT_P (XEXP (x, 1)))
4715 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4716 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4717 return true;
4718
4719 case ZERO_EXTRACT:
4720 case SIGN_EXTRACT:
4721 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4722 return true;
4723
4724 case MULT:
4725 op0 = XEXP (x, 0);
4726 op1 = XEXP (x, 1);
4727
4728 *cost = COSTS_N_INSNS (1);
4729 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4730 {
4731 if (CONST_INT_P (op1)
4732 && exact_log2 (INTVAL (op1)) > 0)
4733 {
4734 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4735 return true;
4736 }
4737
4738 if ((GET_CODE (op0) == ZERO_EXTEND
4739 && GET_CODE (op1) == ZERO_EXTEND)
4740 || (GET_CODE (op0) == SIGN_EXTEND
4741 && GET_CODE (op1) == SIGN_EXTEND))
4742 {
4743 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4744 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4745 if (speed)
4746 *cost += extra_cost->int_multiply_extend;
4747 return true;
4748 }
4749
4750 if (speed)
4751 *cost += extra_cost->int_multiply;
4752 }
4753 else if (speed)
4754 {
4755 if (GET_MODE (x) == DFmode)
4756 *cost += extra_cost->double_multiply;
4757 else if (GET_MODE (x) == SFmode)
4758 *cost += extra_cost->float_multiply;
4759 }
4760
4761 return false; /* All arguments need to be in registers. */
4762
4763 case MOD:
4764 case UMOD:
4765 *cost = COSTS_N_INSNS (2);
4766 if (speed)
4767 {
4768 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4769 *cost += (extra_cost->int_multiply_add
4770 + extra_cost->int_divide);
4771 else if (GET_MODE (x) == DFmode)
4772 *cost += (extra_cost->double_multiply
4773 + extra_cost->double_divide);
4774 else if (GET_MODE (x) == SFmode)
4775 *cost += (extra_cost->float_multiply
4776 + extra_cost->float_divide);
4777 }
4778 return false; /* All arguments need to be in registers. */
4779
4780 case DIV:
4781 case UDIV:
4782 *cost = COSTS_N_INSNS (1);
4783 if (speed)
4784 {
4785 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4786 *cost += extra_cost->int_divide;
4787 else if (GET_MODE (x) == DFmode)
4788 *cost += extra_cost->double_divide;
4789 else if (GET_MODE (x) == SFmode)
4790 *cost += extra_cost->float_divide;
4791 }
4792 return false; /* All arguments need to be in registers. */
4793
4794 default:
4795 break;
4796 }
4797 return false;
4798 }
4799
4800 static int
4801 aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4802 enum machine_mode mode ATTRIBUTE_UNUSED,
4803 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4804 {
4805 enum rtx_code c = GET_CODE (x);
4806 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4807
4808 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4809 return addr_cost->pre_modify;
4810
4811 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4812 return addr_cost->post_modify;
4813
4814 if (c == PLUS)
4815 {
4816 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4817 return addr_cost->imm_offset;
4818 else if (GET_CODE (XEXP (x, 0)) == MULT
4819 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4820 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4821 return addr_cost->register_extend;
4822
4823 return addr_cost->register_offset;
4824 }
4825 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4826 return addr_cost->imm_offset;
4827
4828 return 0;
4829 }
4830
4831 static int
4832 aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4833 reg_class_t from, reg_class_t to)
4834 {
4835 const struct cpu_regmove_cost *regmove_cost
4836 = aarch64_tune_params->regmove_cost;
4837
4838 if (from == GENERAL_REGS && to == GENERAL_REGS)
4839 return regmove_cost->GP2GP;
4840 else if (from == GENERAL_REGS)
4841 return regmove_cost->GP2FP;
4842 else if (to == GENERAL_REGS)
4843 return regmove_cost->FP2GP;
4844
4845 /* When AdvSIMD instructions are disabled it is not possible to move
4846 a 128-bit value directly between Q registers. This is handled in
4847 secondary reload. A general register is used as a scratch to move
4848 the upper DI value and the lower DI value is moved directly,
4849 hence the cost is the sum of three moves. */
4850
4851 if (! TARGET_SIMD && GET_MODE_SIZE (from) == 128 && GET_MODE_SIZE (to) == 128)
4852 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4853
4854 return regmove_cost->FP2FP;
4855 }
4856
4857 static int
4858 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4859 reg_class_t rclass ATTRIBUTE_UNUSED,
4860 bool in ATTRIBUTE_UNUSED)
4861 {
4862 return aarch64_tune_params->memmov_cost;
4863 }
4864
4865 /* Vectorizer cost model target hooks. */
4866
4867 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4868 static int
4869 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4870 tree vectype,
4871 int misalign ATTRIBUTE_UNUSED)
4872 {
4873 unsigned elements;
4874
4875 switch (type_of_cost)
4876 {
4877 case scalar_stmt:
4878 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
4879
4880 case scalar_load:
4881 return aarch64_tune_params->vec_costs->scalar_load_cost;
4882
4883 case scalar_store:
4884 return aarch64_tune_params->vec_costs->scalar_store_cost;
4885
4886 case vector_stmt:
4887 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4888
4889 case vector_load:
4890 return aarch64_tune_params->vec_costs->vec_align_load_cost;
4891
4892 case vector_store:
4893 return aarch64_tune_params->vec_costs->vec_store_cost;
4894
4895 case vec_to_scalar:
4896 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
4897
4898 case scalar_to_vec:
4899 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
4900
4901 case unaligned_load:
4902 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
4903
4904 case unaligned_store:
4905 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
4906
4907 case cond_branch_taken:
4908 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
4909
4910 case cond_branch_not_taken:
4911 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
4912
4913 case vec_perm:
4914 case vec_promote_demote:
4915 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4916
4917 case vec_construct:
4918 elements = TYPE_VECTOR_SUBPARTS (vectype);
4919 return elements / 2 + 1;
4920
4921 default:
4922 gcc_unreachable ();
4923 }
4924 }
4925
4926 /* Implement targetm.vectorize.add_stmt_cost. */
4927 static unsigned
4928 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4929 struct _stmt_vec_info *stmt_info, int misalign,
4930 enum vect_cost_model_location where)
4931 {
4932 unsigned *cost = (unsigned *) data;
4933 unsigned retval = 0;
4934
4935 if (flag_vect_cost_model)
4936 {
4937 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4938 int stmt_cost =
4939 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
4940
4941 /* Statements in an inner loop relative to the loop being
4942 vectorized are weighted more heavily. The value here is
4943 a function (linear for now) of the loop nest level. */
4944 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4945 {
4946 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4947 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
4948 unsigned nest_level = loop_depth (loop);
4949
4950 count *= nest_level;
4951 }
4952
4953 retval = (unsigned) (count * stmt_cost);
4954 cost[where] += retval;
4955 }
4956
4957 return retval;
4958 }
4959
4960 static void initialize_aarch64_code_model (void);
4961
4962 /* Parse the architecture extension string. */
4963
4964 static void
4965 aarch64_parse_extension (char *str)
4966 {
4967 /* The extension string is parsed left to right. */
4968 const struct aarch64_option_extension *opt = NULL;
4969
4970 /* Flag to say whether we are adding or removing an extension. */
4971 int adding_ext = -1;
4972
4973 while (str != NULL && *str != 0)
4974 {
4975 char *ext;
4976 size_t len;
4977
4978 str++;
4979 ext = strchr (str, '+');
4980
4981 if (ext != NULL)
4982 len = ext - str;
4983 else
4984 len = strlen (str);
4985
4986 if (len >= 2 && strncmp (str, "no", 2) == 0)
4987 {
4988 adding_ext = 0;
4989 len -= 2;
4990 str += 2;
4991 }
4992 else if (len > 0)
4993 adding_ext = 1;
4994
4995 if (len == 0)
4996 {
4997 error ("missing feature modifier after %qs", "+no");
4998 return;
4999 }
5000
5001 /* Scan over the extensions table trying to find an exact match. */
5002 for (opt = all_extensions; opt->name != NULL; opt++)
5003 {
5004 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
5005 {
5006 /* Add or remove the extension. */
5007 if (adding_ext)
5008 aarch64_isa_flags |= opt->flags_on;
5009 else
5010 aarch64_isa_flags &= ~(opt->flags_off);
5011 break;
5012 }
5013 }
5014
5015 if (opt->name == NULL)
5016 {
5017 /* Extension not found in list. */
5018 error ("unknown feature modifier %qs", str);
5019 return;
5020 }
5021
5022 str = ext;
5023 };
5024
5025 return;
5026 }
5027
5028 /* Parse the ARCH string. */
5029
5030 static void
5031 aarch64_parse_arch (void)
5032 {
5033 char *ext;
5034 const struct processor *arch;
5035 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
5036 size_t len;
5037
5038 strcpy (str, aarch64_arch_string);
5039
5040 ext = strchr (str, '+');
5041
5042 if (ext != NULL)
5043 len = ext - str;
5044 else
5045 len = strlen (str);
5046
5047 if (len == 0)
5048 {
5049 error ("missing arch name in -march=%qs", str);
5050 return;
5051 }
5052
5053 /* Loop through the list of supported ARCHs to find a match. */
5054 for (arch = all_architectures; arch->name != NULL; arch++)
5055 {
5056 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5057 {
5058 selected_arch = arch;
5059 aarch64_isa_flags = selected_arch->flags;
5060 selected_cpu = &all_cores[selected_arch->core];
5061
5062 if (ext != NULL)
5063 {
5064 /* ARCH string contains at least one extension. */
5065 aarch64_parse_extension (ext);
5066 }
5067
5068 return;
5069 }
5070 }
5071
5072 /* ARCH name not found in list. */
5073 error ("unknown value %qs for -march", str);
5074 return;
5075 }
5076
5077 /* Parse the CPU string. */
5078
5079 static void
5080 aarch64_parse_cpu (void)
5081 {
5082 char *ext;
5083 const struct processor *cpu;
5084 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5085 size_t len;
5086
5087 strcpy (str, aarch64_cpu_string);
5088
5089 ext = strchr (str, '+');
5090
5091 if (ext != NULL)
5092 len = ext - str;
5093 else
5094 len = strlen (str);
5095
5096 if (len == 0)
5097 {
5098 error ("missing cpu name in -mcpu=%qs", str);
5099 return;
5100 }
5101
5102 /* Loop through the list of supported CPUs to find a match. */
5103 for (cpu = all_cores; cpu->name != NULL; cpu++)
5104 {
5105 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5106 {
5107 selected_cpu = cpu;
5108 aarch64_isa_flags = selected_cpu->flags;
5109
5110 if (ext != NULL)
5111 {
5112 /* CPU string contains at least one extension. */
5113 aarch64_parse_extension (ext);
5114 }
5115
5116 return;
5117 }
5118 }
5119
5120 /* CPU name not found in list. */
5121 error ("unknown value %qs for -mcpu", str);
5122 return;
5123 }
5124
5125 /* Parse the TUNE string. */
5126
5127 static void
5128 aarch64_parse_tune (void)
5129 {
5130 const struct processor *cpu;
5131 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5132 strcpy (str, aarch64_tune_string);
5133
5134 /* Loop through the list of supported CPUs to find a match. */
5135 for (cpu = all_cores; cpu->name != NULL; cpu++)
5136 {
5137 if (strcmp (cpu->name, str) == 0)
5138 {
5139 selected_tune = cpu;
5140 return;
5141 }
5142 }
5143
5144 /* CPU name not found in list. */
5145 error ("unknown value %qs for -mtune", str);
5146 return;
5147 }
5148
5149
5150 /* Implement TARGET_OPTION_OVERRIDE. */
5151
5152 static void
5153 aarch64_override_options (void)
5154 {
5155 /* march wins over mcpu, so when march is defined, mcpu takes the same value,
5156 otherwise march remains undefined. mtune can be used with either march or
5157 mcpu. */
5158
5159 if (aarch64_arch_string)
5160 {
5161 aarch64_parse_arch ();
5162 aarch64_cpu_string = NULL;
5163 }
5164
5165 if (aarch64_cpu_string)
5166 {
5167 aarch64_parse_cpu ();
5168 selected_arch = NULL;
5169 }
5170
5171 if (aarch64_tune_string)
5172 {
5173 aarch64_parse_tune ();
5174 }
5175
5176 initialize_aarch64_code_model ();
5177
5178 aarch64_build_bitmask_table ();
5179
5180 /* This target defaults to strict volatile bitfields. */
5181 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5182 flag_strict_volatile_bitfields = 1;
5183
5184 /* If the user did not specify a processor, choose the default
5185 one for them. This will be the CPU set during configuration using
5186 --with-cpu, otherwise it is "generic". */
5187 if (!selected_cpu)
5188 {
5189 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5190 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5191 }
5192
5193 gcc_assert (selected_cpu);
5194
5195 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
5196 if (!selected_tune)
5197 selected_tune = &all_cores[selected_cpu->core];
5198
5199 aarch64_tune_flags = selected_tune->flags;
5200 aarch64_tune = selected_tune->core;
5201 aarch64_tune_params = selected_tune->tune;
5202
5203 aarch64_override_options_after_change ();
5204 }
5205
5206 /* Implement targetm.override_options_after_change. */
5207
5208 static void
5209 aarch64_override_options_after_change (void)
5210 {
5211 faked_omit_frame_pointer = false;
5212
5213 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
5214 that aarch64_frame_pointer_required will be called. We need to remember
5215 whether flag_omit_frame_pointer was turned on normally or just faked. */
5216
5217 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
5218 {
5219 flag_omit_frame_pointer = true;
5220 faked_omit_frame_pointer = true;
5221 }
5222 }
5223
5224 static struct machine_function *
5225 aarch64_init_machine_status (void)
5226 {
5227 struct machine_function *machine;
5228 machine = ggc_alloc_cleared_machine_function ();
5229 return machine;
5230 }
5231
5232 void
5233 aarch64_init_expanders (void)
5234 {
5235 init_machine_status = aarch64_init_machine_status;
5236 }
5237
5238 /* A checking mechanism for the implementation of the various code models. */
5239 static void
5240 initialize_aarch64_code_model (void)
5241 {
5242 if (flag_pic)
5243 {
5244 switch (aarch64_cmodel_var)
5245 {
5246 case AARCH64_CMODEL_TINY:
5247 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5248 break;
5249 case AARCH64_CMODEL_SMALL:
5250 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5251 break;
5252 case AARCH64_CMODEL_LARGE:
5253 sorry ("code model %qs with -f%s", "large",
5254 flag_pic > 1 ? "PIC" : "pic");
5255 default:
5256 gcc_unreachable ();
5257 }
5258 }
5259 else
5260 aarch64_cmodel = aarch64_cmodel_var;
5261 }
5262
5263 /* Return true if SYMBOL_REF X binds locally. */
5264
5265 static bool
5266 aarch64_symbol_binds_local_p (const_rtx x)
5267 {
5268 return (SYMBOL_REF_DECL (x)
5269 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5270 : SYMBOL_REF_LOCAL_P (x));
5271 }
5272
5273 /* Return true if SYMBOL_REF X is thread local */
5274 static bool
5275 aarch64_tls_symbol_p (rtx x)
5276 {
5277 if (! TARGET_HAVE_TLS)
5278 return false;
5279
5280 if (GET_CODE (x) != SYMBOL_REF)
5281 return false;
5282
5283 return SYMBOL_REF_TLS_MODEL (x) != 0;
5284 }
5285
5286 /* Classify a TLS symbol into one of the TLS kinds. */
5287 enum aarch64_symbol_type
5288 aarch64_classify_tls_symbol (rtx x)
5289 {
5290 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5291
5292 switch (tls_kind)
5293 {
5294 case TLS_MODEL_GLOBAL_DYNAMIC:
5295 case TLS_MODEL_LOCAL_DYNAMIC:
5296 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5297
5298 case TLS_MODEL_INITIAL_EXEC:
5299 return SYMBOL_SMALL_GOTTPREL;
5300
5301 case TLS_MODEL_LOCAL_EXEC:
5302 return SYMBOL_SMALL_TPREL;
5303
5304 case TLS_MODEL_EMULATED:
5305 case TLS_MODEL_NONE:
5306 return SYMBOL_FORCE_TO_MEM;
5307
5308 default:
5309 gcc_unreachable ();
5310 }
5311 }
5312
5313 /* Return the method that should be used to access SYMBOL_REF or
5314 LABEL_REF X in context CONTEXT. */
5315
5316 enum aarch64_symbol_type
5317 aarch64_classify_symbol (rtx x,
5318 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5319 {
5320 if (GET_CODE (x) == LABEL_REF)
5321 {
5322 switch (aarch64_cmodel)
5323 {
5324 case AARCH64_CMODEL_LARGE:
5325 return SYMBOL_FORCE_TO_MEM;
5326
5327 case AARCH64_CMODEL_TINY_PIC:
5328 case AARCH64_CMODEL_TINY:
5329 return SYMBOL_TINY_ABSOLUTE;
5330
5331 case AARCH64_CMODEL_SMALL_PIC:
5332 case AARCH64_CMODEL_SMALL:
5333 return SYMBOL_SMALL_ABSOLUTE;
5334
5335 default:
5336 gcc_unreachable ();
5337 }
5338 }
5339
5340 if (GET_CODE (x) == SYMBOL_REF)
5341 {
5342 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
5343 || CONSTANT_POOL_ADDRESS_P (x))
5344 return SYMBOL_FORCE_TO_MEM;
5345
5346 if (aarch64_tls_symbol_p (x))
5347 return aarch64_classify_tls_symbol (x);
5348
5349 switch (aarch64_cmodel)
5350 {
5351 case AARCH64_CMODEL_TINY:
5352 if (SYMBOL_REF_WEAK (x))
5353 return SYMBOL_FORCE_TO_MEM;
5354 return SYMBOL_TINY_ABSOLUTE;
5355
5356 case AARCH64_CMODEL_SMALL:
5357 if (SYMBOL_REF_WEAK (x))
5358 return SYMBOL_FORCE_TO_MEM;
5359 return SYMBOL_SMALL_ABSOLUTE;
5360
5361 case AARCH64_CMODEL_TINY_PIC:
5362 if (!aarch64_symbol_binds_local_p (x))
5363 return SYMBOL_TINY_GOT;
5364 return SYMBOL_TINY_ABSOLUTE;
5365
5366 case AARCH64_CMODEL_SMALL_PIC:
5367 if (!aarch64_symbol_binds_local_p (x))
5368 return SYMBOL_SMALL_GOT;
5369 return SYMBOL_SMALL_ABSOLUTE;
5370
5371 default:
5372 gcc_unreachable ();
5373 }
5374 }
5375
5376 /* By default push everything into the constant pool. */
5377 return SYMBOL_FORCE_TO_MEM;
5378 }
5379
5380 bool
5381 aarch64_constant_address_p (rtx x)
5382 {
5383 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5384 }
5385
5386 bool
5387 aarch64_legitimate_pic_operand_p (rtx x)
5388 {
5389 if (GET_CODE (x) == SYMBOL_REF
5390 || (GET_CODE (x) == CONST
5391 && GET_CODE (XEXP (x, 0)) == PLUS
5392 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5393 return false;
5394
5395 return true;
5396 }
5397
5398 /* Return true if X holds either a quarter-precision or
5399 floating-point +0.0 constant. */
5400 static bool
5401 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5402 {
5403 if (!CONST_DOUBLE_P (x))
5404 return false;
5405
5406 /* TODO: We could handle moving 0.0 to a TFmode register,
5407 but first we would like to refactor the movtf_aarch64
5408 to be more amicable to split moves properly and
5409 correctly gate on TARGET_SIMD. For now - reject all
5410 constants which are not to SFmode or DFmode registers. */
5411 if (!(mode == SFmode || mode == DFmode))
5412 return false;
5413
5414 if (aarch64_float_const_zero_rtx_p (x))
5415 return true;
5416 return aarch64_float_const_representable_p (x);
5417 }
5418
5419 static bool
5420 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5421 {
5422 /* Do not allow vector struct mode constants. We could support
5423 0 and -1 easily, but they need support in aarch64-simd.md. */
5424 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5425 return false;
5426
5427 /* This could probably go away because
5428 we now decompose CONST_INTs according to expand_mov_immediate. */
5429 if ((GET_CODE (x) == CONST_VECTOR
5430 && aarch64_simd_valid_immediate (x, mode, false, NULL))
5431 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5432 return !targetm.cannot_force_const_mem (mode, x);
5433
5434 if (GET_CODE (x) == HIGH
5435 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5436 return true;
5437
5438 return aarch64_constant_address_p (x);
5439 }
5440
5441 rtx
5442 aarch64_load_tp (rtx target)
5443 {
5444 if (!target
5445 || GET_MODE (target) != Pmode
5446 || !register_operand (target, Pmode))
5447 target = gen_reg_rtx (Pmode);
5448
5449 /* Can return in any reg. */
5450 emit_insn (gen_aarch64_load_tp_hard (target));
5451 return target;
5452 }
5453
5454 /* On AAPCS systems, this is the "struct __va_list". */
5455 static GTY(()) tree va_list_type;
5456
5457 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5458 Return the type to use as __builtin_va_list.
5459
5460 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5461
5462 struct __va_list
5463 {
5464 void *__stack;
5465 void *__gr_top;
5466 void *__vr_top;
5467 int __gr_offs;
5468 int __vr_offs;
5469 }; */
5470
5471 static tree
5472 aarch64_build_builtin_va_list (void)
5473 {
5474 tree va_list_name;
5475 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5476
5477 /* Create the type. */
5478 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5479 /* Give it the required name. */
5480 va_list_name = build_decl (BUILTINS_LOCATION,
5481 TYPE_DECL,
5482 get_identifier ("__va_list"),
5483 va_list_type);
5484 DECL_ARTIFICIAL (va_list_name) = 1;
5485 TYPE_NAME (va_list_type) = va_list_name;
5486 TYPE_STUB_DECL (va_list_type) = va_list_name;
5487
5488 /* Create the fields. */
5489 f_stack = build_decl (BUILTINS_LOCATION,
5490 FIELD_DECL, get_identifier ("__stack"),
5491 ptr_type_node);
5492 f_grtop = build_decl (BUILTINS_LOCATION,
5493 FIELD_DECL, get_identifier ("__gr_top"),
5494 ptr_type_node);
5495 f_vrtop = build_decl (BUILTINS_LOCATION,
5496 FIELD_DECL, get_identifier ("__vr_top"),
5497 ptr_type_node);
5498 f_groff = build_decl (BUILTINS_LOCATION,
5499 FIELD_DECL, get_identifier ("__gr_offs"),
5500 integer_type_node);
5501 f_vroff = build_decl (BUILTINS_LOCATION,
5502 FIELD_DECL, get_identifier ("__vr_offs"),
5503 integer_type_node);
5504
5505 DECL_ARTIFICIAL (f_stack) = 1;
5506 DECL_ARTIFICIAL (f_grtop) = 1;
5507 DECL_ARTIFICIAL (f_vrtop) = 1;
5508 DECL_ARTIFICIAL (f_groff) = 1;
5509 DECL_ARTIFICIAL (f_vroff) = 1;
5510
5511 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5512 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5513 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5514 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5515 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5516
5517 TYPE_FIELDS (va_list_type) = f_stack;
5518 DECL_CHAIN (f_stack) = f_grtop;
5519 DECL_CHAIN (f_grtop) = f_vrtop;
5520 DECL_CHAIN (f_vrtop) = f_groff;
5521 DECL_CHAIN (f_groff) = f_vroff;
5522
5523 /* Compute its layout. */
5524 layout_type (va_list_type);
5525
5526 return va_list_type;
5527 }
5528
5529 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5530 static void
5531 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5532 {
5533 const CUMULATIVE_ARGS *cum;
5534 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5535 tree stack, grtop, vrtop, groff, vroff;
5536 tree t;
5537 int gr_save_area_size;
5538 int vr_save_area_size;
5539 int vr_offset;
5540
5541 cum = &crtl->args.info;
5542 gr_save_area_size
5543 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5544 vr_save_area_size
5545 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5546
5547 if (TARGET_GENERAL_REGS_ONLY)
5548 {
5549 if (cum->aapcs_nvrn > 0)
5550 sorry ("%qs and floating point or vector arguments",
5551 "-mgeneral-regs-only");
5552 vr_save_area_size = 0;
5553 }
5554
5555 f_stack = TYPE_FIELDS (va_list_type_node);
5556 f_grtop = DECL_CHAIN (f_stack);
5557 f_vrtop = DECL_CHAIN (f_grtop);
5558 f_groff = DECL_CHAIN (f_vrtop);
5559 f_vroff = DECL_CHAIN (f_groff);
5560
5561 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5562 NULL_TREE);
5563 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5564 NULL_TREE);
5565 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5566 NULL_TREE);
5567 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5568 NULL_TREE);
5569 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5570 NULL_TREE);
5571
5572 /* Emit code to initialize STACK, which points to the next varargs stack
5573 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5574 by named arguments. STACK is 8-byte aligned. */
5575 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5576 if (cum->aapcs_stack_size > 0)
5577 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5578 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5579 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5580
5581 /* Emit code to initialize GRTOP, the top of the GR save area.
5582 virtual_incoming_args_rtx should have been 16 byte aligned. */
5583 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5584 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5585 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5586
5587 /* Emit code to initialize VRTOP, the top of the VR save area.
5588 This address is gr_save_area_bytes below GRTOP, rounded
5589 down to the next 16-byte boundary. */
5590 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5591 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5592 STACK_BOUNDARY / BITS_PER_UNIT);
5593
5594 if (vr_offset)
5595 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5596 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5597 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5598
5599 /* Emit code to initialize GROFF, the offset from GRTOP of the
5600 next GPR argument. */
5601 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5602 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5603 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5604
5605 /* Likewise emit code to initialize VROFF, the offset from FTOP
5606 of the next VR argument. */
5607 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5608 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5609 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5610 }
5611
5612 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5613
5614 static tree
5615 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5616 gimple_seq *post_p ATTRIBUTE_UNUSED)
5617 {
5618 tree addr;
5619 bool indirect_p;
5620 bool is_ha; /* is HFA or HVA. */
5621 bool dw_align; /* double-word align. */
5622 enum machine_mode ag_mode = VOIDmode;
5623 int nregs;
5624 enum machine_mode mode;
5625
5626 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5627 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5628 HOST_WIDE_INT size, rsize, adjust, align;
5629 tree t, u, cond1, cond2;
5630
5631 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5632 if (indirect_p)
5633 type = build_pointer_type (type);
5634
5635 mode = TYPE_MODE (type);
5636
5637 f_stack = TYPE_FIELDS (va_list_type_node);
5638 f_grtop = DECL_CHAIN (f_stack);
5639 f_vrtop = DECL_CHAIN (f_grtop);
5640 f_groff = DECL_CHAIN (f_vrtop);
5641 f_vroff = DECL_CHAIN (f_groff);
5642
5643 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5644 f_stack, NULL_TREE);
5645 size = int_size_in_bytes (type);
5646 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5647
5648 dw_align = false;
5649 adjust = 0;
5650 if (aarch64_vfp_is_call_or_return_candidate (mode,
5651 type,
5652 &ag_mode,
5653 &nregs,
5654 &is_ha))
5655 {
5656 /* TYPE passed in fp/simd registers. */
5657 if (TARGET_GENERAL_REGS_ONLY)
5658 sorry ("%qs and floating point or vector arguments",
5659 "-mgeneral-regs-only");
5660
5661 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5662 unshare_expr (valist), f_vrtop, NULL_TREE);
5663 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5664 unshare_expr (valist), f_vroff, NULL_TREE);
5665
5666 rsize = nregs * UNITS_PER_VREG;
5667
5668 if (is_ha)
5669 {
5670 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5671 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5672 }
5673 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5674 && size < UNITS_PER_VREG)
5675 {
5676 adjust = UNITS_PER_VREG - size;
5677 }
5678 }
5679 else
5680 {
5681 /* TYPE passed in general registers. */
5682 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5683 unshare_expr (valist), f_grtop, NULL_TREE);
5684 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5685 unshare_expr (valist), f_groff, NULL_TREE);
5686 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5687 nregs = rsize / UNITS_PER_WORD;
5688
5689 if (align > 8)
5690 dw_align = true;
5691
5692 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5693 && size < UNITS_PER_WORD)
5694 {
5695 adjust = UNITS_PER_WORD - size;
5696 }
5697 }
5698
5699 /* Get a local temporary for the field value. */
5700 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5701
5702 /* Emit code to branch if off >= 0. */
5703 t = build2 (GE_EXPR, boolean_type_node, off,
5704 build_int_cst (TREE_TYPE (off), 0));
5705 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5706
5707 if (dw_align)
5708 {
5709 /* Emit: offs = (offs + 15) & -16. */
5710 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5711 build_int_cst (TREE_TYPE (off), 15));
5712 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5713 build_int_cst (TREE_TYPE (off), -16));
5714 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5715 }
5716 else
5717 roundup = NULL;
5718
5719 /* Update ap.__[g|v]r_offs */
5720 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5721 build_int_cst (TREE_TYPE (off), rsize));
5722 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5723
5724 /* String up. */
5725 if (roundup)
5726 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5727
5728 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5729 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5730 build_int_cst (TREE_TYPE (f_off), 0));
5731 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5732
5733 /* String up: make sure the assignment happens before the use. */
5734 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5735 COND_EXPR_ELSE (cond1) = t;
5736
5737 /* Prepare the trees handling the argument that is passed on the stack;
5738 the top level node will store in ON_STACK. */
5739 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5740 if (align > 8)
5741 {
5742 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5743 t = fold_convert (intDI_type_node, arg);
5744 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5745 build_int_cst (TREE_TYPE (t), 15));
5746 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5747 build_int_cst (TREE_TYPE (t), -16));
5748 t = fold_convert (TREE_TYPE (arg), t);
5749 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5750 }
5751 else
5752 roundup = NULL;
5753 /* Advance ap.__stack */
5754 t = fold_convert (intDI_type_node, arg);
5755 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5756 build_int_cst (TREE_TYPE (t), size + 7));
5757 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5758 build_int_cst (TREE_TYPE (t), -8));
5759 t = fold_convert (TREE_TYPE (arg), t);
5760 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5761 /* String up roundup and advance. */
5762 if (roundup)
5763 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5764 /* String up with arg */
5765 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5766 /* Big-endianness related address adjustment. */
5767 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5768 && size < UNITS_PER_WORD)
5769 {
5770 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5771 size_int (UNITS_PER_WORD - size));
5772 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5773 }
5774
5775 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5776 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5777
5778 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5779 t = off;
5780 if (adjust)
5781 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5782 build_int_cst (TREE_TYPE (off), adjust));
5783
5784 t = fold_convert (sizetype, t);
5785 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5786
5787 if (is_ha)
5788 {
5789 /* type ha; // treat as "struct {ftype field[n];}"
5790 ... [computing offs]
5791 for (i = 0; i <nregs; ++i, offs += 16)
5792 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5793 return ha; */
5794 int i;
5795 tree tmp_ha, field_t, field_ptr_t;
5796
5797 /* Declare a local variable. */
5798 tmp_ha = create_tmp_var_raw (type, "ha");
5799 gimple_add_tmp_var (tmp_ha);
5800
5801 /* Establish the base type. */
5802 switch (ag_mode)
5803 {
5804 case SFmode:
5805 field_t = float_type_node;
5806 field_ptr_t = float_ptr_type_node;
5807 break;
5808 case DFmode:
5809 field_t = double_type_node;
5810 field_ptr_t = double_ptr_type_node;
5811 break;
5812 case TFmode:
5813 field_t = long_double_type_node;
5814 field_ptr_t = long_double_ptr_type_node;
5815 break;
5816 /* The half precision and quad precision are not fully supported yet. Enable
5817 the following code after the support is complete. Need to find the correct
5818 type node for __fp16 *. */
5819 #if 0
5820 case HFmode:
5821 field_t = float_type_node;
5822 field_ptr_t = float_ptr_type_node;
5823 break;
5824 #endif
5825 case V2SImode:
5826 case V4SImode:
5827 {
5828 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5829 field_t = build_vector_type_for_mode (innertype, ag_mode);
5830 field_ptr_t = build_pointer_type (field_t);
5831 }
5832 break;
5833 default:
5834 gcc_assert (0);
5835 }
5836
5837 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
5838 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5839 addr = t;
5840 t = fold_convert (field_ptr_t, addr);
5841 t = build2 (MODIFY_EXPR, field_t,
5842 build1 (INDIRECT_REF, field_t, tmp_ha),
5843 build1 (INDIRECT_REF, field_t, t));
5844
5845 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5846 for (i = 1; i < nregs; ++i)
5847 {
5848 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5849 u = fold_convert (field_ptr_t, addr);
5850 u = build2 (MODIFY_EXPR, field_t,
5851 build2 (MEM_REF, field_t, tmp_ha,
5852 build_int_cst (field_ptr_t,
5853 (i *
5854 int_size_in_bytes (field_t)))),
5855 build1 (INDIRECT_REF, field_t, u));
5856 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5857 }
5858
5859 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5860 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5861 }
5862
5863 COND_EXPR_ELSE (cond2) = t;
5864 addr = fold_convert (build_pointer_type (type), cond1);
5865 addr = build_va_arg_indirect_ref (addr);
5866
5867 if (indirect_p)
5868 addr = build_va_arg_indirect_ref (addr);
5869
5870 return addr;
5871 }
5872
5873 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5874
5875 static void
5876 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5877 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5878 int no_rtl)
5879 {
5880 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5881 CUMULATIVE_ARGS local_cum;
5882 int gr_saved, vr_saved;
5883
5884 /* The caller has advanced CUM up to, but not beyond, the last named
5885 argument. Advance a local copy of CUM past the last "real" named
5886 argument, to find out how many registers are left over. */
5887 local_cum = *cum;
5888 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
5889
5890 /* Found out how many registers we need to save. */
5891 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5892 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5893
5894 if (TARGET_GENERAL_REGS_ONLY)
5895 {
5896 if (local_cum.aapcs_nvrn > 0)
5897 sorry ("%qs and floating point or vector arguments",
5898 "-mgeneral-regs-only");
5899 vr_saved = 0;
5900 }
5901
5902 if (!no_rtl)
5903 {
5904 if (gr_saved > 0)
5905 {
5906 rtx ptr, mem;
5907
5908 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5909 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5910 - gr_saved * UNITS_PER_WORD);
5911 mem = gen_frame_mem (BLKmode, ptr);
5912 set_mem_alias_set (mem, get_varargs_alias_set ());
5913
5914 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5915 mem, gr_saved);
5916 }
5917 if (vr_saved > 0)
5918 {
5919 /* We can't use move_block_from_reg, because it will use
5920 the wrong mode, storing D regs only. */
5921 enum machine_mode mode = TImode;
5922 int off, i;
5923
5924 /* Set OFF to the offset from virtual_incoming_args_rtx of
5925 the first vector register. The VR save area lies below
5926 the GR one, and is aligned to 16 bytes. */
5927 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5928 STACK_BOUNDARY / BITS_PER_UNIT);
5929 off -= vr_saved * UNITS_PER_VREG;
5930
5931 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5932 {
5933 rtx ptr, mem;
5934
5935 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5936 mem = gen_frame_mem (mode, ptr);
5937 set_mem_alias_set (mem, get_varargs_alias_set ());
5938 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5939 off += UNITS_PER_VREG;
5940 }
5941 }
5942 }
5943
5944 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5945 any complication of having crtl->args.pretend_args_size changed. */
5946 cfun->machine->saved_varargs_size
5947 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5948 STACK_BOUNDARY / BITS_PER_UNIT)
5949 + vr_saved * UNITS_PER_VREG);
5950 }
5951
5952 static void
5953 aarch64_conditional_register_usage (void)
5954 {
5955 int i;
5956 if (!TARGET_FLOAT)
5957 {
5958 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5959 {
5960 fixed_regs[i] = 1;
5961 call_used_regs[i] = 1;
5962 }
5963 }
5964 }
5965
5966 /* Walk down the type tree of TYPE counting consecutive base elements.
5967 If *MODEP is VOIDmode, then set it to the first valid floating point
5968 type. If a non-floating point type is found, or if a floating point
5969 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5970 otherwise return the count in the sub-tree. */
5971 static int
5972 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5973 {
5974 enum machine_mode mode;
5975 HOST_WIDE_INT size;
5976
5977 switch (TREE_CODE (type))
5978 {
5979 case REAL_TYPE:
5980 mode = TYPE_MODE (type);
5981 if (mode != DFmode && mode != SFmode && mode != TFmode)
5982 return -1;
5983
5984 if (*modep == VOIDmode)
5985 *modep = mode;
5986
5987 if (*modep == mode)
5988 return 1;
5989
5990 break;
5991
5992 case COMPLEX_TYPE:
5993 mode = TYPE_MODE (TREE_TYPE (type));
5994 if (mode != DFmode && mode != SFmode && mode != TFmode)
5995 return -1;
5996
5997 if (*modep == VOIDmode)
5998 *modep = mode;
5999
6000 if (*modep == mode)
6001 return 2;
6002
6003 break;
6004
6005 case VECTOR_TYPE:
6006 /* Use V2SImode and V4SImode as representatives of all 64-bit
6007 and 128-bit vector types. */
6008 size = int_size_in_bytes (type);
6009 switch (size)
6010 {
6011 case 8:
6012 mode = V2SImode;
6013 break;
6014 case 16:
6015 mode = V4SImode;
6016 break;
6017 default:
6018 return -1;
6019 }
6020
6021 if (*modep == VOIDmode)
6022 *modep = mode;
6023
6024 /* Vector modes are considered to be opaque: two vectors are
6025 equivalent for the purposes of being homogeneous aggregates
6026 if they are the same size. */
6027 if (*modep == mode)
6028 return 1;
6029
6030 break;
6031
6032 case ARRAY_TYPE:
6033 {
6034 int count;
6035 tree index = TYPE_DOMAIN (type);
6036
6037 /* Can't handle incomplete types. */
6038 if (!COMPLETE_TYPE_P (type))
6039 return -1;
6040
6041 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6042 if (count == -1
6043 || !index
6044 || !TYPE_MAX_VALUE (index)
6045 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6046 || !TYPE_MIN_VALUE (index)
6047 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6048 || count < 0)
6049 return -1;
6050
6051 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6052 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6053
6054 /* There must be no padding. */
6055 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
6056 return -1;
6057
6058 return count;
6059 }
6060
6061 case RECORD_TYPE:
6062 {
6063 int count = 0;
6064 int sub_count;
6065 tree field;
6066
6067 /* Can't handle incomplete types. */
6068 if (!COMPLETE_TYPE_P (type))
6069 return -1;
6070
6071 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6072 {
6073 if (TREE_CODE (field) != FIELD_DECL)
6074 continue;
6075
6076 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6077 if (sub_count < 0)
6078 return -1;
6079 count += sub_count;
6080 }
6081
6082 /* There must be no padding. */
6083 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
6084 return -1;
6085
6086 return count;
6087 }
6088
6089 case UNION_TYPE:
6090 case QUAL_UNION_TYPE:
6091 {
6092 /* These aren't very interesting except in a degenerate case. */
6093 int count = 0;
6094 int sub_count;
6095 tree field;
6096
6097 /* Can't handle incomplete types. */
6098 if (!COMPLETE_TYPE_P (type))
6099 return -1;
6100
6101 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6102 {
6103 if (TREE_CODE (field) != FIELD_DECL)
6104 continue;
6105
6106 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6107 if (sub_count < 0)
6108 return -1;
6109 count = count > sub_count ? count : sub_count;
6110 }
6111
6112 /* There must be no padding. */
6113 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
6114 return -1;
6115
6116 return count;
6117 }
6118
6119 default:
6120 break;
6121 }
6122
6123 return -1;
6124 }
6125
6126 /* Return true if we use LRA instead of reload pass. */
6127 static bool
6128 aarch64_lra_p (void)
6129 {
6130 return aarch64_lra_flag;
6131 }
6132
6133 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6134 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6135 array types. The C99 floating-point complex types are also considered
6136 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6137 types, which are GCC extensions and out of the scope of AAPCS64, are
6138 treated as composite types here as well.
6139
6140 Note that MODE itself is not sufficient in determining whether a type
6141 is such a composite type or not. This is because
6142 stor-layout.c:compute_record_mode may have already changed the MODE
6143 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6144 structure with only one field may have its MODE set to the mode of the
6145 field. Also an integer mode whose size matches the size of the
6146 RECORD_TYPE type may be used to substitute the original mode
6147 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6148 solely relied on. */
6149
6150 static bool
6151 aarch64_composite_type_p (const_tree type,
6152 enum machine_mode mode)
6153 {
6154 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6155 return true;
6156
6157 if (mode == BLKmode
6158 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6159 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6160 return true;
6161
6162 return false;
6163 }
6164
6165 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6166 type as described in AAPCS64 \S 4.1.2.
6167
6168 See the comment above aarch64_composite_type_p for the notes on MODE. */
6169
6170 static bool
6171 aarch64_short_vector_p (const_tree type,
6172 enum machine_mode mode)
6173 {
6174 HOST_WIDE_INT size = -1;
6175
6176 if (type && TREE_CODE (type) == VECTOR_TYPE)
6177 size = int_size_in_bytes (type);
6178 else if (!aarch64_composite_type_p (type, mode)
6179 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6180 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6181 size = GET_MODE_SIZE (mode);
6182
6183 return (size == 8 || size == 16) ? true : false;
6184 }
6185
6186 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6187 shall be passed or returned in simd/fp register(s) (providing these
6188 parameter passing registers are available).
6189
6190 Upon successful return, *COUNT returns the number of needed registers,
6191 *BASE_MODE returns the mode of the individual register and when IS_HAF
6192 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6193 floating-point aggregate or a homogeneous short-vector aggregate. */
6194
6195 static bool
6196 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6197 const_tree type,
6198 enum machine_mode *base_mode,
6199 int *count,
6200 bool *is_ha)
6201 {
6202 enum machine_mode new_mode = VOIDmode;
6203 bool composite_p = aarch64_composite_type_p (type, mode);
6204
6205 if (is_ha != NULL) *is_ha = false;
6206
6207 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6208 || aarch64_short_vector_p (type, mode))
6209 {
6210 *count = 1;
6211 new_mode = mode;
6212 }
6213 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6214 {
6215 if (is_ha != NULL) *is_ha = true;
6216 *count = 2;
6217 new_mode = GET_MODE_INNER (mode);
6218 }
6219 else if (type && composite_p)
6220 {
6221 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6222
6223 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6224 {
6225 if (is_ha != NULL) *is_ha = true;
6226 *count = ag_count;
6227 }
6228 else
6229 return false;
6230 }
6231 else
6232 return false;
6233
6234 *base_mode = new_mode;
6235 return true;
6236 }
6237
6238 /* Implement TARGET_STRUCT_VALUE_RTX. */
6239
6240 static rtx
6241 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6242 int incoming ATTRIBUTE_UNUSED)
6243 {
6244 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6245 }
6246
6247 /* Implements target hook vector_mode_supported_p. */
6248 static bool
6249 aarch64_vector_mode_supported_p (enum machine_mode mode)
6250 {
6251 if (TARGET_SIMD
6252 && (mode == V4SImode || mode == V8HImode
6253 || mode == V16QImode || mode == V2DImode
6254 || mode == V2SImode || mode == V4HImode
6255 || mode == V8QImode || mode == V2SFmode
6256 || mode == V4SFmode || mode == V2DFmode))
6257 return true;
6258
6259 return false;
6260 }
6261
6262 /* Return appropriate SIMD container
6263 for MODE within a vector of WIDTH bits. */
6264 static enum machine_mode
6265 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
6266 {
6267 gcc_assert (width == 64 || width == 128);
6268 if (TARGET_SIMD)
6269 {
6270 if (width == 128)
6271 switch (mode)
6272 {
6273 case DFmode:
6274 return V2DFmode;
6275 case SFmode:
6276 return V4SFmode;
6277 case SImode:
6278 return V4SImode;
6279 case HImode:
6280 return V8HImode;
6281 case QImode:
6282 return V16QImode;
6283 case DImode:
6284 return V2DImode;
6285 default:
6286 break;
6287 }
6288 else
6289 switch (mode)
6290 {
6291 case SFmode:
6292 return V2SFmode;
6293 case SImode:
6294 return V2SImode;
6295 case HImode:
6296 return V4HImode;
6297 case QImode:
6298 return V8QImode;
6299 default:
6300 break;
6301 }
6302 }
6303 return word_mode;
6304 }
6305
6306 /* Return 128-bit container as the preferred SIMD mode for MODE. */
6307 static enum machine_mode
6308 aarch64_preferred_simd_mode (enum machine_mode mode)
6309 {
6310 return aarch64_simd_container_mode (mode, 128);
6311 }
6312
6313 /* Return the bitmask of possible vector sizes for the vectorizer
6314 to iterate over. */
6315 static unsigned int
6316 aarch64_autovectorize_vector_sizes (void)
6317 {
6318 return (16 | 8);
6319 }
6320
6321 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6322 vector types in order to conform to the AAPCS64 (see "Procedure
6323 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6324 qualify for emission with the mangled names defined in that document,
6325 a vector type must not only be of the correct mode but also be
6326 composed of AdvSIMD vector element types (e.g.
6327 _builtin_aarch64_simd_qi); these types are registered by
6328 aarch64_init_simd_builtins (). In other words, vector types defined
6329 in other ways e.g. via vector_size attribute will get default
6330 mangled names. */
6331 typedef struct
6332 {
6333 enum machine_mode mode;
6334 const char *element_type_name;
6335 const char *mangled_name;
6336 } aarch64_simd_mangle_map_entry;
6337
6338 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6339 /* 64-bit containerized types. */
6340 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6341 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6342 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6343 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6344 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6345 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6346 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6347 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6348 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6349 /* 128-bit containerized types. */
6350 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6351 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6352 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6353 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6354 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6355 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6356 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6357 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6358 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6359 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6360 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6361 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6362 { VOIDmode, NULL, NULL }
6363 };
6364
6365 /* Implement TARGET_MANGLE_TYPE. */
6366
6367 static const char *
6368 aarch64_mangle_type (const_tree type)
6369 {
6370 /* The AArch64 ABI documents say that "__va_list" has to be
6371 managled as if it is in the "std" namespace. */
6372 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6373 return "St9__va_list";
6374
6375 /* Check the mode of the vector type, and the name of the vector
6376 element type, against the table. */
6377 if (TREE_CODE (type) == VECTOR_TYPE)
6378 {
6379 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6380
6381 while (pos->mode != VOIDmode)
6382 {
6383 tree elt_type = TREE_TYPE (type);
6384
6385 if (pos->mode == TYPE_MODE (type)
6386 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6387 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6388 pos->element_type_name))
6389 return pos->mangled_name;
6390
6391 pos++;
6392 }
6393 }
6394
6395 /* Use the default mangling. */
6396 return NULL;
6397 }
6398
6399 /* Return the equivalent letter for size. */
6400 static char
6401 sizetochar (int size)
6402 {
6403 switch (size)
6404 {
6405 case 64: return 'd';
6406 case 32: return 's';
6407 case 16: return 'h';
6408 case 8 : return 'b';
6409 default: gcc_unreachable ();
6410 }
6411 }
6412
6413 /* Return true iff x is a uniform vector of floating-point
6414 constants, and the constant can be represented in
6415 quarter-precision form. Note, as aarch64_float_const_representable
6416 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
6417 static bool
6418 aarch64_vect_float_const_representable_p (rtx x)
6419 {
6420 int i = 0;
6421 REAL_VALUE_TYPE r0, ri;
6422 rtx x0, xi;
6423
6424 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6425 return false;
6426
6427 x0 = CONST_VECTOR_ELT (x, 0);
6428 if (!CONST_DOUBLE_P (x0))
6429 return false;
6430
6431 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6432
6433 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6434 {
6435 xi = CONST_VECTOR_ELT (x, i);
6436 if (!CONST_DOUBLE_P (xi))
6437 return false;
6438
6439 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6440 if (!REAL_VALUES_EQUAL (r0, ri))
6441 return false;
6442 }
6443
6444 return aarch64_float_const_representable_p (x0);
6445 }
6446
6447 /* Return true for valid and false for invalid. */
6448 bool
6449 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
6450 struct simd_immediate_info *info)
6451 {
6452 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6453 matches = 1; \
6454 for (i = 0; i < idx; i += (STRIDE)) \
6455 if (!(TEST)) \
6456 matches = 0; \
6457 if (matches) \
6458 { \
6459 immtype = (CLASS); \
6460 elsize = (ELSIZE); \
6461 eshift = (SHIFT); \
6462 emvn = (NEG); \
6463 break; \
6464 }
6465
6466 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6467 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6468 unsigned char bytes[16];
6469 int immtype = -1, matches;
6470 unsigned int invmask = inverse ? 0xff : 0;
6471 int eshift, emvn;
6472
6473 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6474 {
6475 if (! (aarch64_simd_imm_zero_p (op, mode)
6476 || aarch64_vect_float_const_representable_p (op)))
6477 return false;
6478
6479 if (info)
6480 {
6481 info->value = CONST_VECTOR_ELT (op, 0);
6482 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
6483 info->mvn = false;
6484 info->shift = 0;
6485 }
6486
6487 return true;
6488 }
6489
6490 /* Splat vector constant out into a byte vector. */
6491 for (i = 0; i < n_elts; i++)
6492 {
6493 rtx el = CONST_VECTOR_ELT (op, i);
6494 unsigned HOST_WIDE_INT elpart;
6495 unsigned int part, parts;
6496
6497 if (GET_CODE (el) == CONST_INT)
6498 {
6499 elpart = INTVAL (el);
6500 parts = 1;
6501 }
6502 else if (GET_CODE (el) == CONST_DOUBLE)
6503 {
6504 elpart = CONST_DOUBLE_LOW (el);
6505 parts = 2;
6506 }
6507 else
6508 gcc_unreachable ();
6509
6510 for (part = 0; part < parts; part++)
6511 {
6512 unsigned int byte;
6513 for (byte = 0; byte < innersize; byte++)
6514 {
6515 bytes[idx++] = (elpart & 0xff) ^ invmask;
6516 elpart >>= BITS_PER_UNIT;
6517 }
6518 if (GET_CODE (el) == CONST_DOUBLE)
6519 elpart = CONST_DOUBLE_HIGH (el);
6520 }
6521 }
6522
6523 /* Sanity check. */
6524 gcc_assert (idx == GET_MODE_SIZE (mode));
6525
6526 do
6527 {
6528 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6529 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6530
6531 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6532 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6533
6534 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6535 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6536
6537 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6538 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6539
6540 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6541
6542 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6543
6544 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6545 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6546
6547 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6548 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6549
6550 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6551 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6552
6553 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6554 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6555
6556 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6557
6558 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6559
6560 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6561 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6562
6563 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6564 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6565
6566 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6567 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6568
6569 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6570 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6571
6572 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6573
6574 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6575 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6576 }
6577 while (0);
6578
6579 if (immtype == -1)
6580 return false;
6581
6582 if (info)
6583 {
6584 info->element_width = elsize;
6585 info->mvn = emvn != 0;
6586 info->shift = eshift;
6587
6588 unsigned HOST_WIDE_INT imm = 0;
6589
6590 if (immtype >= 12 && immtype <= 15)
6591 info->msl = true;
6592
6593 /* Un-invert bytes of recognized vector, if necessary. */
6594 if (invmask != 0)
6595 for (i = 0; i < idx; i++)
6596 bytes[i] ^= invmask;
6597
6598 if (immtype == 17)
6599 {
6600 /* FIXME: Broken on 32-bit H_W_I hosts. */
6601 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6602
6603 for (i = 0; i < 8; i++)
6604 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6605 << (i * BITS_PER_UNIT);
6606
6607
6608 info->value = GEN_INT (imm);
6609 }
6610 else
6611 {
6612 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6613 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6614
6615 /* Construct 'abcdefgh' because the assembler cannot handle
6616 generic constants. */
6617 if (info->mvn)
6618 imm = ~imm;
6619 imm = (imm >> info->shift) & 0xff;
6620 info->value = GEN_INT (imm);
6621 }
6622 }
6623
6624 return true;
6625 #undef CHECK
6626 }
6627
6628 static bool
6629 aarch64_const_vec_all_same_int_p (rtx x,
6630 HOST_WIDE_INT minval,
6631 HOST_WIDE_INT maxval)
6632 {
6633 HOST_WIDE_INT firstval;
6634 int count, i;
6635
6636 if (GET_CODE (x) != CONST_VECTOR
6637 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6638 return false;
6639
6640 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6641 if (firstval < minval || firstval > maxval)
6642 return false;
6643
6644 count = CONST_VECTOR_NUNITS (x);
6645 for (i = 1; i < count; i++)
6646 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6647 return false;
6648
6649 return true;
6650 }
6651
6652 /* Check of immediate shift constants are within range. */
6653 bool
6654 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6655 {
6656 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6657 if (left)
6658 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6659 else
6660 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6661 }
6662
6663 /* Return true if X is a uniform vector where all elements
6664 are either the floating-point constant 0.0 or the
6665 integer constant 0. */
6666 bool
6667 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6668 {
6669 return x == CONST0_RTX (mode);
6670 }
6671
6672 bool
6673 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6674 {
6675 HOST_WIDE_INT imm = INTVAL (x);
6676 int i;
6677
6678 for (i = 0; i < 8; i++)
6679 {
6680 unsigned int byte = imm & 0xff;
6681 if (byte != 0xff && byte != 0)
6682 return false;
6683 imm >>= 8;
6684 }
6685
6686 return true;
6687 }
6688
6689 bool
6690 aarch64_mov_operand_p (rtx x,
6691 enum aarch64_symbol_context context,
6692 enum machine_mode mode)
6693 {
6694 if (GET_CODE (x) == HIGH
6695 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6696 return true;
6697
6698 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
6699 return true;
6700
6701 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
6702 return true;
6703
6704 return aarch64_classify_symbolic_expression (x, context)
6705 == SYMBOL_TINY_ABSOLUTE;
6706 }
6707
6708 /* Return a const_int vector of VAL. */
6709 rtx
6710 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6711 {
6712 int nunits = GET_MODE_NUNITS (mode);
6713 rtvec v = rtvec_alloc (nunits);
6714 int i;
6715
6716 for (i=0; i < nunits; i++)
6717 RTVEC_ELT (v, i) = GEN_INT (val);
6718
6719 return gen_rtx_CONST_VECTOR (mode, v);
6720 }
6721
6722 /* Check OP is a legal scalar immediate for the MOVI instruction. */
6723
6724 bool
6725 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
6726 {
6727 enum machine_mode vmode;
6728
6729 gcc_assert (!VECTOR_MODE_P (mode));
6730 vmode = aarch64_preferred_simd_mode (mode);
6731 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
6732 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
6733 }
6734
6735 /* Construct and return a PARALLEL RTX vector. */
6736 rtx
6737 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6738 {
6739 int nunits = GET_MODE_NUNITS (mode);
6740 rtvec v = rtvec_alloc (nunits / 2);
6741 int base = high ? nunits / 2 : 0;
6742 rtx t1;
6743 int i;
6744
6745 for (i=0; i < nunits / 2; i++)
6746 RTVEC_ELT (v, i) = GEN_INT (base + i);
6747
6748 t1 = gen_rtx_PARALLEL (mode, v);
6749 return t1;
6750 }
6751
6752 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6753 HIGH (exclusive). */
6754 void
6755 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6756 {
6757 HOST_WIDE_INT lane;
6758 gcc_assert (GET_CODE (operand) == CONST_INT);
6759 lane = INTVAL (operand);
6760
6761 if (lane < low || lane >= high)
6762 error ("lane out of range");
6763 }
6764
6765 void
6766 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6767 {
6768 gcc_assert (GET_CODE (operand) == CONST_INT);
6769 HOST_WIDE_INT lane = INTVAL (operand);
6770
6771 if (lane < low || lane >= high)
6772 error ("constant out of range");
6773 }
6774
6775 /* Emit code to reinterpret one AdvSIMD type as another,
6776 without altering bits. */
6777 void
6778 aarch64_simd_reinterpret (rtx dest, rtx src)
6779 {
6780 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6781 }
6782
6783 /* Emit code to place a AdvSIMD pair result in memory locations (with equal
6784 registers). */
6785 void
6786 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6787 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6788 rtx op1)
6789 {
6790 rtx mem = gen_rtx_MEM (mode, destaddr);
6791 rtx tmp1 = gen_reg_rtx (mode);
6792 rtx tmp2 = gen_reg_rtx (mode);
6793
6794 emit_insn (intfn (tmp1, op1, tmp2));
6795
6796 emit_move_insn (mem, tmp1);
6797 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6798 emit_move_insn (mem, tmp2);
6799 }
6800
6801 /* Return TRUE if OP is a valid vector addressing mode. */
6802 bool
6803 aarch64_simd_mem_operand_p (rtx op)
6804 {
6805 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6806 || GET_CODE (XEXP (op, 0)) == REG);
6807 }
6808
6809 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
6810 not to early-clobber SRC registers in the process.
6811
6812 We assume that the operands described by SRC and DEST represent a
6813 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6814 number of components into which the copy has been decomposed. */
6815 void
6816 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6817 rtx *src, unsigned int count)
6818 {
6819 unsigned int i;
6820
6821 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6822 || REGNO (operands[0]) < REGNO (operands[1]))
6823 {
6824 for (i = 0; i < count; i++)
6825 {
6826 operands[2 * i] = dest[i];
6827 operands[2 * i + 1] = src[i];
6828 }
6829 }
6830 else
6831 {
6832 for (i = 0; i < count; i++)
6833 {
6834 operands[2 * i] = dest[count - i - 1];
6835 operands[2 * i + 1] = src[count - i - 1];
6836 }
6837 }
6838 }
6839
6840 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6841 one of VSTRUCT modes: OI, CI or XI. */
6842 int
6843 aarch64_simd_attr_length_move (rtx insn)
6844 {
6845 enum machine_mode mode;
6846
6847 extract_insn_cached (insn);
6848
6849 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6850 {
6851 mode = GET_MODE (recog_data.operand[0]);
6852 switch (mode)
6853 {
6854 case OImode:
6855 return 8;
6856 case CImode:
6857 return 12;
6858 case XImode:
6859 return 16;
6860 default:
6861 gcc_unreachable ();
6862 }
6863 }
6864 return 4;
6865 }
6866
6867 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6868 alignment of a vector to 128 bits. */
6869 static HOST_WIDE_INT
6870 aarch64_simd_vector_alignment (const_tree type)
6871 {
6872 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
6873 return MIN (align, 128);
6874 }
6875
6876 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6877 static bool
6878 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6879 {
6880 if (is_packed)
6881 return false;
6882
6883 /* We guarantee alignment for vectors up to 128-bits. */
6884 if (tree_int_cst_compare (TYPE_SIZE (type),
6885 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6886 return false;
6887
6888 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6889 return true;
6890 }
6891
6892 /* If VALS is a vector constant that can be loaded into a register
6893 using DUP, generate instructions to do so and return an RTX to
6894 assign to the register. Otherwise return NULL_RTX. */
6895 static rtx
6896 aarch64_simd_dup_constant (rtx vals)
6897 {
6898 enum machine_mode mode = GET_MODE (vals);
6899 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6900 int n_elts = GET_MODE_NUNITS (mode);
6901 bool all_same = true;
6902 rtx x;
6903 int i;
6904
6905 if (GET_CODE (vals) != CONST_VECTOR)
6906 return NULL_RTX;
6907
6908 for (i = 1; i < n_elts; ++i)
6909 {
6910 x = CONST_VECTOR_ELT (vals, i);
6911 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6912 all_same = false;
6913 }
6914
6915 if (!all_same)
6916 return NULL_RTX;
6917
6918 /* We can load this constant by using DUP and a constant in a
6919 single ARM register. This will be cheaper than a vector
6920 load. */
6921 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6922 return gen_rtx_VEC_DUPLICATE (mode, x);
6923 }
6924
6925
6926 /* Generate code to load VALS, which is a PARALLEL containing only
6927 constants (for vec_init) or CONST_VECTOR, efficiently into a
6928 register. Returns an RTX to copy into the register, or NULL_RTX
6929 for a PARALLEL that can not be converted into a CONST_VECTOR. */
6930 static rtx
6931 aarch64_simd_make_constant (rtx vals)
6932 {
6933 enum machine_mode mode = GET_MODE (vals);
6934 rtx const_dup;
6935 rtx const_vec = NULL_RTX;
6936 int n_elts = GET_MODE_NUNITS (mode);
6937 int n_const = 0;
6938 int i;
6939
6940 if (GET_CODE (vals) == CONST_VECTOR)
6941 const_vec = vals;
6942 else if (GET_CODE (vals) == PARALLEL)
6943 {
6944 /* A CONST_VECTOR must contain only CONST_INTs and
6945 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6946 Only store valid constants in a CONST_VECTOR. */
6947 for (i = 0; i < n_elts; ++i)
6948 {
6949 rtx x = XVECEXP (vals, 0, i);
6950 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
6951 n_const++;
6952 }
6953 if (n_const == n_elts)
6954 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6955 }
6956 else
6957 gcc_unreachable ();
6958
6959 if (const_vec != NULL_RTX
6960 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
6961 /* Load using MOVI/MVNI. */
6962 return const_vec;
6963 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
6964 /* Loaded using DUP. */
6965 return const_dup;
6966 else if (const_vec != NULL_RTX)
6967 /* Load from constant pool. We can not take advantage of single-cycle
6968 LD1 because we need a PC-relative addressing mode. */
6969 return const_vec;
6970 else
6971 /* A PARALLEL containing something not valid inside CONST_VECTOR.
6972 We can not construct an initializer. */
6973 return NULL_RTX;
6974 }
6975
6976 void
6977 aarch64_expand_vector_init (rtx target, rtx vals)
6978 {
6979 enum machine_mode mode = GET_MODE (target);
6980 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6981 int n_elts = GET_MODE_NUNITS (mode);
6982 int n_var = 0, one_var = -1;
6983 bool all_same = true;
6984 rtx x, mem;
6985 int i;
6986
6987 x = XVECEXP (vals, 0, 0);
6988 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6989 n_var = 1, one_var = 0;
6990
6991 for (i = 1; i < n_elts; ++i)
6992 {
6993 x = XVECEXP (vals, 0, i);
6994 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6995 ++n_var, one_var = i;
6996
6997 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6998 all_same = false;
6999 }
7000
7001 if (n_var == 0)
7002 {
7003 rtx constant = aarch64_simd_make_constant (vals);
7004 if (constant != NULL_RTX)
7005 {
7006 emit_move_insn (target, constant);
7007 return;
7008 }
7009 }
7010
7011 /* Splat a single non-constant element if we can. */
7012 if (all_same)
7013 {
7014 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
7015 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
7016 return;
7017 }
7018
7019 /* One field is non-constant. Load constant then overwrite varying
7020 field. This is more efficient than using the stack. */
7021 if (n_var == 1)
7022 {
7023 rtx copy = copy_rtx (vals);
7024 rtx index = GEN_INT (one_var);
7025 enum insn_code icode;
7026
7027 /* Load constant part of vector, substitute neighboring value for
7028 varying element. */
7029 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
7030 aarch64_expand_vector_init (target, copy);
7031
7032 /* Insert variable. */
7033 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
7034 icode = optab_handler (vec_set_optab, mode);
7035 gcc_assert (icode != CODE_FOR_nothing);
7036 emit_insn (GEN_FCN (icode) (target, x, index));
7037 return;
7038 }
7039
7040 /* Construct the vector in memory one field at a time
7041 and load the whole vector. */
7042 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7043 for (i = 0; i < n_elts; i++)
7044 emit_move_insn (adjust_address_nv (mem, inner_mode,
7045 i * GET_MODE_SIZE (inner_mode)),
7046 XVECEXP (vals, 0, i));
7047 emit_move_insn (target, mem);
7048
7049 }
7050
7051 static unsigned HOST_WIDE_INT
7052 aarch64_shift_truncation_mask (enum machine_mode mode)
7053 {
7054 return
7055 (aarch64_vector_mode_supported_p (mode)
7056 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
7057 }
7058
7059 #ifndef TLS_SECTION_ASM_FLAG
7060 #define TLS_SECTION_ASM_FLAG 'T'
7061 #endif
7062
7063 void
7064 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7065 tree decl ATTRIBUTE_UNUSED)
7066 {
7067 char flagchars[10], *f = flagchars;
7068
7069 /* If we have already declared this section, we can use an
7070 abbreviated form to switch back to it -- unless this section is
7071 part of a COMDAT groups, in which case GAS requires the full
7072 declaration every time. */
7073 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7074 && (flags & SECTION_DECLARED))
7075 {
7076 fprintf (asm_out_file, "\t.section\t%s\n", name);
7077 return;
7078 }
7079
7080 if (!(flags & SECTION_DEBUG))
7081 *f++ = 'a';
7082 if (flags & SECTION_WRITE)
7083 *f++ = 'w';
7084 if (flags & SECTION_CODE)
7085 *f++ = 'x';
7086 if (flags & SECTION_SMALL)
7087 *f++ = 's';
7088 if (flags & SECTION_MERGE)
7089 *f++ = 'M';
7090 if (flags & SECTION_STRINGS)
7091 *f++ = 'S';
7092 if (flags & SECTION_TLS)
7093 *f++ = TLS_SECTION_ASM_FLAG;
7094 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7095 *f++ = 'G';
7096 *f = '\0';
7097
7098 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7099
7100 if (!(flags & SECTION_NOTYPE))
7101 {
7102 const char *type;
7103 const char *format;
7104
7105 if (flags & SECTION_BSS)
7106 type = "nobits";
7107 else
7108 type = "progbits";
7109
7110 #ifdef TYPE_OPERAND_FMT
7111 format = "," TYPE_OPERAND_FMT;
7112 #else
7113 format = ",@%s";
7114 #endif
7115
7116 fprintf (asm_out_file, format, type);
7117
7118 if (flags & SECTION_ENTSIZE)
7119 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7120 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7121 {
7122 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7123 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7124 else
7125 fprintf (asm_out_file, ",%s,comdat",
7126 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7127 }
7128 }
7129
7130 putc ('\n', asm_out_file);
7131 }
7132
7133 /* Select a format to encode pointers in exception handling data. */
7134 int
7135 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7136 {
7137 int type;
7138 switch (aarch64_cmodel)
7139 {
7140 case AARCH64_CMODEL_TINY:
7141 case AARCH64_CMODEL_TINY_PIC:
7142 case AARCH64_CMODEL_SMALL:
7143 case AARCH64_CMODEL_SMALL_PIC:
7144 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
7145 for everything. */
7146 type = DW_EH_PE_sdata4;
7147 break;
7148 default:
7149 /* No assumptions here. 8-byte relocs required. */
7150 type = DW_EH_PE_sdata8;
7151 break;
7152 }
7153 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7154 }
7155
7156 /* Emit load exclusive. */
7157
7158 static void
7159 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7160 rtx mem, rtx model_rtx)
7161 {
7162 rtx (*gen) (rtx, rtx, rtx);
7163
7164 switch (mode)
7165 {
7166 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7167 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7168 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7169 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7170 default:
7171 gcc_unreachable ();
7172 }
7173
7174 emit_insn (gen (rval, mem, model_rtx));
7175 }
7176
7177 /* Emit store exclusive. */
7178
7179 static void
7180 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7181 rtx rval, rtx mem, rtx model_rtx)
7182 {
7183 rtx (*gen) (rtx, rtx, rtx, rtx);
7184
7185 switch (mode)
7186 {
7187 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7188 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7189 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7190 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7191 default:
7192 gcc_unreachable ();
7193 }
7194
7195 emit_insn (gen (bval, rval, mem, model_rtx));
7196 }
7197
7198 /* Mark the previous jump instruction as unlikely. */
7199
7200 static void
7201 aarch64_emit_unlikely_jump (rtx insn)
7202 {
7203 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
7204
7205 insn = emit_jump_insn (insn);
7206 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
7207 }
7208
7209 /* Expand a compare and swap pattern. */
7210
7211 void
7212 aarch64_expand_compare_and_swap (rtx operands[])
7213 {
7214 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7215 enum machine_mode mode, cmp_mode;
7216 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7217
7218 bval = operands[0];
7219 rval = operands[1];
7220 mem = operands[2];
7221 oldval = operands[3];
7222 newval = operands[4];
7223 is_weak = operands[5];
7224 mod_s = operands[6];
7225 mod_f = operands[7];
7226 mode = GET_MODE (mem);
7227 cmp_mode = mode;
7228
7229 /* Normally the succ memory model must be stronger than fail, but in the
7230 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7231 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7232
7233 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7234 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7235 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7236
7237 switch (mode)
7238 {
7239 case QImode:
7240 case HImode:
7241 /* For short modes, we're going to perform the comparison in SImode,
7242 so do the zero-extension now. */
7243 cmp_mode = SImode;
7244 rval = gen_reg_rtx (SImode);
7245 oldval = convert_modes (SImode, mode, oldval, true);
7246 /* Fall through. */
7247
7248 case SImode:
7249 case DImode:
7250 /* Force the value into a register if needed. */
7251 if (!aarch64_plus_operand (oldval, mode))
7252 oldval = force_reg (cmp_mode, oldval);
7253 break;
7254
7255 default:
7256 gcc_unreachable ();
7257 }
7258
7259 switch (mode)
7260 {
7261 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7262 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7263 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7264 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7265 default:
7266 gcc_unreachable ();
7267 }
7268
7269 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7270
7271 if (mode == QImode || mode == HImode)
7272 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7273
7274 x = gen_rtx_REG (CCmode, CC_REGNUM);
7275 x = gen_rtx_EQ (SImode, x, const0_rtx);
7276 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7277 }
7278
7279 /* Split a compare and swap pattern. */
7280
7281 void
7282 aarch64_split_compare_and_swap (rtx operands[])
7283 {
7284 rtx rval, mem, oldval, newval, scratch;
7285 enum machine_mode mode;
7286 bool is_weak;
7287 rtx label1, label2, x, cond;
7288
7289 rval = operands[0];
7290 mem = operands[1];
7291 oldval = operands[2];
7292 newval = operands[3];
7293 is_weak = (operands[4] != const0_rtx);
7294 scratch = operands[7];
7295 mode = GET_MODE (mem);
7296
7297 label1 = NULL_RTX;
7298 if (!is_weak)
7299 {
7300 label1 = gen_label_rtx ();
7301 emit_label (label1);
7302 }
7303 label2 = gen_label_rtx ();
7304
7305 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7306
7307 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7308 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7309 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7310 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7311 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7312
7313 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7314
7315 if (!is_weak)
7316 {
7317 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7318 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7319 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7320 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7321 }
7322 else
7323 {
7324 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7325 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7326 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7327 }
7328
7329 emit_label (label2);
7330 }
7331
7332 /* Split an atomic operation. */
7333
7334 void
7335 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7336 rtx value, rtx model_rtx, rtx cond)
7337 {
7338 enum machine_mode mode = GET_MODE (mem);
7339 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7340 rtx label, x;
7341
7342 label = gen_label_rtx ();
7343 emit_label (label);
7344
7345 if (new_out)
7346 new_out = gen_lowpart (wmode, new_out);
7347 if (old_out)
7348 old_out = gen_lowpart (wmode, old_out);
7349 else
7350 old_out = new_out;
7351 value = simplify_gen_subreg (wmode, value, mode, 0);
7352
7353 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7354
7355 switch (code)
7356 {
7357 case SET:
7358 new_out = value;
7359 break;
7360
7361 case NOT:
7362 x = gen_rtx_AND (wmode, old_out, value);
7363 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7364 x = gen_rtx_NOT (wmode, new_out);
7365 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7366 break;
7367
7368 case MINUS:
7369 if (CONST_INT_P (value))
7370 {
7371 value = GEN_INT (-INTVAL (value));
7372 code = PLUS;
7373 }
7374 /* Fall through. */
7375
7376 default:
7377 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7378 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7379 break;
7380 }
7381
7382 aarch64_emit_store_exclusive (mode, cond, mem,
7383 gen_lowpart (mode, new_out), model_rtx);
7384
7385 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7386 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7387 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7388 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7389 }
7390
7391 static void
7392 aarch64_print_extension (void)
7393 {
7394 const struct aarch64_option_extension *opt = NULL;
7395
7396 for (opt = all_extensions; opt->name != NULL; opt++)
7397 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7398 asm_fprintf (asm_out_file, "+%s", opt->name);
7399
7400 asm_fprintf (asm_out_file, "\n");
7401 }
7402
7403 static void
7404 aarch64_start_file (void)
7405 {
7406 if (selected_arch)
7407 {
7408 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7409 aarch64_print_extension ();
7410 }
7411 else if (selected_cpu)
7412 {
7413 asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
7414 aarch64_print_extension ();
7415 }
7416 default_file_start();
7417 }
7418
7419 /* Target hook for c_mode_for_suffix. */
7420 static enum machine_mode
7421 aarch64_c_mode_for_suffix (char suffix)
7422 {
7423 if (suffix == 'q')
7424 return TFmode;
7425
7426 return VOIDmode;
7427 }
7428
7429 /* We can only represent floating point constants which will fit in
7430 "quarter-precision" values. These values are characterised by
7431 a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
7432 by:
7433
7434 (-1)^s * (n/16) * 2^r
7435
7436 Where:
7437 's' is the sign bit.
7438 'n' is an integer in the range 16 <= n <= 31.
7439 'r' is an integer in the range -3 <= r <= 4. */
7440
7441 /* Return true iff X can be represented by a quarter-precision
7442 floating point immediate operand X. Note, we cannot represent 0.0. */
7443 bool
7444 aarch64_float_const_representable_p (rtx x)
7445 {
7446 /* This represents our current view of how many bits
7447 make up the mantissa. */
7448 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7449 int exponent;
7450 unsigned HOST_WIDE_INT mantissa, mask;
7451 REAL_VALUE_TYPE r, m;
7452 bool fail;
7453
7454 if (!CONST_DOUBLE_P (x))
7455 return false;
7456
7457 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7458
7459 /* We cannot represent infinities, NaNs or +/-zero. We won't
7460 know if we have +zero until we analyse the mantissa, but we
7461 can reject the other invalid values. */
7462 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7463 || REAL_VALUE_MINUS_ZERO (r))
7464 return false;
7465
7466 /* Extract exponent. */
7467 r = real_value_abs (&r);
7468 exponent = REAL_EXP (&r);
7469
7470 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7471 highest (sign) bit, with a fixed binary point at bit point_pos.
7472 m1 holds the low part of the mantissa, m2 the high part.
7473 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7474 bits for the mantissa, this can fail (low bits will be lost). */
7475 real_ldexp (&m, &r, point_pos - exponent);
7476 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
7477
7478 /* If the low part of the mantissa has bits set we cannot represent
7479 the value. */
7480 if (w.elt (0) != 0)
7481 return false;
7482 /* We have rejected the lower HOST_WIDE_INT, so update our
7483 understanding of how many bits lie in the mantissa and
7484 look only at the high HOST_WIDE_INT. */
7485 mantissa = w.elt (1);
7486 point_pos -= HOST_BITS_PER_WIDE_INT;
7487
7488 /* We can only represent values with a mantissa of the form 1.xxxx. */
7489 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7490 if ((mantissa & mask) != 0)
7491 return false;
7492
7493 /* Having filtered unrepresentable values, we may now remove all
7494 but the highest 5 bits. */
7495 mantissa >>= point_pos - 5;
7496
7497 /* We cannot represent the value 0.0, so reject it. This is handled
7498 elsewhere. */
7499 if (mantissa == 0)
7500 return false;
7501
7502 /* Then, as bit 4 is always set, we can mask it off, leaving
7503 the mantissa in the range [0, 15]. */
7504 mantissa &= ~(1 << 4);
7505 gcc_assert (mantissa <= 15);
7506
7507 /* GCC internally does not use IEEE754-like encoding (where normalized
7508 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
7509 Our mantissa values are shifted 4 places to the left relative to
7510 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7511 by 5 places to correct for GCC's representation. */
7512 exponent = 5 - exponent;
7513
7514 return (exponent >= 0 && exponent <= 7);
7515 }
7516
7517 char*
7518 aarch64_output_simd_mov_immediate (rtx const_vector,
7519 enum machine_mode mode,
7520 unsigned width)
7521 {
7522 bool is_valid;
7523 static char templ[40];
7524 const char *mnemonic;
7525 const char *shift_op;
7526 unsigned int lane_count = 0;
7527 char element_char;
7528
7529 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
7530
7531 /* This will return true to show const_vector is legal for use as either
7532 a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
7533 also update INFO to show how the immediate should be generated. */
7534 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
7535 gcc_assert (is_valid);
7536
7537 element_char = sizetochar (info.element_width);
7538 lane_count = width / info.element_width;
7539
7540 mode = GET_MODE_INNER (mode);
7541 if (mode == SFmode || mode == DFmode)
7542 {
7543 gcc_assert (info.shift == 0 && ! info.mvn);
7544 if (aarch64_float_const_zero_rtx_p (info.value))
7545 info.value = GEN_INT (0);
7546 else
7547 {
7548 #define buf_size 20
7549 REAL_VALUE_TYPE r;
7550 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
7551 char float_buf[buf_size] = {'\0'};
7552 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
7553 #undef buf_size
7554
7555 if (lane_count == 1)
7556 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
7557 else
7558 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
7559 lane_count, element_char, float_buf);
7560 return templ;
7561 }
7562 }
7563
7564 mnemonic = info.mvn ? "mvni" : "movi";
7565 shift_op = info.msl ? "msl" : "lsl";
7566
7567 if (lane_count == 1)
7568 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
7569 mnemonic, UINTVAL (info.value));
7570 else if (info.shift)
7571 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
7572 ", %s %d", mnemonic, lane_count, element_char,
7573 UINTVAL (info.value), shift_op, info.shift);
7574 else
7575 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
7576 mnemonic, lane_count, element_char, UINTVAL (info.value));
7577 return templ;
7578 }
7579
7580 char*
7581 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
7582 enum machine_mode mode)
7583 {
7584 enum machine_mode vmode;
7585
7586 gcc_assert (!VECTOR_MODE_P (mode));
7587 vmode = aarch64_simd_container_mode (mode, 64);
7588 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
7589 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
7590 }
7591
7592 /* Split operands into moves from op[1] + op[2] into op[0]. */
7593
7594 void
7595 aarch64_split_combinev16qi (rtx operands[3])
7596 {
7597 unsigned int dest = REGNO (operands[0]);
7598 unsigned int src1 = REGNO (operands[1]);
7599 unsigned int src2 = REGNO (operands[2]);
7600 enum machine_mode halfmode = GET_MODE (operands[1]);
7601 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7602 rtx destlo, desthi;
7603
7604 gcc_assert (halfmode == V16QImode);
7605
7606 if (src1 == dest && src2 == dest + halfregs)
7607 {
7608 /* No-op move. Can't split to nothing; emit something. */
7609 emit_note (NOTE_INSN_DELETED);
7610 return;
7611 }
7612
7613 /* Preserve register attributes for variable tracking. */
7614 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7615 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7616 GET_MODE_SIZE (halfmode));
7617
7618 /* Special case of reversed high/low parts. */
7619 if (reg_overlap_mentioned_p (operands[2], destlo)
7620 && reg_overlap_mentioned_p (operands[1], desthi))
7621 {
7622 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7623 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7624 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7625 }
7626 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7627 {
7628 /* Try to avoid unnecessary moves if part of the result
7629 is in the right place already. */
7630 if (src1 != dest)
7631 emit_move_insn (destlo, operands[1]);
7632 if (src2 != dest + halfregs)
7633 emit_move_insn (desthi, operands[2]);
7634 }
7635 else
7636 {
7637 if (src2 != dest + halfregs)
7638 emit_move_insn (desthi, operands[2]);
7639 if (src1 != dest)
7640 emit_move_insn (destlo, operands[1]);
7641 }
7642 }
7643
7644 /* vec_perm support. */
7645
7646 #define MAX_VECT_LEN 16
7647
7648 struct expand_vec_perm_d
7649 {
7650 rtx target, op0, op1;
7651 unsigned char perm[MAX_VECT_LEN];
7652 enum machine_mode vmode;
7653 unsigned char nelt;
7654 bool one_vector_p;
7655 bool testing_p;
7656 };
7657
7658 /* Generate a variable permutation. */
7659
7660 static void
7661 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7662 {
7663 enum machine_mode vmode = GET_MODE (target);
7664 bool one_vector_p = rtx_equal_p (op0, op1);
7665
7666 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7667 gcc_checking_assert (GET_MODE (op0) == vmode);
7668 gcc_checking_assert (GET_MODE (op1) == vmode);
7669 gcc_checking_assert (GET_MODE (sel) == vmode);
7670 gcc_checking_assert (TARGET_SIMD);
7671
7672 if (one_vector_p)
7673 {
7674 if (vmode == V8QImode)
7675 {
7676 /* Expand the argument to a V16QI mode by duplicating it. */
7677 rtx pair = gen_reg_rtx (V16QImode);
7678 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7679 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7680 }
7681 else
7682 {
7683 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7684 }
7685 }
7686 else
7687 {
7688 rtx pair;
7689
7690 if (vmode == V8QImode)
7691 {
7692 pair = gen_reg_rtx (V16QImode);
7693 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7694 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7695 }
7696 else
7697 {
7698 pair = gen_reg_rtx (OImode);
7699 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7700 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7701 }
7702 }
7703 }
7704
7705 void
7706 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7707 {
7708 enum machine_mode vmode = GET_MODE (target);
7709 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7710 bool one_vector_p = rtx_equal_p (op0, op1);
7711 rtx rmask[MAX_VECT_LEN], mask;
7712
7713 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7714
7715 /* The TBL instruction does not use a modulo index, so we must take care
7716 of that ourselves. */
7717 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7718 for (i = 0; i < nelt; ++i)
7719 rmask[i] = mask;
7720 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7721 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7722
7723 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7724 }
7725
7726 /* Recognize patterns suitable for the TRN instructions. */
7727 static bool
7728 aarch64_evpc_trn (struct expand_vec_perm_d *d)
7729 {
7730 unsigned int i, odd, mask, nelt = d->nelt;
7731 rtx out, in0, in1, x;
7732 rtx (*gen) (rtx, rtx, rtx);
7733 enum machine_mode vmode = d->vmode;
7734
7735 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7736 return false;
7737
7738 /* Note that these are little-endian tests.
7739 We correct for big-endian later. */
7740 if (d->perm[0] == 0)
7741 odd = 0;
7742 else if (d->perm[0] == 1)
7743 odd = 1;
7744 else
7745 return false;
7746 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7747
7748 for (i = 0; i < nelt; i += 2)
7749 {
7750 if (d->perm[i] != i + odd)
7751 return false;
7752 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7753 return false;
7754 }
7755
7756 /* Success! */
7757 if (d->testing_p)
7758 return true;
7759
7760 in0 = d->op0;
7761 in1 = d->op1;
7762 if (BYTES_BIG_ENDIAN)
7763 {
7764 x = in0, in0 = in1, in1 = x;
7765 odd = !odd;
7766 }
7767 out = d->target;
7768
7769 if (odd)
7770 {
7771 switch (vmode)
7772 {
7773 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7774 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7775 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7776 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7777 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7778 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7779 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7780 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7781 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7782 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7783 default:
7784 return false;
7785 }
7786 }
7787 else
7788 {
7789 switch (vmode)
7790 {
7791 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7792 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7793 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7794 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7795 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7796 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7797 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7798 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7799 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7800 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7801 default:
7802 return false;
7803 }
7804 }
7805
7806 emit_insn (gen (out, in0, in1));
7807 return true;
7808 }
7809
7810 /* Recognize patterns suitable for the UZP instructions. */
7811 static bool
7812 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7813 {
7814 unsigned int i, odd, mask, nelt = d->nelt;
7815 rtx out, in0, in1, x;
7816 rtx (*gen) (rtx, rtx, rtx);
7817 enum machine_mode vmode = d->vmode;
7818
7819 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7820 return false;
7821
7822 /* Note that these are little-endian tests.
7823 We correct for big-endian later. */
7824 if (d->perm[0] == 0)
7825 odd = 0;
7826 else if (d->perm[0] == 1)
7827 odd = 1;
7828 else
7829 return false;
7830 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7831
7832 for (i = 0; i < nelt; i++)
7833 {
7834 unsigned elt = (i * 2 + odd) & mask;
7835 if (d->perm[i] != elt)
7836 return false;
7837 }
7838
7839 /* Success! */
7840 if (d->testing_p)
7841 return true;
7842
7843 in0 = d->op0;
7844 in1 = d->op1;
7845 if (BYTES_BIG_ENDIAN)
7846 {
7847 x = in0, in0 = in1, in1 = x;
7848 odd = !odd;
7849 }
7850 out = d->target;
7851
7852 if (odd)
7853 {
7854 switch (vmode)
7855 {
7856 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7857 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7858 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7859 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7860 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7861 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7862 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7863 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7864 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7865 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7866 default:
7867 return false;
7868 }
7869 }
7870 else
7871 {
7872 switch (vmode)
7873 {
7874 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7875 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7876 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7877 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7878 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7879 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7880 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7881 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7882 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7883 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7884 default:
7885 return false;
7886 }
7887 }
7888
7889 emit_insn (gen (out, in0, in1));
7890 return true;
7891 }
7892
7893 /* Recognize patterns suitable for the ZIP instructions. */
7894 static bool
7895 aarch64_evpc_zip (struct expand_vec_perm_d *d)
7896 {
7897 unsigned int i, high, mask, nelt = d->nelt;
7898 rtx out, in0, in1, x;
7899 rtx (*gen) (rtx, rtx, rtx);
7900 enum machine_mode vmode = d->vmode;
7901
7902 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7903 return false;
7904
7905 /* Note that these are little-endian tests.
7906 We correct for big-endian later. */
7907 high = nelt / 2;
7908 if (d->perm[0] == high)
7909 /* Do Nothing. */
7910 ;
7911 else if (d->perm[0] == 0)
7912 high = 0;
7913 else
7914 return false;
7915 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7916
7917 for (i = 0; i < nelt / 2; i++)
7918 {
7919 unsigned elt = (i + high) & mask;
7920 if (d->perm[i * 2] != elt)
7921 return false;
7922 elt = (elt + nelt) & mask;
7923 if (d->perm[i * 2 + 1] != elt)
7924 return false;
7925 }
7926
7927 /* Success! */
7928 if (d->testing_p)
7929 return true;
7930
7931 in0 = d->op0;
7932 in1 = d->op1;
7933 if (BYTES_BIG_ENDIAN)
7934 {
7935 x = in0, in0 = in1, in1 = x;
7936 high = !high;
7937 }
7938 out = d->target;
7939
7940 if (high)
7941 {
7942 switch (vmode)
7943 {
7944 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7945 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7946 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7947 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
7948 case V4SImode: gen = gen_aarch64_zip2v4si; break;
7949 case V2SImode: gen = gen_aarch64_zip2v2si; break;
7950 case V2DImode: gen = gen_aarch64_zip2v2di; break;
7951 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
7952 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
7953 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
7954 default:
7955 return false;
7956 }
7957 }
7958 else
7959 {
7960 switch (vmode)
7961 {
7962 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
7963 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
7964 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
7965 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
7966 case V4SImode: gen = gen_aarch64_zip1v4si; break;
7967 case V2SImode: gen = gen_aarch64_zip1v2si; break;
7968 case V2DImode: gen = gen_aarch64_zip1v2di; break;
7969 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
7970 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
7971 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
7972 default:
7973 return false;
7974 }
7975 }
7976
7977 emit_insn (gen (out, in0, in1));
7978 return true;
7979 }
7980
7981 static bool
7982 aarch64_evpc_dup (struct expand_vec_perm_d *d)
7983 {
7984 rtx (*gen) (rtx, rtx, rtx);
7985 rtx out = d->target;
7986 rtx in0;
7987 enum machine_mode vmode = d->vmode;
7988 unsigned int i, elt, nelt = d->nelt;
7989 rtx lane;
7990
7991 /* TODO: This may not be big-endian safe. */
7992 if (BYTES_BIG_ENDIAN)
7993 return false;
7994
7995 elt = d->perm[0];
7996 for (i = 1; i < nelt; i++)
7997 {
7998 if (elt != d->perm[i])
7999 return false;
8000 }
8001
8002 /* The generic preparation in aarch64_expand_vec_perm_const_1
8003 swaps the operand order and the permute indices if it finds
8004 d->perm[0] to be in the second operand. Thus, we can always
8005 use d->op0 and need not do any extra arithmetic to get the
8006 correct lane number. */
8007 in0 = d->op0;
8008 lane = GEN_INT (elt);
8009
8010 switch (vmode)
8011 {
8012 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
8013 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
8014 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
8015 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
8016 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
8017 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
8018 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
8019 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
8020 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
8021 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
8022 default:
8023 return false;
8024 }
8025
8026 emit_insn (gen (out, in0, lane));
8027 return true;
8028 }
8029
8030 static bool
8031 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
8032 {
8033 rtx rperm[MAX_VECT_LEN], sel;
8034 enum machine_mode vmode = d->vmode;
8035 unsigned int i, nelt = d->nelt;
8036
8037 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
8038 numbering of elements for big-endian, we must reverse the order. */
8039 if (BYTES_BIG_ENDIAN)
8040 return false;
8041
8042 if (d->testing_p)
8043 return true;
8044
8045 /* Generic code will try constant permutation twice. Once with the
8046 original mode and again with the elements lowered to QImode.
8047 So wait and don't do the selector expansion ourselves. */
8048 if (vmode != V8QImode && vmode != V16QImode)
8049 return false;
8050
8051 for (i = 0; i < nelt; ++i)
8052 rperm[i] = GEN_INT (d->perm[i]);
8053 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
8054 sel = force_reg (vmode, sel);
8055
8056 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
8057 return true;
8058 }
8059
8060 static bool
8061 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
8062 {
8063 /* The pattern matching functions above are written to look for a small
8064 number to begin the sequence (0, 1, N/2). If we begin with an index
8065 from the second operand, we can swap the operands. */
8066 if (d->perm[0] >= d->nelt)
8067 {
8068 unsigned i, nelt = d->nelt;
8069 rtx x;
8070
8071 for (i = 0; i < nelt; ++i)
8072 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
8073
8074 x = d->op0;
8075 d->op0 = d->op1;
8076 d->op1 = x;
8077 }
8078
8079 if (TARGET_SIMD)
8080 {
8081 if (aarch64_evpc_zip (d))
8082 return true;
8083 else if (aarch64_evpc_uzp (d))
8084 return true;
8085 else if (aarch64_evpc_trn (d))
8086 return true;
8087 else if (aarch64_evpc_dup (d))
8088 return true;
8089 return aarch64_evpc_tbl (d);
8090 }
8091 return false;
8092 }
8093
8094 /* Expand a vec_perm_const pattern. */
8095
8096 bool
8097 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
8098 {
8099 struct expand_vec_perm_d d;
8100 int i, nelt, which;
8101
8102 d.target = target;
8103 d.op0 = op0;
8104 d.op1 = op1;
8105
8106 d.vmode = GET_MODE (target);
8107 gcc_assert (VECTOR_MODE_P (d.vmode));
8108 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8109 d.testing_p = false;
8110
8111 for (i = which = 0; i < nelt; ++i)
8112 {
8113 rtx e = XVECEXP (sel, 0, i);
8114 int ei = INTVAL (e) & (2 * nelt - 1);
8115 which |= (ei < nelt ? 1 : 2);
8116 d.perm[i] = ei;
8117 }
8118
8119 switch (which)
8120 {
8121 default:
8122 gcc_unreachable ();
8123
8124 case 3:
8125 d.one_vector_p = false;
8126 if (!rtx_equal_p (op0, op1))
8127 break;
8128
8129 /* The elements of PERM do not suggest that only the first operand
8130 is used, but both operands are identical. Allow easier matching
8131 of the permutation by folding the permutation into the single
8132 input vector. */
8133 /* Fall Through. */
8134 case 2:
8135 for (i = 0; i < nelt; ++i)
8136 d.perm[i] &= nelt - 1;
8137 d.op0 = op1;
8138 d.one_vector_p = true;
8139 break;
8140
8141 case 1:
8142 d.op1 = op0;
8143 d.one_vector_p = true;
8144 break;
8145 }
8146
8147 return aarch64_expand_vec_perm_const_1 (&d);
8148 }
8149
8150 static bool
8151 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8152 const unsigned char *sel)
8153 {
8154 struct expand_vec_perm_d d;
8155 unsigned int i, nelt, which;
8156 bool ret;
8157
8158 d.vmode = vmode;
8159 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8160 d.testing_p = true;
8161 memcpy (d.perm, sel, nelt);
8162
8163 /* Calculate whether all elements are in one vector. */
8164 for (i = which = 0; i < nelt; ++i)
8165 {
8166 unsigned char e = d.perm[i];
8167 gcc_assert (e < 2 * nelt);
8168 which |= (e < nelt ? 1 : 2);
8169 }
8170
8171 /* If all elements are from the second vector, reindex as if from the
8172 first vector. */
8173 if (which == 2)
8174 for (i = 0; i < nelt; ++i)
8175 d.perm[i] -= nelt;
8176
8177 /* Check whether the mask can be applied to a single vector. */
8178 d.one_vector_p = (which != 3);
8179
8180 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8181 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8182 if (!d.one_vector_p)
8183 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8184
8185 start_sequence ();
8186 ret = aarch64_expand_vec_perm_const_1 (&d);
8187 end_sequence ();
8188
8189 return ret;
8190 }
8191
8192 #undef TARGET_ADDRESS_COST
8193 #define TARGET_ADDRESS_COST aarch64_address_cost
8194
8195 /* This hook will determines whether unnamed bitfields affect the alignment
8196 of the containing structure. The hook returns true if the structure
8197 should inherit the alignment requirements of an unnamed bitfield's
8198 type. */
8199 #undef TARGET_ALIGN_ANON_BITFIELD
8200 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
8201
8202 #undef TARGET_ASM_ALIGNED_DI_OP
8203 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8204
8205 #undef TARGET_ASM_ALIGNED_HI_OP
8206 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8207
8208 #undef TARGET_ASM_ALIGNED_SI_OP
8209 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8210
8211 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8212 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8213 hook_bool_const_tree_hwi_hwi_const_tree_true
8214
8215 #undef TARGET_ASM_FILE_START
8216 #define TARGET_ASM_FILE_START aarch64_start_file
8217
8218 #undef TARGET_ASM_OUTPUT_MI_THUNK
8219 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8220
8221 #undef TARGET_ASM_SELECT_RTX_SECTION
8222 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8223
8224 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8225 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8226
8227 #undef TARGET_BUILD_BUILTIN_VA_LIST
8228 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8229
8230 #undef TARGET_CALLEE_COPIES
8231 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8232
8233 #undef TARGET_CAN_ELIMINATE
8234 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8235
8236 #undef TARGET_CANNOT_FORCE_CONST_MEM
8237 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
8238
8239 #undef TARGET_CONDITIONAL_REGISTER_USAGE
8240 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
8241
8242 /* Only the least significant bit is used for initialization guard
8243 variables. */
8244 #undef TARGET_CXX_GUARD_MASK_BIT
8245 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
8246
8247 #undef TARGET_C_MODE_FOR_SUFFIX
8248 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
8249
8250 #ifdef TARGET_BIG_ENDIAN_DEFAULT
8251 #undef TARGET_DEFAULT_TARGET_FLAGS
8252 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
8253 #endif
8254
8255 #undef TARGET_CLASS_MAX_NREGS
8256 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
8257
8258 #undef TARGET_BUILTIN_DECL
8259 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
8260
8261 #undef TARGET_EXPAND_BUILTIN
8262 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
8263
8264 #undef TARGET_EXPAND_BUILTIN_VA_START
8265 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
8266
8267 #undef TARGET_FOLD_BUILTIN
8268 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
8269
8270 #undef TARGET_FUNCTION_ARG
8271 #define TARGET_FUNCTION_ARG aarch64_function_arg
8272
8273 #undef TARGET_FUNCTION_ARG_ADVANCE
8274 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
8275
8276 #undef TARGET_FUNCTION_ARG_BOUNDARY
8277 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
8278
8279 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
8280 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
8281
8282 #undef TARGET_FUNCTION_VALUE
8283 #define TARGET_FUNCTION_VALUE aarch64_function_value
8284
8285 #undef TARGET_FUNCTION_VALUE_REGNO_P
8286 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
8287
8288 #undef TARGET_FRAME_POINTER_REQUIRED
8289 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
8290
8291 #undef TARGET_GIMPLE_FOLD_BUILTIN
8292 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
8293
8294 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
8295 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
8296
8297 #undef TARGET_INIT_BUILTINS
8298 #define TARGET_INIT_BUILTINS aarch64_init_builtins
8299
8300 #undef TARGET_LEGITIMATE_ADDRESS_P
8301 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
8302
8303 #undef TARGET_LEGITIMATE_CONSTANT_P
8304 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
8305
8306 #undef TARGET_LIBGCC_CMP_RETURN_MODE
8307 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
8308
8309 #undef TARGET_LRA_P
8310 #define TARGET_LRA_P aarch64_lra_p
8311
8312 #undef TARGET_MANGLE_TYPE
8313 #define TARGET_MANGLE_TYPE aarch64_mangle_type
8314
8315 #undef TARGET_MEMORY_MOVE_COST
8316 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
8317
8318 #undef TARGET_MUST_PASS_IN_STACK
8319 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8320
8321 /* This target hook should return true if accesses to volatile bitfields
8322 should use the narrowest mode possible. It should return false if these
8323 accesses should use the bitfield container type. */
8324 #undef TARGET_NARROW_VOLATILE_BITFIELD
8325 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
8326
8327 #undef TARGET_OPTION_OVERRIDE
8328 #define TARGET_OPTION_OVERRIDE aarch64_override_options
8329
8330 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
8331 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
8332 aarch64_override_options_after_change
8333
8334 #undef TARGET_PASS_BY_REFERENCE
8335 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
8336
8337 #undef TARGET_PREFERRED_RELOAD_CLASS
8338 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
8339
8340 #undef TARGET_SECONDARY_RELOAD
8341 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
8342
8343 #undef TARGET_SHIFT_TRUNCATION_MASK
8344 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
8345
8346 #undef TARGET_SETUP_INCOMING_VARARGS
8347 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
8348
8349 #undef TARGET_STRUCT_VALUE_RTX
8350 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
8351
8352 #undef TARGET_REGISTER_MOVE_COST
8353 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
8354
8355 #undef TARGET_RETURN_IN_MEMORY
8356 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
8357
8358 #undef TARGET_RETURN_IN_MSB
8359 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
8360
8361 #undef TARGET_RTX_COSTS
8362 #define TARGET_RTX_COSTS aarch64_rtx_costs
8363
8364 #undef TARGET_TRAMPOLINE_INIT
8365 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
8366
8367 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
8368 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
8369
8370 #undef TARGET_VECTOR_MODE_SUPPORTED_P
8371 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
8372
8373 #undef TARGET_ARRAY_MODE_SUPPORTED_P
8374 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
8375
8376 #undef TARGET_VECTORIZE_ADD_STMT_COST
8377 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
8378
8379 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
8380 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
8381 aarch64_builtin_vectorization_cost
8382
8383 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
8384 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
8385
8386 #undef TARGET_VECTORIZE_BUILTINS
8387 #define TARGET_VECTORIZE_BUILTINS
8388
8389 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
8390 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
8391 aarch64_builtin_vectorized_function
8392
8393 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
8394 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
8395 aarch64_autovectorize_vector_sizes
8396
8397 /* Section anchor support. */
8398
8399 #undef TARGET_MIN_ANCHOR_OFFSET
8400 #define TARGET_MIN_ANCHOR_OFFSET -256
8401
8402 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
8403 byte offset; we can do much more for larger data types, but have no way
8404 to determine the size of the access. We assume accesses are aligned. */
8405 #undef TARGET_MAX_ANCHOR_OFFSET
8406 #define TARGET_MAX_ANCHOR_OFFSET 4095
8407
8408 #undef TARGET_VECTOR_ALIGNMENT
8409 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
8410
8411 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
8412 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
8413 aarch64_simd_vector_alignment_reachable
8414
8415 /* vec_perm support. */
8416
8417 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
8418 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
8419 aarch64_vectorize_vec_perm_const_ok
8420
8421
8422 #undef TARGET_FIXED_CONDITION_CODE_REGS
8423 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
8424
8425 struct gcc_target targetm = TARGET_INITIALIZER;
8426
8427 #include "gt-aarch64.h"