1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2013 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "stringpool.h"
30 #include "stor-layout.h"
31 #include "calls.h"
32 #include "varasm.h"
33 #include "regs.h"
34 #include "df.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "expr.h"
38 #include "reload.h"
39 #include "toplev.h"
40 #include "target.h"
41 #include "target-def.h"
42 #include "targhooks.h"
43 #include "ggc.h"
44 #include "function.h"
45 #include "tm_p.h"
46 #include "recog.h"
47 #include "langhooks.h"
48 #include "diagnostic-core.h"
49 #include "pointer-set.h"
50 #include "hash-table.h"
51 #include "vec.h"
52 #include "basic-block.h"
53 #include "tree-ssa-alias.h"
54 #include "internal-fn.h"
55 #include "gimple-fold.h"
56 #include "tree-eh.h"
57 #include "gimple-expr.h"
58 #include "is-a.h"
59 #include "gimple.h"
60 #include "gimplify.h"
61 #include "optabs.h"
62 #include "dwarf2.h"
63 #include "cfgloop.h"
64 #include "tree-vectorizer.h"
65 #include "config/arm/aarch-cost-tables.h"
66
67 /* Defined for convenience. */
68 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
69
70 /* Classifies an address.
71
72 ADDRESS_REG_IMM
73 A simple base register plus immediate offset.
74
75 ADDRESS_REG_WB
76 A base register indexed by immediate offset with writeback.
77
78 ADDRESS_REG_REG
79 A base register indexed by (optionally scaled) register.
80
81 ADDRESS_REG_UXTW
82 A base register indexed by (optionally scaled) zero-extended register.
83
84 ADDRESS_REG_SXTW
85 A base register indexed by (optionally scaled) sign-extended register.
86
87 ADDRESS_LO_SUM
88 A LO_SUM rtx with a base register and "LO12" symbol relocation.
89
90 ADDRESS_SYMBOLIC:
91 A constant symbolic address, in pc-relative literal pool. */
92
93 enum aarch64_address_type {
94 ADDRESS_REG_IMM,
95 ADDRESS_REG_WB,
96 ADDRESS_REG_REG,
97 ADDRESS_REG_UXTW,
98 ADDRESS_REG_SXTW,
99 ADDRESS_LO_SUM,
100 ADDRESS_SYMBOLIC
101 };
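/* Illustrative examples of the address classes above, in approximate
   assembly syntax; these are examples only, not an exhaustive list:

     ADDRESS_REG_IMM     ldr  x0, [x1, #16]
     ADDRESS_REG_WB      ldr  x0, [x1, #16]!   or   ldr x0, [x1], #16
     ADDRESS_REG_REG     ldr  x0, [x1, x2, lsl #3]
     ADDRESS_REG_UXTW    ldr  x0, [x1, w2, uxtw #3]
     ADDRESS_REG_SXTW    ldr  x0, [x1, w2, sxtw #3]
     ADDRESS_LO_SUM      ldr  x0, [x1, #:lo12:foo]
     ADDRESS_SYMBOLIC    ldr  x0, .LC0   (PC-relative literal load)  */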
102
103 struct aarch64_address_info {
104 enum aarch64_address_type type;
105 rtx base;
106 rtx offset;
107 int shift;
108 enum aarch64_symbol_type symbol_type;
109 };
110
111 struct simd_immediate_info
112 {
113 rtx value;
114 int shift;
115 int element_width;
116 bool mvn;
117 bool msl;
118 };
119
120 /* The current code model. */
121 enum aarch64_code_model aarch64_cmodel;
122
123 #ifdef HAVE_AS_TLS
124 #undef TARGET_HAVE_TLS
125 #define TARGET_HAVE_TLS 1
126 #endif
127
128 static bool aarch64_lra_p (void);
129 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
130 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
131 const_tree,
132 enum machine_mode *, int *,
133 bool *);
134 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
135 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
136 static void aarch64_override_options_after_change (void);
137 static bool aarch64_vector_mode_supported_p (enum machine_mode);
138 static unsigned bit_count (unsigned HOST_WIDE_INT);
139 static bool aarch64_const_vec_all_same_int_p (rtx,
140 HOST_WIDE_INT, HOST_WIDE_INT);
141
142 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
143 const unsigned char *sel);
144
145 /* The processor for which instructions should be scheduled. */
146 enum aarch64_processor aarch64_tune = cortexa53;
147
148 /* The current tuning set. */
149 const struct tune_params *aarch64_tune_params;
150
151 /* Mask to specify which instructions we are allowed to generate. */
152 unsigned long aarch64_isa_flags = 0;
153
154 /* Mask to specify which instruction scheduling options should be used. */
155 unsigned long aarch64_tune_flags = 0;
156
157 /* Tuning parameters. */
158
159 #if HAVE_DESIGNATED_INITIALIZERS
160 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
161 #else
162 #define NAMED_PARAM(NAME, VAL) (VAL)
163 #endif
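/* For example (illustrative only): with designated initializers,
   NAMED_PARAM (GP2GP, 1) expands to ".GP2GP = (1)"; without them it
   expands to just "(1)" and relies on positional initialization, so the
   fields in the tables below must stay in declaration order.  */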
164
169 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
170 __extension__
171 #endif
172 static const struct cpu_addrcost_table generic_addrcost_table =
173 {
174 NAMED_PARAM (pre_modify, 0),
175 NAMED_PARAM (post_modify, 0),
176 NAMED_PARAM (register_offset, 0),
177 NAMED_PARAM (register_extend, 0),
178 NAMED_PARAM (imm_offset, 0)
179 };
180
181 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
182 __extension__
183 #endif
184 static const struct cpu_regmove_cost generic_regmove_cost =
185 {
186 NAMED_PARAM (GP2GP, 1),
187 NAMED_PARAM (GP2FP, 2),
188 NAMED_PARAM (FP2GP, 2),
189 /* We currently do not provide direct support for TFmode Q->Q move.
190 Therefore we need to raise the cost above 2 in order to have
191 reload handle the situation. */
192 NAMED_PARAM (FP2FP, 4)
193 };
194
195 /* Generic costs for vector insn classes. */
196 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
197 __extension__
198 #endif
199 static const struct cpu_vector_cost generic_vector_cost =
200 {
201 NAMED_PARAM (scalar_stmt_cost, 1),
202 NAMED_PARAM (scalar_load_cost, 1),
203 NAMED_PARAM (scalar_store_cost, 1),
204 NAMED_PARAM (vec_stmt_cost, 1),
205 NAMED_PARAM (vec_to_scalar_cost, 1),
206 NAMED_PARAM (scalar_to_vec_cost, 1),
207 NAMED_PARAM (vec_align_load_cost, 1),
208 NAMED_PARAM (vec_unalign_load_cost, 1),
209 NAMED_PARAM (vec_unalign_store_cost, 1),
210 NAMED_PARAM (vec_store_cost, 1),
211 NAMED_PARAM (cond_taken_branch_cost, 3),
212 NAMED_PARAM (cond_not_taken_branch_cost, 1)
213 };
214
215 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
216 __extension__
217 #endif
218 static const struct tune_params generic_tunings =
219 {
220 &generic_extra_costs,
221 &generic_addrcost_table,
222 &generic_regmove_cost,
223 &generic_vector_cost,
224 NAMED_PARAM (memmov_cost, 4)
225 };
226
227 /* A processor implementing AArch64. */
228 struct processor
229 {
230 const char *const name;
231 enum aarch64_processor core;
232 const char *arch;
233 const unsigned long flags;
234 const struct tune_params *const tune;
235 };
236
237 /* Processor cores implementing AArch64. */
238 static const struct processor all_cores[] =
239 {
240 #define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
241 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
242 #include "aarch64-cores.def"
243 #undef AARCH64_CORE
244 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
245 {NULL, aarch64_none, NULL, 0, NULL}
246 };
247
248 /* Architectures implementing AArch64. */
249 static const struct processor all_architectures[] =
250 {
251 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
252 {NAME, CORE, #ARCH, FLAGS, NULL},
253 #include "aarch64-arches.def"
254 #undef AARCH64_ARCH
255 {NULL, aarch64_none, NULL, 0, NULL}
256 };
257
258 /* Target specification.  These are populated as command-line arguments
259 are processed, or NULL if not specified. */
260 static const struct processor *selected_arch;
261 static const struct processor *selected_cpu;
262 static const struct processor *selected_tune;
263
264 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
265
266 /* An ISA extension in the co-processor and main instruction set space. */
267 struct aarch64_option_extension
268 {
269 const char *const name;
270 const unsigned long flags_on;
271 const unsigned long flags_off;
272 };
273
274 /* ISA extensions in AArch64. */
275 static const struct aarch64_option_extension all_extensions[] =
276 {
277 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
278 {NAME, FLAGS_ON, FLAGS_OFF},
279 #include "aarch64-option-extensions.def"
280 #undef AARCH64_OPT_EXTENSION
281 {NULL, 0, 0}
282 };
283
284 /* Used to track the size of an address when generating a pre/post
285 increment address. */
286 static enum machine_mode aarch64_memory_reference_mode;
287
288 /* Used to force GTY into this file. */
289 static GTY(()) int gty_dummy;
290
291 /* A table of valid AArch64 "bitmask immediate" values for
292 logical instructions. */
293
294 #define AARCH64_NUM_BITMASKS 5334
295 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
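/* A bitmask immediate is a repeating 2-, 4-, 8-, 16-, 32- or 64-bit
   element containing a single (possibly rotated) run of set bits;
   illustrative examples are 0x5555555555555555, 0x00ff00ff00ff00ff and
   0x7fffffffffffffff.  All-zeros and all-ones are not encodable.  */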
296
297 /* Did we set flag_omit_frame_pointer just so
298 aarch64_frame_pointer_required would be called? */
299 static bool faked_omit_frame_pointer;
300
301 typedef enum aarch64_cond_code
302 {
303 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
304 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
305 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
306 }
307 aarch64_cc;
308
309 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
310
311 /* The condition codes of the processor, and the inverse function. */
312 static const char * const aarch64_condition_codes[] =
313 {
314 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
315 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
316 };
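/* AARCH64_INVERSE_CONDITION_CODE above simply flips the low bit, which
   pairs each condition with its inverse; for example (illustrative),
   AARCH64_EQ (0) maps to AARCH64_NE (1) and AARCH64_GE (10) maps to
   AARCH64_LT (11).  */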
317
318 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
319 unsigned
320 aarch64_dbx_register_number (unsigned regno)
321 {
322 if (GP_REGNUM_P (regno))
323 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
324 else if (regno == SP_REGNUM)
325 return AARCH64_DWARF_SP;
326 else if (FP_REGNUM_P (regno))
327 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
328
329 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
330 equivalent DWARF register. */
331 return DWARF_FRAME_REGISTERS;
332 }
333
334 /* Return TRUE if MODE is any of the large INT modes. */
335 static bool
336 aarch64_vect_struct_mode_p (enum machine_mode mode)
337 {
338 return mode == OImode || mode == CImode || mode == XImode;
339 }
340
341 /* Return TRUE if MODE is any of the vector modes. */
342 static bool
343 aarch64_vector_mode_p (enum machine_mode mode)
344 {
345 return aarch64_vector_mode_supported_p (mode)
346 || aarch64_vect_struct_mode_p (mode);
347 }
348
349 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
350 static bool
351 aarch64_array_mode_supported_p (enum machine_mode mode,
352 unsigned HOST_WIDE_INT nelems)
353 {
354 if (TARGET_SIMD
355 && AARCH64_VALID_SIMD_QREG_MODE (mode)
356 && (nelems >= 2 && nelems <= 4))
357 return true;
358
359 return false;
360 }
361
362 /* Implement HARD_REGNO_NREGS. */
363
364 int
365 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
366 {
367 switch (aarch64_regno_regclass (regno))
368 {
369 case FP_REGS:
370 case FP_LO_REGS:
371 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
372 default:
373 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
374 }
375 gcc_unreachable ();
376 }
377
378 /* Implement HARD_REGNO_MODE_OK. */
379
380 int
381 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
382 {
383 if (GET_MODE_CLASS (mode) == MODE_CC)
384 return regno == CC_REGNUM;
385
386 if (regno == SP_REGNUM)
387 /* The purpose of comparing with ptr_mode is to support the
388 global register variable associated with the stack pointer
389 register via the syntax of asm ("wsp") in ILP32. */
390 return mode == Pmode || mode == ptr_mode;
391
392 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
393 return mode == Pmode;
394
395 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
396 return 1;
397
398 if (FP_REGNUM_P (regno))
399 {
400 if (aarch64_vect_struct_mode_p (mode))
401 return
402 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
403 else
404 return 1;
405 }
406
407 return 0;
408 }
409
410 /* Return true if calls to DECL should be treated as
411    long-calls (i.e. called via a register).  */
412 static bool
413 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
414 {
415 return false;
416 }
417
418 /* Return true if calls to symbol-ref SYM should be treated as
419    long-calls (i.e. called via a register).  */
420 bool
421 aarch64_is_long_call_p (rtx sym)
422 {
423 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
424 }
425
426 /* Return true if the offsets to a zero/sign-extract operation
427 represent an expression that matches an extend operation. The
428    operands represent the parameters from
429
430 (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
431 bool
432 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
433 rtx extract_imm)
434 {
435 HOST_WIDE_INT mult_val, extract_val;
436
437 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
438 return false;
439
440 mult_val = INTVAL (mult_imm);
441 extract_val = INTVAL (extract_imm);
442
443 if (extract_val > 8
444 && extract_val < GET_MODE_BITSIZE (mode)
445 && exact_log2 (extract_val & ~7) > 0
446 && (extract_val & 7) <= 4
447 && mult_val == (1 << (extract_val & 7)))
448 return true;
449
450 return false;
451 }
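/* A worked example (illustrative): for DImode with MULT_IMM 4 and
   EXTRACT_IMM 34, the checks above pass, since 34 & ~7 == 32 is a power
   of two, 34 & 7 == 2 <= 4 and 4 == 1 << 2; i.e. the extract describes
   a 32-bit extend scaled by 4, as found in extended-register address
   and arithmetic operands.  */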
452
453 /* Emit an insn that's a simple single-set.  Both operands must be
454 known to be valid. */
455 inline static rtx
456 emit_set_insn (rtx x, rtx y)
457 {
458 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
459 }
460
461 /* X and Y are two things to compare using CODE. Emit the compare insn and
462 return the rtx for register 0 in the proper mode. */
463 rtx
464 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
465 {
466 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
467 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
468
469 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
470 return cc_reg;
471 }
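/* A minimal usage sketch (illustrative; roughly what the cbranch/cstore
   expanders in aarch64.md do):

     rtx cc_reg = aarch64_gen_compare_reg (NE, x, y);
     rtx cond = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx);

   COND can then be used as the condition of an if_then_else or a
   conditional set.  */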
472
473 /* Build the SYMBOL_REF for __tls_get_addr. */
474
475 static GTY(()) rtx tls_get_addr_libfunc;
476
477 rtx
478 aarch64_tls_get_addr (void)
479 {
480 if (!tls_get_addr_libfunc)
481 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
482 return tls_get_addr_libfunc;
483 }
484
485 /* Return the TLS model to use for ADDR. */
486
487 static enum tls_model
488 tls_symbolic_operand_type (rtx addr)
489 {
490 enum tls_model tls_kind = TLS_MODEL_NONE;
491 rtx sym, addend;
492
493 if (GET_CODE (addr) == CONST)
494 {
495 split_const (addr, &sym, &addend);
496 if (GET_CODE (sym) == SYMBOL_REF)
497 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
498 }
499 else if (GET_CODE (addr) == SYMBOL_REF)
500 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
501
502 return tls_kind;
503 }
504
505 /* We'll allow lo_sums in addresses in our legitimate addresses,
506    so that combine can take care of combining addresses where
507    necessary, but for generation purposes, we'll generate the
508    address as:
509 RTL Absolute
510 tmp = hi (symbol_ref); adrp x1, foo
511 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
512 nop
513
514 PIC TLS
515 adrp x1, :got:foo adrp tmp, :tlsgd:foo
516 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
517 bl __tls_get_addr
518 nop
519
520 Load TLS symbol, depending on TLS mechanism and TLS access model.
521
522 Global Dynamic - Traditional TLS:
523 adrp tmp, :tlsgd:imm
524 add dest, tmp, #:tlsgd_lo12:imm
525 bl __tls_get_addr
526
527 Global Dynamic - TLS Descriptors:
528 adrp dest, :tlsdesc:imm
529 ldr tmp, [dest, #:tlsdesc_lo12:imm]
530 add dest, dest, #:tlsdesc_lo12:imm
531 blr tmp
532 mrs tp, tpidr_el0
533 add dest, dest, tp
534
535 Initial Exec:
536 mrs tp, tpidr_el0
537 adrp tmp, :gottprel:imm
538 ldr dest, [tmp, #:gottprel_lo12:imm]
539 add dest, dest, tp
540
541 Local Exec:
542 mrs tp, tpidr_el0
543 add t0, tp, #:tprel_hi12:imm
544 add t0, #:tprel_lo12_nc:imm
545 */
546
547 static void
548 aarch64_load_symref_appropriately (rtx dest, rtx imm,
549 enum aarch64_symbol_type type)
550 {
551 switch (type)
552 {
553 case SYMBOL_SMALL_ABSOLUTE:
554 {
555 /* In ILP32, the mode of dest can be either SImode or DImode. */
556 rtx tmp_reg = dest;
557 enum machine_mode mode = GET_MODE (dest);
558
559 gcc_assert (mode == Pmode || mode == ptr_mode);
560
561 if (can_create_pseudo_p ())
562 tmp_reg = gen_reg_rtx (mode);
563
564 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
565 emit_insn (gen_add_losym (dest, tmp_reg, imm));
566 return;
567 }
568
569 case SYMBOL_TINY_ABSOLUTE:
570 emit_insn (gen_rtx_SET (Pmode, dest, imm));
571 return;
572
573 case SYMBOL_SMALL_GOT:
574 {
575 /* In ILP32, the mode of dest can be either SImode or DImode,
576 	   while the GOT entry is always of SImode size.  The mode of
577 	   dest depends on how dest is used: if dest is assigned to a
578 	   pointer (e.g. in memory), it has SImode; it may have
579 	   DImode if dest is dereferenced to access the memory.
580 This is why we have to handle three different ldr_got_small
581 patterns here (two patterns for ILP32). */
582 rtx tmp_reg = dest;
583 enum machine_mode mode = GET_MODE (dest);
584
585 if (can_create_pseudo_p ())
586 tmp_reg = gen_reg_rtx (mode);
587
588 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
589 if (mode == ptr_mode)
590 {
591 if (mode == DImode)
592 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
593 else
594 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
595 }
596 else
597 {
598 gcc_assert (mode == Pmode);
599 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
600 }
601
602 return;
603 }
604
605 case SYMBOL_SMALL_TLSGD:
606 {
607 rtx insns;
608 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
609
610 start_sequence ();
611 emit_call_insn (gen_tlsgd_small (result, imm));
612 insns = get_insns ();
613 end_sequence ();
614
615 RTL_CONST_CALL_P (insns) = 1;
616 emit_libcall_block (insns, dest, result, imm);
617 return;
618 }
619
620 case SYMBOL_SMALL_TLSDESC:
621 {
622 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
623 rtx tp;
624
625 emit_insn (gen_tlsdesc_small (imm));
626 tp = aarch64_load_tp (NULL);
627 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
628 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
629 return;
630 }
631
632 case SYMBOL_SMALL_GOTTPREL:
633 {
634 rtx tmp_reg = gen_reg_rtx (Pmode);
635 rtx tp = aarch64_load_tp (NULL);
636 emit_insn (gen_tlsie_small (tmp_reg, imm));
637 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
638 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
639 return;
640 }
641
642 case SYMBOL_SMALL_TPREL:
643 {
644 rtx tp = aarch64_load_tp (NULL);
645 emit_insn (gen_tlsle_small (dest, tp, imm));
646 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
647 return;
648 }
649
650 case SYMBOL_TINY_GOT:
651 emit_insn (gen_ldr_got_tiny (dest, imm));
652 return;
653
654 default:
655 gcc_unreachable ();
656 }
657 }
658
659 /* Emit a move from SRC to DEST. Assume that the move expanders can
660 handle all moves if !can_create_pseudo_p (). The distinction is
661 important because, unlike emit_move_insn, the move expanders know
662 how to force Pmode objects into the constant pool even when the
663 constant pool address is not itself legitimate. */
664 static rtx
665 aarch64_emit_move (rtx dest, rtx src)
666 {
667 return (can_create_pseudo_p ()
668 ? emit_move_insn (dest, src)
669 : emit_move_insn_1 (dest, src));
670 }
671
672 void
673 aarch64_split_128bit_move (rtx dst, rtx src)
674 {
675 rtx low_dst;
676
677 enum machine_mode src_mode = GET_MODE (src);
678 enum machine_mode dst_mode = GET_MODE (dst);
679 int src_regno = REGNO (src);
680 int dst_regno = REGNO (dst);
681
682 gcc_assert (dst_mode == TImode || dst_mode == TFmode);
683
684 if (REG_P (dst) && REG_P (src))
685 {
686 gcc_assert (src_mode == TImode || src_mode == TFmode);
687
688 /* Handle r -> w, w -> r. */
689 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
690 {
691 switch (src_mode) {
692 case TImode:
693 emit_insn
694 (gen_aarch64_movtilow_di (dst, gen_lowpart (word_mode, src)));
695 emit_insn
696 (gen_aarch64_movtihigh_di (dst, gen_highpart (word_mode, src)));
697 return;
698 case TFmode:
699 emit_insn
700 (gen_aarch64_movtflow_di (dst, gen_lowpart (word_mode, src)));
701 emit_insn
702 (gen_aarch64_movtfhigh_di (dst, gen_highpart (word_mode, src)));
703 return;
704 default:
705 gcc_unreachable ();
706 }
707 }
708 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
709 {
710 switch (src_mode) {
711 case TImode:
712 emit_insn
713 (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst), src));
714 emit_insn
715 (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst), src));
716 return;
717 case TFmode:
718 emit_insn
719 (gen_aarch64_movdi_tflow (gen_lowpart (word_mode, dst), src));
720 emit_insn
721 (gen_aarch64_movdi_tfhigh (gen_highpart (word_mode, dst), src));
722 return;
723 default:
724 gcc_unreachable ();
725 }
726 }
727 /* Fall through to r -> r cases. */
728 }
729
730 switch (dst_mode) {
731 case TImode:
732 low_dst = gen_lowpart (word_mode, dst);
733 if (REG_P (low_dst)
734 && reg_overlap_mentioned_p (low_dst, src))
735 {
736 aarch64_emit_move (gen_highpart (word_mode, dst),
737 gen_highpart_mode (word_mode, TImode, src));
738 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
739 }
740 else
741 {
742 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
743 aarch64_emit_move (gen_highpart (word_mode, dst),
744 gen_highpart_mode (word_mode, TImode, src));
745 }
746 return;
747 case TFmode:
748 emit_move_insn (gen_rtx_REG (DFmode, dst_regno),
749 gen_rtx_REG (DFmode, src_regno));
750 emit_move_insn (gen_rtx_REG (DFmode, dst_regno + 1),
751 gen_rtx_REG (DFmode, src_regno + 1));
752 return;
753 default:
754 gcc_unreachable ();
755 }
756 }
757
758 bool
759 aarch64_split_128bit_move_p (rtx dst, rtx src)
760 {
761 return (! REG_P (src)
762 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
763 }
764
765 /* Split a complex SIMD combine. */
766
767 void
768 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
769 {
770 enum machine_mode src_mode = GET_MODE (src1);
771 enum machine_mode dst_mode = GET_MODE (dst);
772
773 gcc_assert (VECTOR_MODE_P (dst_mode));
774
775 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
776 {
777 rtx (*gen) (rtx, rtx, rtx);
778
779 switch (src_mode)
780 {
781 case V8QImode:
782 gen = gen_aarch64_simd_combinev8qi;
783 break;
784 case V4HImode:
785 gen = gen_aarch64_simd_combinev4hi;
786 break;
787 case V2SImode:
788 gen = gen_aarch64_simd_combinev2si;
789 break;
790 case V2SFmode:
791 gen = gen_aarch64_simd_combinev2sf;
792 break;
793 case DImode:
794 gen = gen_aarch64_simd_combinedi;
795 break;
796 case DFmode:
797 gen = gen_aarch64_simd_combinedf;
798 break;
799 default:
800 gcc_unreachable ();
801 }
802
803 emit_insn (gen (dst, src1, src2));
804 return;
805 }
806 }
807
808 /* Split a complex SIMD move. */
809
810 void
811 aarch64_split_simd_move (rtx dst, rtx src)
812 {
813 enum machine_mode src_mode = GET_MODE (src);
814 enum machine_mode dst_mode = GET_MODE (dst);
815
816 gcc_assert (VECTOR_MODE_P (dst_mode));
817
818 if (REG_P (dst) && REG_P (src))
819 {
820 rtx (*gen) (rtx, rtx);
821
822 gcc_assert (VECTOR_MODE_P (src_mode));
823
824 switch (src_mode)
825 {
826 case V16QImode:
827 gen = gen_aarch64_split_simd_movv16qi;
828 break;
829 case V8HImode:
830 gen = gen_aarch64_split_simd_movv8hi;
831 break;
832 case V4SImode:
833 gen = gen_aarch64_split_simd_movv4si;
834 break;
835 case V2DImode:
836 gen = gen_aarch64_split_simd_movv2di;
837 break;
838 case V4SFmode:
839 gen = gen_aarch64_split_simd_movv4sf;
840 break;
841 case V2DFmode:
842 gen = gen_aarch64_split_simd_movv2df;
843 break;
844 default:
845 gcc_unreachable ();
846 }
847
848 emit_insn (gen (dst, src));
849 return;
850 }
851 }
852
853 static rtx
854 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
855 {
856 if (can_create_pseudo_p ())
857 return force_reg (mode, value);
858 else
859 {
860 x = aarch64_emit_move (x, value);
861 return x;
862 }
863 }
864
865
866 static rtx
867 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
868 {
869 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
870 {
871 rtx high;
872 /* Load the full offset into a register. This
873 might be improvable in the future. */
874 high = GEN_INT (offset);
875 offset = 0;
876 high = aarch64_force_temporary (mode, temp, high);
877 reg = aarch64_force_temporary (mode, temp,
878 gen_rtx_PLUS (mode, high, reg));
879 }
880 return plus_constant (mode, reg, offset);
881 }
882
883 void
884 aarch64_expand_mov_immediate (rtx dest, rtx imm)
885 {
886 enum machine_mode mode = GET_MODE (dest);
887 unsigned HOST_WIDE_INT mask;
888 int i;
889 bool first;
890 unsigned HOST_WIDE_INT val;
891 bool subtargets;
892 rtx subtarget;
893 int one_match, zero_match;
894
895 gcc_assert (mode == SImode || mode == DImode);
896
897 /* Check on what type of symbol it is. */
898 if (GET_CODE (imm) == SYMBOL_REF
899 || GET_CODE (imm) == LABEL_REF
900 || GET_CODE (imm) == CONST)
901 {
902 rtx mem, base, offset;
903 enum aarch64_symbol_type sty;
904
905 /* If we have (const (plus symbol offset)), separate out the offset
906 before we start classifying the symbol. */
907 split_const (imm, &base, &offset);
908
909 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
910 switch (sty)
911 {
912 case SYMBOL_FORCE_TO_MEM:
913 if (offset != const0_rtx
914 && targetm.cannot_force_const_mem (mode, imm))
915 {
916 gcc_assert(can_create_pseudo_p ());
917 base = aarch64_force_temporary (mode, dest, base);
918 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
919 aarch64_emit_move (dest, base);
920 return;
921 }
922 mem = force_const_mem (ptr_mode, imm);
923 gcc_assert (mem);
924 if (mode != ptr_mode)
925 mem = gen_rtx_ZERO_EXTEND (mode, mem);
926 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
927 return;
928
929 case SYMBOL_SMALL_TLSGD:
930 case SYMBOL_SMALL_TLSDESC:
931 case SYMBOL_SMALL_GOTTPREL:
932 case SYMBOL_SMALL_GOT:
933 case SYMBOL_TINY_GOT:
934 if (offset != const0_rtx)
935 {
936 gcc_assert(can_create_pseudo_p ());
937 base = aarch64_force_temporary (mode, dest, base);
938 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
939 aarch64_emit_move (dest, base);
940 return;
941 }
942 /* FALLTHRU */
943
944 case SYMBOL_SMALL_TPREL:
945 case SYMBOL_SMALL_ABSOLUTE:
946 case SYMBOL_TINY_ABSOLUTE:
947 aarch64_load_symref_appropriately (dest, imm, sty);
948 return;
949
950 default:
951 gcc_unreachable ();
952 }
953 }
954
955 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
956 {
957 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
958 return;
959 }
960
961 if (!CONST_INT_P (imm))
962 {
963 if (GET_CODE (imm) == HIGH)
964 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
965 else
966 {
967 rtx mem = force_const_mem (mode, imm);
968 gcc_assert (mem);
969 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
970 }
971
972 return;
973 }
974
975 if (mode == SImode)
976 {
977 /* We know we can't do this in 1 insn, and we must be able to do it
978 in two; so don't mess around looking for sequences that don't buy
979 us anything. */
980 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
981 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
982 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
983 return;
984 }
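  /* For example (illustrative), moving the SImode constant 0x12345678
     into w0 becomes:
         mov  w0, #0x5678
         movk w0, #0x1234, lsl #16  */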
985
986 /* Remaining cases are all for DImode. */
987
988 val = INTVAL (imm);
989 subtargets = optimize && can_create_pseudo_p ();
990
991 one_match = 0;
992 zero_match = 0;
993 mask = 0xffff;
994
995 for (i = 0; i < 64; i += 16, mask <<= 16)
996 {
997 if ((val & mask) == 0)
998 zero_match++;
999 else if ((val & mask) == mask)
1000 one_match++;
1001 }
1002
1003 if (one_match == 2)
1004 {
1005 mask = 0xffff;
1006 for (i = 0; i < 64; i += 16, mask <<= 16)
1007 {
1008 if ((val & mask) != mask)
1009 {
1010 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1011 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1012 GEN_INT ((val >> i) & 0xffff)));
1013 return;
1014 }
1015 }
1016 gcc_unreachable ();
1017 }
1018
1019 if (zero_match == 2)
1020 goto simple_sequence;
1021
1022 mask = 0x0ffff0000UL;
1023 for (i = 16; i < 64; i += 16, mask <<= 16)
1024 {
1025 HOST_WIDE_INT comp = mask & ~(mask - 1);
1026
1027 if (aarch64_uimm12_shift (val - (val & mask)))
1028 {
1029 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1030
1031 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1032 emit_insn (gen_adddi3 (dest, subtarget,
1033 GEN_INT (val - (val & mask))));
1034 return;
1035 }
1036 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1037 {
1038 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1039
1040 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1041 GEN_INT ((val + comp) & mask)));
1042 emit_insn (gen_adddi3 (dest, subtarget,
1043 GEN_INT (val - ((val + comp) & mask))));
1044 return;
1045 }
1046 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1047 {
1048 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1049
1050 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1051 GEN_INT ((val - comp) | ~mask)));
1052 emit_insn (gen_adddi3 (dest, subtarget,
1053 GEN_INT (val - ((val - comp) | ~mask))));
1054 return;
1055 }
1056 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1057 {
1058 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1059
1060 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1061 GEN_INT (val | ~mask)));
1062 emit_insn (gen_adddi3 (dest, subtarget,
1063 GEN_INT (val - (val | ~mask))));
1064 return;
1065 }
1066 }
1067
1068 /* See if we can do it by arithmetically combining two
1069 immediates. */
1070 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1071 {
1072 int j;
1073 mask = 0xffff;
1074
1075 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1076 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1077 {
1078 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1079 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1080 GEN_INT (aarch64_bitmasks[i])));
1081 emit_insn (gen_adddi3 (dest, subtarget,
1082 GEN_INT (val - aarch64_bitmasks[i])));
1083 return;
1084 }
1085
1086 for (j = 0; j < 64; j += 16, mask <<= 16)
1087 {
1088 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1089 {
1090 emit_insn (gen_rtx_SET (VOIDmode, dest,
1091 GEN_INT (aarch64_bitmasks[i])));
1092 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1093 GEN_INT ((val >> j) & 0xffff)));
1094 return;
1095 }
1096 }
1097 }
1098
1099 /* See if we can do it by logically combining two immediates. */
1100 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1101 {
1102 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1103 {
1104 int j;
1105
1106 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1107 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1108 {
1109 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1110 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1111 GEN_INT (aarch64_bitmasks[i])));
1112 emit_insn (gen_iordi3 (dest, subtarget,
1113 GEN_INT (aarch64_bitmasks[j])));
1114 return;
1115 }
1116 }
1117 else if ((val & aarch64_bitmasks[i]) == val)
1118 {
1119 int j;
1120
1121 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1122 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1123 {
1124
1125 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1126 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1127 GEN_INT (aarch64_bitmasks[j])));
1128 emit_insn (gen_anddi3 (dest, subtarget,
1129 GEN_INT (aarch64_bitmasks[i])));
1130 return;
1131 }
1132 }
1133 }
1134
1135 simple_sequence:
1136 first = true;
1137 mask = 0xffff;
1138 for (i = 0; i < 64; i += 16, mask <<= 16)
1139 {
1140 if ((val & mask) != 0)
1141 {
1142 if (first)
1143 {
1144 emit_insn (gen_rtx_SET (VOIDmode, dest,
1145 GEN_INT (val & mask)));
1146 first = false;
1147 }
1148 else
1149 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1150 GEN_INT ((val >> i) & 0xffff)));
1151 }
1152 }
1153 }
1154
1155 static bool
1156 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1157 {
1158 /* Indirect calls are not currently supported. */
1159 if (decl == NULL)
1160 return false;
1161
1162 /* Cannot tail-call to long-calls, since these are outside of the
1163 range of a branch instruction (we could handle this if we added
1164      support for indirect tail-calls).  */
1165 if (aarch64_decl_is_long_call_p (decl))
1166 return false;
1167
1168 return true;
1169 }
1170
1171 /* Implement TARGET_PASS_BY_REFERENCE. */
1172
1173 static bool
1174 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1175 enum machine_mode mode,
1176 const_tree type,
1177 bool named ATTRIBUTE_UNUSED)
1178 {
1179 HOST_WIDE_INT size;
1180 enum machine_mode dummymode;
1181 int nregs;
1182
1183 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1184 size = (mode == BLKmode && type)
1185 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1186
1187 if (type)
1188 {
1189 /* Arrays always passed by reference. */
1190 if (TREE_CODE (type) == ARRAY_TYPE)
1191 return true;
1192 /* Other aggregates based on their size. */
1193 if (AGGREGATE_TYPE_P (type))
1194 size = int_size_in_bytes (type);
1195 }
1196
1197   /* Variable sized arguments are always passed by reference.  */
1198 if (size < 0)
1199 return true;
1200
1201 /* Can this be a candidate to be passed in fp/simd register(s)? */
1202 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1203 &dummymode, &nregs,
1204 NULL))
1205 return false;
1206
1207 /* Arguments which are variable sized or larger than 2 registers are
1208      passed by reference unless they are a homogeneous floating-point
1209 aggregate. */
1210 return size > 2 * UNITS_PER_WORD;
1211 }
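/* For example (illustrative): a structure of three doubles is a
   homogeneous floating-point aggregate and is passed in SIMD/FP
   registers, whereas a structure of three 64-bit integers (24 bytes,
   more than two GP registers) is passed by reference.  */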
1212
1213 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1214 static bool
1215 aarch64_return_in_msb (const_tree valtype)
1216 {
1217 enum machine_mode dummy_mode;
1218 int dummy_int;
1219
1220 /* Never happens in little-endian mode. */
1221 if (!BYTES_BIG_ENDIAN)
1222 return false;
1223
1224 /* Only composite types smaller than or equal to 16 bytes can
1225 be potentially returned in registers. */
1226 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1227 || int_size_in_bytes (valtype) <= 0
1228 || int_size_in_bytes (valtype) > 16)
1229 return false;
1230
1231 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1232 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1233 is always passed/returned in the least significant bits of fp/simd
1234 register(s). */
1235 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1236 &dummy_mode, &dummy_int, NULL))
1237 return false;
1238
1239 return true;
1240 }
1241
1242 /* Implement TARGET_FUNCTION_VALUE.
1243 Define how to find the value returned by a function. */
1244
1245 static rtx
1246 aarch64_function_value (const_tree type, const_tree func,
1247 bool outgoing ATTRIBUTE_UNUSED)
1248 {
1249 enum machine_mode mode;
1250 int unsignedp;
1251 int count;
1252 enum machine_mode ag_mode;
1253
1254 mode = TYPE_MODE (type);
1255 if (INTEGRAL_TYPE_P (type))
1256 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1257
1258 if (aarch64_return_in_msb (type))
1259 {
1260 HOST_WIDE_INT size = int_size_in_bytes (type);
1261
1262 if (size % UNITS_PER_WORD != 0)
1263 {
1264 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1265 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1266 }
1267 }
1268
1269 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1270 &ag_mode, &count, NULL))
1271 {
1272 if (!aarch64_composite_type_p (type, mode))
1273 {
1274 gcc_assert (count == 1 && mode == ag_mode);
1275 return gen_rtx_REG (mode, V0_REGNUM);
1276 }
1277 else
1278 {
1279 int i;
1280 rtx par;
1281
1282 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1283 for (i = 0; i < count; i++)
1284 {
1285 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1286 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1287 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1288 XVECEXP (par, 0, i) = tmp;
1289 }
1290 return par;
1291 }
1292 }
1293 else
1294 return gen_rtx_REG (mode, R0_REGNUM);
1295 }
1296
1297 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1298 Return true if REGNO is the number of a hard register in which the values
1299 of called function may come back. */
1300
1301 static bool
1302 aarch64_function_value_regno_p (const unsigned int regno)
1303 {
1304 /* Maximum of 16 bytes can be returned in the general registers. Examples
1305 of 16-byte return values are: 128-bit integers and 16-byte small
1306 structures (excluding homogeneous floating-point aggregates). */
1307 if (regno == R0_REGNUM || regno == R1_REGNUM)
1308 return true;
1309
1310 /* Up to four fp/simd registers can return a function value, e.g. a
1311 homogeneous floating-point aggregate having four members. */
1312 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1313 return !TARGET_GENERAL_REGS_ONLY;
1314
1315 return false;
1316 }
1317
1318 /* Implement TARGET_RETURN_IN_MEMORY.
1319
1320 If the type T of the result of a function is such that
1321 void func (T arg)
1322 would require that arg be passed as a value in a register (or set of
1323 registers) according to the parameter passing rules, then the result
1324 is returned in the same registers as would be used for such an
1325 argument. */
1326
1327 static bool
1328 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1329 {
1330 HOST_WIDE_INT size;
1331 enum machine_mode ag_mode;
1332 int count;
1333
1334 if (!AGGREGATE_TYPE_P (type)
1335 && TREE_CODE (type) != COMPLEX_TYPE
1336 && TREE_CODE (type) != VECTOR_TYPE)
1337 /* Simple scalar types always returned in registers. */
1338 return false;
1339
1340 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1341 type,
1342 &ag_mode,
1343 &count,
1344 NULL))
1345 return false;
1346
1347 /* Types larger than 2 registers returned in memory. */
1348 size = int_size_in_bytes (type);
1349 return (size < 0 || size > 2 * UNITS_PER_WORD);
1350 }
1351
1352 static bool
1353 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1354 const_tree type, int *nregs)
1355 {
1356 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1357 return aarch64_vfp_is_call_or_return_candidate (mode,
1358 type,
1359 &pcum->aapcs_vfp_rmode,
1360 nregs,
1361 NULL);
1362 }
1363
1364 /* Given MODE and TYPE of a function argument, return the alignment in
1365 bits. The idea is to suppress any stronger alignment requested by
1366 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1367 This is a helper function for local use only. */
1368
1369 static unsigned int
1370 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1371 {
1372 unsigned int alignment;
1373
1374 if (type)
1375 {
1376 if (!integer_zerop (TYPE_SIZE (type)))
1377 {
1378 if (TYPE_MODE (type) == mode)
1379 alignment = TYPE_ALIGN (type);
1380 else
1381 alignment = GET_MODE_ALIGNMENT (mode);
1382 }
1383 else
1384 alignment = 0;
1385 }
1386 else
1387 alignment = GET_MODE_ALIGNMENT (mode);
1388
1389 return alignment;
1390 }
1391
1392 /* Layout a function argument according to the AAPCS64 rules. The rule
1393 numbers refer to the rule numbers in the AAPCS64. */
1394
1395 static void
1396 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1397 const_tree type,
1398 bool named ATTRIBUTE_UNUSED)
1399 {
1400 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1401 int ncrn, nvrn, nregs;
1402 bool allocate_ncrn, allocate_nvrn;
1403
1404 /* We need to do this once per argument. */
1405 if (pcum->aapcs_arg_processed)
1406 return;
1407
1408 pcum->aapcs_arg_processed = true;
1409
1410 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1411 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1412 mode,
1413 type,
1414 &nregs);
1415
1416   /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1417 The following code thus handles passing by SIMD/FP registers first. */
1418
1419 nvrn = pcum->aapcs_nvrn;
1420
1421   /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
1422      and homogeneous short-vector aggregates (HVA).  */
1423 if (allocate_nvrn)
1424 {
1425 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1426 {
1427 pcum->aapcs_nextnvrn = nvrn + nregs;
1428 if (!aarch64_composite_type_p (type, mode))
1429 {
1430 gcc_assert (nregs == 1);
1431 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1432 }
1433 else
1434 {
1435 rtx par;
1436 int i;
1437 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1438 for (i = 0; i < nregs; i++)
1439 {
1440 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1441 V0_REGNUM + nvrn + i);
1442 tmp = gen_rtx_EXPR_LIST
1443 (VOIDmode, tmp,
1444 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1445 XVECEXP (par, 0, i) = tmp;
1446 }
1447 pcum->aapcs_reg = par;
1448 }
1449 return;
1450 }
1451 else
1452 {
1453 /* C.3 NSRN is set to 8. */
1454 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1455 goto on_stack;
1456 }
1457 }
1458
1459 ncrn = pcum->aapcs_ncrn;
1460 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1461 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1462
1463
1464   /* C6 - C9, though the sign and zero extension semantics are
1465      handled elsewhere.  This is the case where the argument fits
1466      entirely in general registers.  */
1467 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1468 {
1469 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1470
1471 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1472
1473 /* C.8 if the argument has an alignment of 16 then the NGRN is
1474 rounded up to the next even number. */
1475 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1476 {
1477 ++ncrn;
1478 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1479 }
1480 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1481 A reg is still generated for it, but the caller should be smart
1482 enough not to use it. */
1483 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1484 {
1485 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1486 }
1487 else
1488 {
1489 rtx par;
1490 int i;
1491
1492 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1493 for (i = 0; i < nregs; i++)
1494 {
1495 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1496 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1497 GEN_INT (i * UNITS_PER_WORD));
1498 XVECEXP (par, 0, i) = tmp;
1499 }
1500 pcum->aapcs_reg = par;
1501 }
1502
1503 pcum->aapcs_nextncrn = ncrn + nregs;
1504 return;
1505 }
1506
1507 /* C.11 */
1508 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1509
1510 /* The argument is passed on stack; record the needed number of words for
1511 this argument (we can re-use NREGS) and align the total size if
1512 necessary. */
1513 on_stack:
1514 pcum->aapcs_stack_words = nregs;
1515 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1516 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1517 16 / UNITS_PER_WORD) + 1;
1518 return;
1519 }
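/* An illustrative example of the C.8 rule above: for a call such as
   f (int a, __int128 b), A is passed in W0; B needs two registers and
   has 16-byte alignment, so the NGRN is rounded up from 1 to 2 and B is
   passed in X2 and X3.  */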
1520
1521 /* Implement TARGET_FUNCTION_ARG. */
1522
1523 static rtx
1524 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1525 const_tree type, bool named)
1526 {
1527 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1528 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1529
1530 if (mode == VOIDmode)
1531 return NULL_RTX;
1532
1533 aarch64_layout_arg (pcum_v, mode, type, named);
1534 return pcum->aapcs_reg;
1535 }
1536
1537 void
1538 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1539 const_tree fntype ATTRIBUTE_UNUSED,
1540 rtx libname ATTRIBUTE_UNUSED,
1541 const_tree fndecl ATTRIBUTE_UNUSED,
1542 unsigned n_named ATTRIBUTE_UNUSED)
1543 {
1544 pcum->aapcs_ncrn = 0;
1545 pcum->aapcs_nvrn = 0;
1546 pcum->aapcs_nextncrn = 0;
1547 pcum->aapcs_nextnvrn = 0;
1548 pcum->pcs_variant = ARM_PCS_AAPCS64;
1549 pcum->aapcs_reg = NULL_RTX;
1550 pcum->aapcs_arg_processed = false;
1551 pcum->aapcs_stack_words = 0;
1552 pcum->aapcs_stack_size = 0;
1553
1554 return;
1555 }
1556
1557 static void
1558 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1559 enum machine_mode mode,
1560 const_tree type,
1561 bool named)
1562 {
1563 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1564 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1565 {
1566 aarch64_layout_arg (pcum_v, mode, type, named);
1567 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1568 != (pcum->aapcs_stack_words != 0));
1569 pcum->aapcs_arg_processed = false;
1570 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1571 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1572 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1573 pcum->aapcs_stack_words = 0;
1574 pcum->aapcs_reg = NULL_RTX;
1575 }
1576 }
1577
1578 bool
1579 aarch64_function_arg_regno_p (unsigned regno)
1580 {
1581 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1582 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1583 }
1584
1585 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1586 PARM_BOUNDARY bits of alignment, but will be given anything up
1587 to STACK_BOUNDARY bits if the type requires it. This makes sure
1588 that both before and after the layout of each argument, the Next
1589 Stacked Argument Address (NSAA) will have a minimum alignment of
1590 8 bytes. */
1591
1592 static unsigned int
1593 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1594 {
1595 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1596
1597 if (alignment < PARM_BOUNDARY)
1598 alignment = PARM_BOUNDARY;
1599 if (alignment > STACK_BOUNDARY)
1600 alignment = STACK_BOUNDARY;
1601 return alignment;
1602 }
1603
1604 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1605
1606 Return true if an argument passed on the stack should be padded upwards,
1607 i.e. if the least-significant byte of the stack slot has useful data.
1608
1609 Small aggregate types are placed in the lowest memory address.
1610
1611 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1612
1613 bool
1614 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1615 {
1616 /* On little-endian targets, the least significant byte of every stack
1617 argument is passed at the lowest byte address of the stack slot. */
1618 if (!BYTES_BIG_ENDIAN)
1619 return true;
1620
1621 /* Otherwise, integral, floating-point and pointer types are padded downward:
1622 the least significant byte of a stack argument is passed at the highest
1623 byte address of the stack slot. */
1624 if (type
1625 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1626 || POINTER_TYPE_P (type))
1627 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1628 return false;
1629
1630 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1631 return true;
1632 }
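/* For example (illustrative): on a big-endian target a scalar int
   argument is padded downward, so its least significant byte sits at
   the highest address of its stack slot, whereas a small structure is
   padded upward and its data starts at the lowest address.  */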
1633
1634 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1635
1636    It specifies padding for the last (and possibly only)
1637    element of a block move between registers and memory.  If
1638    the block is assumed to be in memory, padding upward means
1639    that the last element is padded after its most significant
1640    byte, while in downward padding the last element is padded
1641    on its least significant byte side.
1642
1643 Small aggregates and small complex types are always padded
1644 upwards.
1645
1646 We don't need to worry about homogeneous floating-point or
1647 short-vector aggregates; their move is not affected by the
1648 padding direction determined here. Regardless of endianness,
1649 each element of such an aggregate is put in the least
1650 significant bits of a fp/simd register.
1651
1652 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1653 register has useful data, and return the opposite if the most
1654 significant byte does. */
1655
1656 bool
1657 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1658 bool first ATTRIBUTE_UNUSED)
1659 {
1660
1661 /* Small composite types are always padded upward. */
1662 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1663 {
1664 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1665 : GET_MODE_SIZE (mode));
1666 if (size < 2 * UNITS_PER_WORD)
1667 return true;
1668 }
1669
1670 /* Otherwise, use the default padding. */
1671 return !BYTES_BIG_ENDIAN;
1672 }
1673
1674 static enum machine_mode
1675 aarch64_libgcc_cmp_return_mode (void)
1676 {
1677 return SImode;
1678 }
1679
1680 static bool
1681 aarch64_frame_pointer_required (void)
1682 {
1683 /* If the function contains dynamic stack allocations, we need to
1684 use the frame pointer to access the static parts of the frame. */
1685 if (cfun->calls_alloca)
1686 return true;
1687
1688 /* We may have turned flag_omit_frame_pointer on in order to have this
1689 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1690 and we'll check it here.
1691      If flag_omit_frame_pointer was really set by the user, then we return false
1692 (no frame pointer required) in all cases. */
1693
1694 if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1695 return false;
1696 else if (flag_omit_leaf_frame_pointer)
1697 return !crtl->is_leaf;
1698 return true;
1699 }
1700
1701 /* Mark the registers that need to be saved by the callee and calculate
1702 the size of the callee-saved registers area and frame record (both FP
1703 and LR may be omitted). */
1704 static void
1705 aarch64_layout_frame (void)
1706 {
1707 HOST_WIDE_INT offset = 0;
1708 int regno;
1709
1710 if (reload_completed && cfun->machine->frame.laid_out)
1711 return;
1712
1713 cfun->machine->frame.fp_lr_offset = 0;
1714
1715 /* First mark all the registers that really need to be saved... */
1716 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1717 cfun->machine->frame.reg_offset[regno] = -1;
1718
1719 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1720 cfun->machine->frame.reg_offset[regno] = -1;
1721
1722 /* ... that includes the eh data registers (if needed)... */
1723 if (crtl->calls_eh_return)
1724 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1725 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1726
1727 /* ... and any callee saved register that dataflow says is live. */
1728 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1729 if (df_regs_ever_live_p (regno)
1730 && !call_used_regs[regno])
1731 cfun->machine->frame.reg_offset[regno] = 0;
1732
1733 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1734 if (df_regs_ever_live_p (regno)
1735 && !call_used_regs[regno])
1736 cfun->machine->frame.reg_offset[regno] = 0;
1737
1738 if (frame_pointer_needed)
1739 {
1740 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1741 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1742 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1743 }
1744
1745 /* Now assign stack slots for them. */
1746 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1747 if (cfun->machine->frame.reg_offset[regno] != -1)
1748 {
1749 cfun->machine->frame.reg_offset[regno] = offset;
1750 offset += UNITS_PER_WORD;
1751 }
1752
1753 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1754 if (cfun->machine->frame.reg_offset[regno] != -1)
1755 {
1756 cfun->machine->frame.reg_offset[regno] = offset;
1757 offset += UNITS_PER_WORD;
1758 }
1759
1760 if (frame_pointer_needed)
1761 {
1762 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1763 offset += UNITS_PER_WORD;
1764 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1765 }
1766
1767 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1768 {
1769 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1770 offset += UNITS_PER_WORD;
1771 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1772 }
1773
1774 cfun->machine->frame.padding0 =
1775 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1776 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1777
1778 cfun->machine->frame.saved_regs_size = offset;
1779 cfun->machine->frame.laid_out = true;
1780 }
1781
1782 /* Make the last instruction frame-related and note that it performs
1783 the operation described by FRAME_PATTERN. */
1784
1785 static void
1786 aarch64_set_frame_expr (rtx frame_pattern)
1787 {
1788 rtx insn;
1789
1790 insn = get_last_insn ();
1791 RTX_FRAME_RELATED_P (insn) = 1;
1792 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1793 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1794 frame_pattern,
1795 REG_NOTES (insn));
1796 }
1797
1798 static bool
1799 aarch64_register_saved_on_entry (int regno)
1800 {
1801 return cfun->machine->frame.reg_offset[regno] != -1;
1802 }
1803
1804
1805 static void
1806 aarch64_save_or_restore_fprs (int start_offset, int increment,
1807 bool restore, rtx base_rtx)
1808
1809 {
1810 unsigned regno;
1811 unsigned regno2;
1812 rtx insn;
1813 rtx (*gen_mem_ref)(enum machine_mode, rtx)
1814 = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1815
1816
1817 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1818 {
1819 if (aarch64_register_saved_on_entry (regno))
1820 {
1821 rtx mem;
1822 mem = gen_mem_ref (DFmode,
1823 plus_constant (Pmode,
1824 base_rtx,
1825 start_offset));
1826
1827 for (regno2 = regno + 1;
1828 regno2 <= V31_REGNUM
1829 && !aarch64_register_saved_on_entry (regno2);
1830 regno2++)
1831 {
1832 /* Empty loop. */
1833 }
1834 if (regno2 <= V31_REGNUM &&
1835 aarch64_register_saved_on_entry (regno2))
1836 {
1837 rtx mem2;
1838 /* Next highest register to be saved. */
1839 mem2 = gen_mem_ref (DFmode,
1840 plus_constant
1841 (Pmode,
1842 base_rtx,
1843 start_offset + increment));
1844 if (restore == false)
1845 {
1846 insn = emit_insn
1847 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1848 mem2, gen_rtx_REG (DFmode, regno2)));
1849
1850 }
1851 else
1852 {
1853 insn = emit_insn
1854 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1855 gen_rtx_REG (DFmode, regno2), mem2));
1856
1857 add_reg_note (insn, REG_CFA_RESTORE,
1858 gen_rtx_REG (DFmode, regno));
1859 add_reg_note (insn, REG_CFA_RESTORE,
1860 gen_rtx_REG (DFmode, regno2));
1861 }
1862
1863 /* The first part of a frame-related parallel insn
1864 is always assumed to be relevant to the frame
1865 	     calculations; subsequent parts are only
1866 frame-related if explicitly marked. */
1867 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1868 regno = regno2;
1869 start_offset += increment * 2;
1870 }
1871 else
1872 {
1873 if (restore == false)
1874 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1875 else
1876 {
1877 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1878 add_reg_note (insn, REG_CFA_RESTORE,
1879 gen_rtx_REG (DImode, regno));
1880 }
1881 start_offset += increment;
1882 }
1883 RTX_FRAME_RELATED_P (insn) = 1;
1884 }
1885 }
1886
1887 }
1888
1889
1890 /* Offset from the stack pointer of where the saves and
1891    restores have to happen.  */
1892 static void
1893 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1894 bool restore)
1895 {
1896 rtx insn;
1897 rtx base_rtx = stack_pointer_rtx;
1898 HOST_WIDE_INT start_offset = offset;
1899 HOST_WIDE_INT increment = UNITS_PER_WORD;
1900 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1901 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1902 unsigned regno;
1903 unsigned regno2;
1904
1905 for (regno = R0_REGNUM; regno <= limit; regno++)
1906 {
1907 if (aarch64_register_saved_on_entry (regno))
1908 {
1909 rtx mem;
1910 mem = gen_mem_ref (Pmode,
1911 plus_constant (Pmode,
1912 base_rtx,
1913 start_offset));
1914
1915 for (regno2 = regno + 1;
1916 regno2 <= limit
1917 && !aarch64_register_saved_on_entry (regno2);
1918 regno2++)
1919 {
1920 /* Empty loop. */
1921 }
1922 if (regno2 <= limit &&
1923 aarch64_register_saved_on_entry (regno2))
1924 {
1925 rtx mem2;
1926 /* Next highest register to be saved. */
1927 mem2 = gen_mem_ref (Pmode,
1928 plus_constant
1929 (Pmode,
1930 base_rtx,
1931 start_offset + increment));
1932 if (restore == false)
1933 {
1934 insn = emit_insn
1935 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1936 mem2, gen_rtx_REG (DImode, regno2)));
1937
1938 }
1939 else
1940 {
1941 insn = emit_insn
1942 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1943 gen_rtx_REG (DImode, regno2), mem2));
1944
1945 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1946 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1947 }
1948
1949 /* The first part of a frame-related parallel insn
1950 is always assumed to be relevant to the frame
1951 calculations; subsequent parts are only
1952 frame-related if explicitly marked. */
1953 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1954 1)) = 1;
1955 regno = regno2;
1956 start_offset += increment * 2;
1957 }
1958 else
1959 {
1960 if (restore == false)
1961 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1962 else
1963 {
1964 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1965 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1966 }
1967 start_offset += increment;
1968 }
1969 RTX_FRAME_RELATED_P (insn) = 1;
1970 }
1971 }
1972
1973 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1974
1975 }
1976
1977 /* AArch64 stack frames generated by this compiler look like:
1978
1979 +-------------------------------+
1980 | |
1981 | incoming stack arguments |
1982 | |
1983 +-------------------------------+ <-- arg_pointer_rtx
1984 | |
1985 | callee-allocated save area |
1986 | for register varargs |
1987 | |
1988 +-------------------------------+
1989 | |
1990 | local variables |
1991 | |
1992 +-------------------------------+ <-- frame_pointer_rtx
1993 | |
1994 | callee-saved registers |
1995 | |
1996 +-------------------------------+
1997 | LR' |
1998 +-------------------------------+
1999 | FP' |
2000 P +-------------------------------+ <-- hard_frame_pointer_rtx
2001 | dynamic allocation |
2002 +-------------------------------+
2003 | |
2004 | outgoing stack arguments |
2005 | |
2006 +-------------------------------+ <-- stack_pointer_rtx
2007
2008 Dynamic stack allocations such as alloca insert data at point P.
2009 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2010 hard_frame_pointer_rtx unchanged. */
2011
2012 /* Generate the prologue instructions for entry into a function.
2013 Establish the stack frame by decreasing the stack pointer with a
2014 properly calculated size and, if necessary, create a frame record
2015 filled with the values of LR and previous frame pointer. The
2016 current FP is also set up if it is in use. */
2017
2018 void
2019 aarch64_expand_prologue (void)
2020 {
2021 /* sub sp, sp, #<frame_size>
2022 stp {fp, lr}, [sp, #<frame_size> - 16]
2023 add fp, sp, #<frame_size> - hardfp_offset
2024 stp {cs_reg}, [fp, #-16] etc.
2025
2026 sub sp, sp, <final_adjustment_if_any>
2027 */
2028 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2029 HOST_WIDE_INT frame_size, offset;
2030 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2031 rtx insn;
2032
2033 aarch64_layout_frame ();
2034 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2035 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2036 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2037 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2038 + crtl->outgoing_args_size);
2039 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2040 STACK_BOUNDARY / BITS_PER_UNIT);
2041
2042 if (flag_stack_usage_info)
2043 current_function_static_stack_size = frame_size;
2044
2045 fp_offset = (offset
2046 - original_frame_size
2047 - cfun->machine->frame.saved_regs_size);
2048
2049 /* Store pairs and load pairs have a range of only -512 to 504. */
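/* (The LDP/STP immediate is a signed 7-bit value scaled by the access
size; with 8-byte registers that is -64*8 .. 63*8, i.e. -512 .. 504.) */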
2050 if (offset >= 512)
2051 {
2052 /* When the frame has a large size, an initial decrease is done on
2053 the stack pointer to jump over the callee-allocated save area for
2054 register varargs, the local variable area and/or the callee-saved
2055 register area. This will allow the pre-index write-back
2056 store pair instructions to be used for setting up the stack frame
2057 efficiently. */
2058 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2059 if (offset >= 512)
2060 offset = cfun->machine->frame.saved_regs_size;
2061
2062 frame_size -= (offset + crtl->outgoing_args_size);
2063 fp_offset = 0;
2064
2065 if (frame_size >= 0x1000000)
2066 {
2067 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2068 emit_move_insn (op0, GEN_INT (-frame_size));
2069 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2070 aarch64_set_frame_expr (gen_rtx_SET
2071 (Pmode, stack_pointer_rtx,
2072 plus_constant (Pmode,
2073 stack_pointer_rtx,
2074 -frame_size)));
2075 }
2076 else if (frame_size > 0)
2077 {
2078 if ((frame_size & 0xfff) != frame_size)
2079 {
2080 insn = emit_insn (gen_add2_insn
2081 (stack_pointer_rtx,
2082 GEN_INT (-(frame_size
2083 & ~(HOST_WIDE_INT)0xfff))));
2084 RTX_FRAME_RELATED_P (insn) = 1;
2085 }
2086 if ((frame_size & 0xfff) != 0)
2087 {
2088 insn = emit_insn (gen_add2_insn
2089 (stack_pointer_rtx,
2090 GEN_INT (-(frame_size
2091 & (HOST_WIDE_INT)0xfff))));
2092 RTX_FRAME_RELATED_P (insn) = 1;
2093 }
2094 }
2095 }
2096 else
2097 frame_size = -1;
2098
2099 if (offset > 0)
2100 {
2101 /* Save the frame pointer and lr if the frame pointer is needed
2102 first. Make the frame pointer point to the location of the
2103 old frame pointer on the stack. */
2104 if (frame_pointer_needed)
2105 {
2106 rtx mem_fp, mem_lr;
2107
2108 if (fp_offset)
2109 {
2110 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2111 GEN_INT (-offset)));
2112 RTX_FRAME_RELATED_P (insn) = 1;
2113 aarch64_set_frame_expr (gen_rtx_SET
2114 (Pmode, stack_pointer_rtx,
2115 gen_rtx_MINUS (Pmode,
2116 stack_pointer_rtx,
2117 GEN_INT (offset))));
2118 mem_fp = gen_frame_mem (DImode,
2119 plus_constant (Pmode,
2120 stack_pointer_rtx,
2121 fp_offset));
2122 mem_lr = gen_frame_mem (DImode,
2123 plus_constant (Pmode,
2124 stack_pointer_rtx,
2125 fp_offset
2126 + UNITS_PER_WORD));
2127 insn = emit_insn (gen_store_pairdi (mem_fp,
2128 hard_frame_pointer_rtx,
2129 mem_lr,
2130 gen_rtx_REG (DImode,
2131 LR_REGNUM)));
2132 }
2133 else
2134 {
2135 insn = emit_insn (gen_storewb_pairdi_di
2136 (stack_pointer_rtx, stack_pointer_rtx,
2137 hard_frame_pointer_rtx,
2138 gen_rtx_REG (DImode, LR_REGNUM),
2139 GEN_INT (-offset),
2140 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2141 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2142 }
2143
2144 /* The first part of a frame-related parallel insn is always
2145 assumed to be relevant to the frame calculations;
2146 subsequent parts are only frame-related if explicitly
2147 marked. */
2148 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2149 RTX_FRAME_RELATED_P (insn) = 1;
2150
2151 /* Set up frame pointer to point to the location of the
2152 previous frame pointer on the stack. */
2153 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2154 stack_pointer_rtx,
2155 GEN_INT (fp_offset)));
2156 aarch64_set_frame_expr (gen_rtx_SET
2157 (Pmode, hard_frame_pointer_rtx,
2158 plus_constant (Pmode,
2159 stack_pointer_rtx,
2160 fp_offset)));
2161 RTX_FRAME_RELATED_P (insn) = 1;
2162 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2163 hard_frame_pointer_rtx));
2164 }
2165 else
2166 {
2167 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2168 GEN_INT (-offset)));
2169 RTX_FRAME_RELATED_P (insn) = 1;
2170 }
2171
2172 aarch64_save_or_restore_callee_save_registers
2173 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2174 }
2175
2176 /* When offset >= 512,
2177 sub sp, sp, #<outgoing_args_size> */
2178 if (frame_size > -1)
2179 {
2180 if (crtl->outgoing_args_size > 0)
2181 {
2182 insn = emit_insn (gen_add2_insn
2183 (stack_pointer_rtx,
2184 GEN_INT (- crtl->outgoing_args_size)));
2185 RTX_FRAME_RELATED_P (insn) = 1;
2186 }
2187 }
2188 }
2189
2190 /* Generate the epilogue instructions for returning from a function. */
2191 void
2192 aarch64_expand_epilogue (bool for_sibcall)
2193 {
2194 HOST_WIDE_INT original_frame_size, frame_size, offset;
2195 HOST_WIDE_INT fp_offset;
2196 rtx insn;
2197 rtx cfa_reg;
2198
2199 aarch64_layout_frame ();
2200 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2201 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2202 + crtl->outgoing_args_size);
2203 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2204 STACK_BOUNDARY / BITS_PER_UNIT);
2205
2206 fp_offset = (offset
2207 - original_frame_size
2208 - cfun->machine->frame.saved_regs_size);
2209
2210 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2211
2212 /* Store pairs and load pairs have a range of only -512 to 504. */
2213 if (offset >= 512)
2214 {
2215 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2216 if (offset >= 512)
2217 offset = cfun->machine->frame.saved_regs_size;
2218
2219 frame_size -= (offset + crtl->outgoing_args_size);
2220 fp_offset = 0;
2221 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2222 {
2223 insn = emit_insn (gen_add2_insn
2224 (stack_pointer_rtx,
2225 GEN_INT (crtl->outgoing_args_size)));
2226 RTX_FRAME_RELATED_P (insn) = 1;
2227 }
2228 }
2229 else
2230 frame_size = -1;
2231
2232 /* If there were outgoing arguments or we've done dynamic stack
2233 allocation, then restore the stack pointer from the frame
2234 pointer. This is at most one insn and more efficient than using
2235 GCC's internal mechanism. */
2236 if (frame_pointer_needed
2237 && (crtl->outgoing_args_size || cfun->calls_alloca))
2238 {
2239 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2240 hard_frame_pointer_rtx,
2241 GEN_INT (- fp_offset)));
2242 RTX_FRAME_RELATED_P (insn) = 1;
2243 /* As SP is set to (FP - fp_offset), according to the rules in
2244 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2245 from the value of SP from now on. */
2246 cfa_reg = stack_pointer_rtx;
2247 }
2248
2249 aarch64_save_or_restore_callee_save_registers
2250 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2251
2252 /* Restore the frame pointer and lr if the frame pointer is needed. */
2253 if (offset > 0)
2254 {
2255 if (frame_pointer_needed)
2256 {
2257 rtx mem_fp, mem_lr;
2258
2259 if (fp_offset)
2260 {
2261 mem_fp = gen_frame_mem (DImode,
2262 plus_constant (Pmode,
2263 stack_pointer_rtx,
2264 fp_offset));
2265 mem_lr = gen_frame_mem (DImode,
2266 plus_constant (Pmode,
2267 stack_pointer_rtx,
2268 fp_offset
2269 + UNITS_PER_WORD));
2270 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2271 mem_fp,
2272 gen_rtx_REG (DImode,
2273 LR_REGNUM),
2274 mem_lr));
2275 }
2276 else
2277 {
2278 insn = emit_insn (gen_loadwb_pairdi_di
2279 (stack_pointer_rtx,
2280 stack_pointer_rtx,
2281 hard_frame_pointer_rtx,
2282 gen_rtx_REG (DImode, LR_REGNUM),
2283 GEN_INT (offset),
2284 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2285 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2286 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2287 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2288 plus_constant (Pmode, cfa_reg,
2289 offset))));
2290 }
2291
2292 /* The first part of a frame-related parallel insn
2293 is always assumed to be relevant to the frame
2294 calculations; subsequent parts are only
2295 frame-related if explicitly marked. */
2296 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2297 RTX_FRAME_RELATED_P (insn) = 1;
2298 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2299 add_reg_note (insn, REG_CFA_RESTORE,
2300 gen_rtx_REG (DImode, LR_REGNUM));
2301
2302 if (fp_offset)
2303 {
2304 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2305 GEN_INT (offset)));
2306 RTX_FRAME_RELATED_P (insn) = 1;
2307 }
2308 }
2309 else
2310 {
2311 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2312 GEN_INT (offset)));
2313 RTX_FRAME_RELATED_P (insn) = 1;
2314 }
2315 }
2316
2317 /* Stack adjustment for exception handler. */
2318 if (crtl->calls_eh_return)
2319 {
2320 /* We need to unwind the stack by the offset computed by
2321 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2322 based on SP. Ideally we would update the SP and define the
2323 CFA along the lines of:
2324
2325 SP = SP + EH_RETURN_STACKADJ_RTX
2326 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2327
2328 However the dwarf emitter only understands a constant
2329 register offset.
2330
2331 The solution chosen here is to use the otherwise unused IP0
2332 as a temporary register to hold the current SP value. The
2333 CFA is described using IP0 then SP is modified. */
2334
2335 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2336
2337 insn = emit_move_insn (ip0, stack_pointer_rtx);
2338 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2339 RTX_FRAME_RELATED_P (insn) = 1;
2340
2341 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2342
2343 /* Ensure the assignment to IP0 does not get optimized away. */
2344 emit_use (ip0);
2345 }
2346
2347 if (frame_size > -1)
2348 {
2349 if (frame_size >= 0x1000000)
2350 {
2351 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2352 emit_move_insn (op0, GEN_INT (frame_size));
2353 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2354 aarch64_set_frame_expr (gen_rtx_SET
2355 (Pmode, stack_pointer_rtx,
2356 plus_constant (Pmode,
2357 stack_pointer_rtx,
2358 frame_size)));
2359 }
2360 else if (frame_size > 0)
2361 {
2362 if ((frame_size & 0xfff) != 0)
2363 {
2364 insn = emit_insn (gen_add2_insn
2365 (stack_pointer_rtx,
2366 GEN_INT ((frame_size
2367 & (HOST_WIDE_INT) 0xfff))));
2368 RTX_FRAME_RELATED_P (insn) = 1;
2369 }
2370 if ((frame_size & 0xfff) != frame_size)
2371 {
2372 insn = emit_insn (gen_add2_insn
2373 (stack_pointer_rtx,
2374 GEN_INT ((frame_size
2375 & ~ (HOST_WIDE_INT) 0xfff))));
2376 RTX_FRAME_RELATED_P (insn) = 1;
2377 }
2378 }
2379
2380 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2381 plus_constant (Pmode,
2382 stack_pointer_rtx,
2383 offset)));
2384 }
2385
2386 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2387 if (!for_sibcall)
2388 emit_jump_insn (ret_rtx);
2389 }
2390
2391 /* Return the place to copy the exception unwinding return address to.
2392 This will probably be a stack slot, but could (in theory) be the
2393 return register. */
2394 rtx
2395 aarch64_final_eh_return_addr (void)
2396 {
2397 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2398 aarch64_layout_frame ();
2399 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2400 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2401 + crtl->outgoing_args_size);
2402 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2403 STACK_BOUNDARY / BITS_PER_UNIT);
2404 fp_offset = offset
2405 - original_frame_size
2406 - cfun->machine->frame.saved_regs_size;
2407
2408 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2409 return gen_rtx_REG (DImode, LR_REGNUM);
2410
2411 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2412 result in a store to save LR introduced by builtin_eh_return () being
2413 incorrectly deleted because the alias is not detected.
2414 So in the calculation of the address to copy the exception unwinding
2415 return address to, we note two cases.
2416 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2417 we return a SP-relative location since all the addresses are SP-relative
2418 in this case. This prevents the store from being optimized away.
2419 If the fp_offset is not 0, then the addresses will be FP-relative and
2420 therefore we return a FP-relative location. */
2421
2422 if (frame_pointer_needed)
2423 {
2424 if (fp_offset)
2425 return gen_frame_mem (DImode,
2426 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2427 else
2428 return gen_frame_mem (DImode,
2429 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2430 }
2431
2432 /* If FP is not needed, we calculate the location of LR, which would be
2433 at the top of the saved registers block. */
2434
2435 return gen_frame_mem (DImode,
2436 plus_constant (Pmode,
2437 stack_pointer_rtx,
2438 fp_offset
2439 + cfun->machine->frame.saved_regs_size
2440 - 2 * UNITS_PER_WORD));
2441 }
2442
2443 /* Output code to build up a constant in a register. */
2444 static void
2445 aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2446 {
2447 if (aarch64_bitmask_imm (val, DImode))
2448 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2449 else
2450 {
2451 int i;
2452 int ncount = 0;
2453 int zcount = 0;
2454 HOST_WIDE_INT valp = val >> 16;
2455 HOST_WIDE_INT valm;
2456 HOST_WIDE_INT tval;
2457
2458 for (i = 16; i < 64; i += 16)
2459 {
2460 valm = (valp & 0xffff);
2461
2462 if (valm != 0)
2463 ++ zcount;
2464
2465 if (valm != 0xffff)
2466 ++ ncount;
2467
2468 valp >>= 16;
2469 }
2470
2471 /* zcount contains the number of additional MOVK instructions
2472 required if the constant is built up with an initial MOVZ instruction,
2473 while ncount is the number of MOVK instructions required if starting
2474 with a MOVN instruction. Choose the sequence that yields the fewest
2475 instructions, preferring MOVZ instructions when the two counts are
2476 the same. */
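/* Illustrative example: for 0xffffffffffff1234 the three upper 16-bit
chunks are all 0xffff, so ncount is 0 and zcount is 3; a single MOVN
materialises the value (all upper bits set) with no follow-up MOVKs,
whereas a MOVZ-based sequence would need three MOVKs. */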
2477 if (ncount < zcount)
2478 {
2479 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2480 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2481 tval = 0xffff;
2482 }
2483 else
2484 {
2485 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2486 GEN_INT (val & 0xffff));
2487 tval = 0;
2488 }
2489
2490 val >>= 16;
2491
2492 for (i = 16; i < 64; i += 16)
2493 {
2494 if ((val & 0xffff) != tval)
2495 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2496 GEN_INT (i), GEN_INT (val & 0xffff)));
2497 val >>= 16;
2498 }
2499 }
2500 }
2501
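/* Add DELTA to register REGNUM, clobbering SCRATCHREG where an
intermediate value is needed.  Sketch of the expected expansion
(illustrative, not verbatim compiler output): a delta of 0x12345
becomes "mov scratch, #0x12; add this, this, scratch, lsl #12;
add this, this, #0x345", while a delta of magnitude 2^24 or more is
first built in SCRATCHREG via aarch64_build_constant and then added. */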
2502 static void
2503 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2504 {
2505 HOST_WIDE_INT mdelta = delta;
2506 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2507 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2508
2509 if (mdelta < 0)
2510 mdelta = -mdelta;
2511
2512 if (mdelta >= 4096 * 4096)
2513 {
2514 aarch64_build_constant (scratchreg, delta);
2515 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2516 }
2517 else if (mdelta > 0)
2518 {
2519 if (mdelta >= 4096)
2520 {
2521 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2522 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2523 if (delta < 0)
2524 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2525 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2526 else
2527 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2528 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2529 }
2530 if (mdelta % 4096 != 0)
2531 {
2532 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2533 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2534 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2535 }
2536 }
2537 }
2538
2539 /* Output code to add DELTA to the first argument, and then jump
2540 to FUNCTION. Used for C++ multiple inheritance. */
2541 static void
2542 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2543 HOST_WIDE_INT delta,
2544 HOST_WIDE_INT vcall_offset,
2545 tree function)
2546 {
2547 /* The this pointer is always in x0. Note that this differs from
2548 Arm where the this pointer may be bumped to r1 if r0 is required
2549 to return a pointer to an aggregate. On AArch64 a result value
2550 pointer will be in x8. */
2551 int this_regno = R0_REGNUM;
2552 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2553
2554 reload_completed = 1;
2555 emit_note (NOTE_INSN_PROLOGUE_END);
2556
2557 if (vcall_offset == 0)
2558 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2559 else
2560 {
2561 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2562
2563 this_rtx = gen_rtx_REG (Pmode, this_regno);
2564 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2565 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2566
2567 addr = this_rtx;
2568 if (delta != 0)
2569 {
2570 if (delta >= -256 && delta < 256)
2571 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2572 plus_constant (Pmode, this_rtx, delta));
2573 else
2574 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2575 }
2576
2577 if (Pmode == ptr_mode)
2578 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2579 else
2580 aarch64_emit_move (temp0,
2581 gen_rtx_ZERO_EXTEND (Pmode,
2582 gen_rtx_MEM (ptr_mode, addr)));
2583
2584 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2585 addr = plus_constant (Pmode, temp0, vcall_offset);
2586 else
2587 {
2588 aarch64_build_constant (IP1_REGNUM, vcall_offset);
2589 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2590 }
2591
2592 if (Pmode == ptr_mode)
2593 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
2594 else
2595 aarch64_emit_move (temp1,
2596 gen_rtx_SIGN_EXTEND (Pmode,
2597 gen_rtx_MEM (ptr_mode, addr)));
2598
2599 emit_insn (gen_add2_insn (this_rtx, temp1));
2600 }
2601
2602 /* Generate a tail call to the target function. */
2603 if (!TREE_USED (function))
2604 {
2605 assemble_external (function);
2606 TREE_USED (function) = 1;
2607 }
2608 funexp = XEXP (DECL_RTL (function), 0);
2609 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2610 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2611 SIBLING_CALL_P (insn) = 1;
2612
2613 insn = get_insns ();
2614 shorten_branches (insn);
2615 final_start_function (insn, file, 1);
2616 final (insn, file, 1);
2617 final_end_function ();
2618
2619 /* Stop pretending to be a post-reload pass. */
2620 reload_completed = 0;
2621 }
2622
2623 static int
2624 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2625 {
2626 if (GET_CODE (*x) == SYMBOL_REF)
2627 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2628
2629 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2630 TLS offsets, not real symbol references. */
2631 if (GET_CODE (*x) == UNSPEC
2632 && XINT (*x, 1) == UNSPEC_TLS)
2633 return -1;
2634
2635 return 0;
2636 }
2637
2638 static bool
2639 aarch64_tls_referenced_p (rtx x)
2640 {
2641 if (!TARGET_HAVE_TLS)
2642 return false;
2643
2644 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2645 }
2646
2647
2648 static int
2649 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2650 {
2651 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2652 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2653
2654 if (*imm1 < *imm2)
2655 return -1;
2656 if (*imm1 > *imm2)
2657 return +1;
2658 return 0;
2659 }
2660
2661
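/* Populate aarch64_bitmasks with every value representable as an
AArch64 logical (bitmask) immediate: an E-bit element (E = 2, 4, 8,
16, 32 or 64) holding S consecutive set bits rotated right by R,
replicated across all 64 bits.  Worked example: E = 8, S = 2, R = 1
gives the element 0x81 and hence the value 0x8181818181818181. */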
2662 static void
2663 aarch64_build_bitmask_table (void)
2664 {
2665 unsigned HOST_WIDE_INT mask, imm;
2666 unsigned int log_e, e, s, r;
2667 unsigned int nimms = 0;
2668
2669 for (log_e = 1; log_e <= 6; log_e++)
2670 {
2671 e = 1 << log_e;
2672 if (e == 64)
2673 mask = ~(HOST_WIDE_INT) 0;
2674 else
2675 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2676 for (s = 1; s < e; s++)
2677 {
2678 for (r = 0; r < e; r++)
2679 {
2680 /* Set S consecutive bits to 1 (S < 64). */
2681 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2682 /* Rotate right by R. */
2683 if (r != 0)
2684 imm = ((imm >> r) | (imm << (e - r))) & mask;
2685 /* Replicate the element to fill 64 bits; the cases fall through deliberately. */
2686 switch (log_e) {
2687 case 1: imm |= (imm << 2);
2688 case 2: imm |= (imm << 4);
2689 case 3: imm |= (imm << 8);
2690 case 4: imm |= (imm << 16);
2691 case 5: imm |= (imm << 32);
2692 case 6:
2693 break;
2694 default:
2695 gcc_unreachable ();
2696 }
2697 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2698 aarch64_bitmasks[nimms++] = imm;
2699 }
2700 }
2701 }
2702
2703 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2704 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2705 aarch64_bitmasks_cmp);
2706 }
2707
2708
2709 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2710 a left shift of 0 or 12 bits. */
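/* For example, 0xabc and 0xabc000 are accepted, while 0xabc00 is not,
since its non-zero bits straddle the two 12-bit windows. */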
2711 bool
2712 aarch64_uimm12_shift (HOST_WIDE_INT val)
2713 {
2714 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2715 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2716 );
2717 }
2718
2719
2720 /* Return true if val is an immediate that can be loaded into a
2721 register by a MOVZ instruction. */
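/* That is, a single 16-bit chunk at bit position 0, 16, 32 or 48 (the
upper two positions only for 64-bit modes); e.g. 0x12340000 is
accepted, 0x12345678 is not. */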
2722 static bool
2723 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2724 {
2725 if (GET_MODE_SIZE (mode) > 4)
2726 {
2727 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2728 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2729 return 1;
2730 }
2731 else
2732 {
2733 /* Ignore sign extension. */
2734 val &= (HOST_WIDE_INT) 0xffffffff;
2735 }
2736 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2737 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2738 }
2739
2740
2741 /* Return true if val is a valid bitmask immediate. */
2742 bool
2743 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2744 {
2745 if (GET_MODE_SIZE (mode) < 8)
2746 {
2747 /* Replicate bit pattern. */
2748 val &= (HOST_WIDE_INT) 0xffffffff;
2749 val |= val << 32;
2750 }
2751 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2752 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2753 }
2754
2755
2756 /* Return true if val is an immediate that can be loaded into a
2757 register in a single instruction. */
2758 bool
2759 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2760 {
2761 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2762 return 1;
2763 return aarch64_bitmask_imm (val, mode);
2764 }
2765
2766 static bool
2767 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2768 {
2769 rtx base, offset;
2770
2771 if (GET_CODE (x) == HIGH)
2772 return true;
2773
2774 split_const (x, &base, &offset);
2775 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2776 {
2777 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2778 != SYMBOL_FORCE_TO_MEM)
2779 return true;
2780 else
2781 /* Avoid generating a 64-bit relocation in ILP32; leave it
2782 to aarch64_expand_mov_immediate to handle properly. */
2783 return mode != ptr_mode;
2784 }
2785
2786 return aarch64_tls_referenced_p (x);
2787 }
2788
2789 /* Return true if register REGNO is a valid index register.
2790 STRICT_P is true if REG_OK_STRICT is in effect. */
2791
2792 bool
2793 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2794 {
2795 if (!HARD_REGISTER_NUM_P (regno))
2796 {
2797 if (!strict_p)
2798 return true;
2799
2800 if (!reg_renumber)
2801 return false;
2802
2803 regno = reg_renumber[regno];
2804 }
2805 return GP_REGNUM_P (regno);
2806 }
2807
2808 /* Return true if register REGNO is a valid base register.
2809 STRICT_P is true if REG_OK_STRICT is in effect. */
2810
2811 bool
2812 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2813 {
2814 if (!HARD_REGISTER_NUM_P (regno))
2815 {
2816 if (!strict_p)
2817 return true;
2818
2819 if (!reg_renumber)
2820 return false;
2821
2822 regno = reg_renumber[regno];
2823 }
2824
2825 /* The fake registers will be eliminated to either the stack or
2826 hard frame pointer, both of which are usually valid base registers.
2827 Reload deals with the cases where the eliminated form isn't valid. */
2828 return (GP_REGNUM_P (regno)
2829 || regno == SP_REGNUM
2830 || regno == FRAME_POINTER_REGNUM
2831 || regno == ARG_POINTER_REGNUM);
2832 }
2833
2834 /* Return true if X is a valid base register.
2835 STRICT_P is true if REG_OK_STRICT is in effect. */
2836
2837 static bool
2838 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2839 {
2840 if (!strict_p && GET_CODE (x) == SUBREG)
2841 x = SUBREG_REG (x);
2842
2843 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2844 }
2845
2846 /* Return true if address offset is a valid index. If it is, fill in INFO
2847 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2848
2849 static bool
2850 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2851 enum machine_mode mode, bool strict_p)
2852 {
2853 enum aarch64_address_type type;
2854 rtx index;
2855 int shift;
2856
2857 /* (reg:P) */
2858 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2859 && GET_MODE (x) == Pmode)
2860 {
2861 type = ADDRESS_REG_REG;
2862 index = x;
2863 shift = 0;
2864 }
2865 /* (sign_extend:DI (reg:SI)) */
2866 else if ((GET_CODE (x) == SIGN_EXTEND
2867 || GET_CODE (x) == ZERO_EXTEND)
2868 && GET_MODE (x) == DImode
2869 && GET_MODE (XEXP (x, 0)) == SImode)
2870 {
2871 type = (GET_CODE (x) == SIGN_EXTEND)
2872 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2873 index = XEXP (x, 0);
2874 shift = 0;
2875 }
2876 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2877 else if (GET_CODE (x) == MULT
2878 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2879 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2880 && GET_MODE (XEXP (x, 0)) == DImode
2881 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2882 && CONST_INT_P (XEXP (x, 1)))
2883 {
2884 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2885 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2886 index = XEXP (XEXP (x, 0), 0);
2887 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2888 }
2889 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2890 else if (GET_CODE (x) == ASHIFT
2891 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2892 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2893 && GET_MODE (XEXP (x, 0)) == DImode
2894 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2895 && CONST_INT_P (XEXP (x, 1)))
2896 {
2897 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2898 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2899 index = XEXP (XEXP (x, 0), 0);
2900 shift = INTVAL (XEXP (x, 1));
2901 }
2902 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2903 else if ((GET_CODE (x) == SIGN_EXTRACT
2904 || GET_CODE (x) == ZERO_EXTRACT)
2905 && GET_MODE (x) == DImode
2906 && GET_CODE (XEXP (x, 0)) == MULT
2907 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2908 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2909 {
2910 type = (GET_CODE (x) == SIGN_EXTRACT)
2911 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2912 index = XEXP (XEXP (x, 0), 0);
2913 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2914 if (INTVAL (XEXP (x, 1)) != 32 + shift
2915 || INTVAL (XEXP (x, 2)) != 0)
2916 shift = -1;
2917 }
2918 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2919 (const_int 0xffffffff<<shift)) */
2920 else if (GET_CODE (x) == AND
2921 && GET_MODE (x) == DImode
2922 && GET_CODE (XEXP (x, 0)) == MULT
2923 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2924 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2925 && CONST_INT_P (XEXP (x, 1)))
2926 {
2927 type = ADDRESS_REG_UXTW;
2928 index = XEXP (XEXP (x, 0), 0);
2929 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2930 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2931 shift = -1;
2932 }
2933 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2934 else if ((GET_CODE (x) == SIGN_EXTRACT
2935 || GET_CODE (x) == ZERO_EXTRACT)
2936 && GET_MODE (x) == DImode
2937 && GET_CODE (XEXP (x, 0)) == ASHIFT
2938 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2939 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2940 {
2941 type = (GET_CODE (x) == SIGN_EXTRACT)
2942 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2943 index = XEXP (XEXP (x, 0), 0);
2944 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2945 if (INTVAL (XEXP (x, 1)) != 32 + shift
2946 || INTVAL (XEXP (x, 2)) != 0)
2947 shift = -1;
2948 }
2949 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2950 (const_int 0xffffffff<<shift)) */
2951 else if (GET_CODE (x) == AND
2952 && GET_MODE (x) == DImode
2953 && GET_CODE (XEXP (x, 0)) == ASHIFT
2954 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2955 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2956 && CONST_INT_P (XEXP (x, 1)))
2957 {
2958 type = ADDRESS_REG_UXTW;
2959 index = XEXP (XEXP (x, 0), 0);
2960 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2961 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2962 shift = -1;
2963 }
2964 /* (mult:P (reg:P) (const_int scale)) */
2965 else if (GET_CODE (x) == MULT
2966 && GET_MODE (x) == Pmode
2967 && GET_MODE (XEXP (x, 0)) == Pmode
2968 && CONST_INT_P (XEXP (x, 1)))
2969 {
2970 type = ADDRESS_REG_REG;
2971 index = XEXP (x, 0);
2972 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2973 }
2974 /* (ashift:P (reg:P) (const_int shift)) */
2975 else if (GET_CODE (x) == ASHIFT
2976 && GET_MODE (x) == Pmode
2977 && GET_MODE (XEXP (x, 0)) == Pmode
2978 && CONST_INT_P (XEXP (x, 1)))
2979 {
2980 type = ADDRESS_REG_REG;
2981 index = XEXP (x, 0);
2982 shift = INTVAL (XEXP (x, 1));
2983 }
2984 else
2985 return false;
2986
2987 if (GET_CODE (index) == SUBREG)
2988 index = SUBREG_REG (index);
2989
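/* A scaled index is only accepted when the scale matches the access
size, e.g. an LSL #3 index is valid for an 8-byte access but not for
a 4-byte one. */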
2990 if ((shift == 0
2991 || (shift > 0 && shift <= 3
2992 && (1 << shift) == GET_MODE_SIZE (mode)))
2993 && REG_P (index)
2994 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2995 {
2996 info->type = type;
2997 info->offset = index;
2998 info->shift = shift;
2999 return true;
3000 }
3001
3002 return false;
3003 }
3004
3005 static inline bool
3006 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3007 {
3008 return (offset >= -64 * GET_MODE_SIZE (mode)
3009 && offset < 64 * GET_MODE_SIZE (mode)
3010 && offset % GET_MODE_SIZE (mode) == 0);
3011 }
3012
3013 static inline bool
3014 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3015 HOST_WIDE_INT offset)
3016 {
3017 return offset >= -256 && offset < 256;
3018 }
3019
3020 static inline bool
3021 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3022 {
3023 return (offset >= 0
3024 && offset < 4096 * GET_MODE_SIZE (mode)
3025 && offset % GET_MODE_SIZE (mode) == 0);
3026 }
3027
3028 /* Return true if X is a valid address for machine mode MODE. If it is,
3029 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3030 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3031
3032 static bool
3033 aarch64_classify_address (struct aarch64_address_info *info,
3034 rtx x, enum machine_mode mode,
3035 RTX_CODE outer_code, bool strict_p)
3036 {
3037 enum rtx_code code = GET_CODE (x);
3038 rtx op0, op1;
3039 bool allow_reg_index_p =
3040 outer_code != PARALLEL && GET_MODE_SIZE (mode) != 16;
3041
3042 /* Don't support anything other than POST_INC or REG addressing for
3043 AdvSIMD. */
3044 if (aarch64_vector_mode_p (mode)
3045 && (code != POST_INC && code != REG))
3046 return false;
3047
3048 switch (code)
3049 {
3050 case REG:
3051 case SUBREG:
3052 info->type = ADDRESS_REG_IMM;
3053 info->base = x;
3054 info->offset = const0_rtx;
3055 return aarch64_base_register_rtx_p (x, strict_p);
3056
3057 case PLUS:
3058 op0 = XEXP (x, 0);
3059 op1 = XEXP (x, 1);
3060 if (GET_MODE_SIZE (mode) != 0
3061 && CONST_INT_P (op1)
3062 && aarch64_base_register_rtx_p (op0, strict_p))
3063 {
3064 HOST_WIDE_INT offset = INTVAL (op1);
3065
3066 info->type = ADDRESS_REG_IMM;
3067 info->base = op0;
3068 info->offset = op1;
3069
3070 /* TImode and TFmode values are allowed in both pairs of X
3071 registers and individual Q registers. The available
3072 address modes are:
3073 X,X: 7-bit signed scaled offset
3074 Q: 9-bit signed offset
3075 We conservatively require an offset representable in either mode.
3076 */
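/* E.g. for TImode an offset of 384 satisfies the 7-bit scaled test but
not the 9-bit unscaled one, so it is conservatively rejected here. */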
3077 if (mode == TImode || mode == TFmode)
3078 return (offset_7bit_signed_scaled_p (mode, offset)
3079 && offset_9bit_signed_unscaled_p (mode, offset));
3080
3081 if (outer_code == PARALLEL)
3082 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3083 && offset_7bit_signed_scaled_p (mode, offset));
3084 else
3085 return (offset_9bit_signed_unscaled_p (mode, offset)
3086 || offset_12bit_unsigned_scaled_p (mode, offset));
3087 }
3088
3089 if (allow_reg_index_p)
3090 {
3091 /* Look for base + (scaled/extended) index register. */
3092 if (aarch64_base_register_rtx_p (op0, strict_p)
3093 && aarch64_classify_index (info, op1, mode, strict_p))
3094 {
3095 info->base = op0;
3096 return true;
3097 }
3098 if (aarch64_base_register_rtx_p (op1, strict_p)
3099 && aarch64_classify_index (info, op0, mode, strict_p))
3100 {
3101 info->base = op1;
3102 return true;
3103 }
3104 }
3105
3106 return false;
3107
3108 case POST_INC:
3109 case POST_DEC:
3110 case PRE_INC:
3111 case PRE_DEC:
3112 info->type = ADDRESS_REG_WB;
3113 info->base = XEXP (x, 0);
3114 info->offset = NULL_RTX;
3115 return aarch64_base_register_rtx_p (info->base, strict_p);
3116
3117 case POST_MODIFY:
3118 case PRE_MODIFY:
3119 info->type = ADDRESS_REG_WB;
3120 info->base = XEXP (x, 0);
3121 if (GET_CODE (XEXP (x, 1)) == PLUS
3122 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3123 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3124 && aarch64_base_register_rtx_p (info->base, strict_p))
3125 {
3126 HOST_WIDE_INT offset;
3127 info->offset = XEXP (XEXP (x, 1), 1);
3128 offset = INTVAL (info->offset);
3129
3130 /* TImode and TFmode values are allowed in both pairs of X
3131 registers and individual Q registers. The available
3132 address modes are:
3133 X,X: 7-bit signed scaled offset
3134 Q: 9-bit signed offset
3135 We conservatively require an offset representable in either mode.
3136 */
3137 if (mode == TImode || mode == TFmode)
3138 return (offset_7bit_signed_scaled_p (mode, offset)
3139 && offset_9bit_signed_unscaled_p (mode, offset));
3140
3141 if (outer_code == PARALLEL)
3142 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3143 && offset_7bit_signed_scaled_p (mode, offset));
3144 else
3145 return offset_9bit_signed_unscaled_p (mode, offset);
3146 }
3147 return false;
3148
3149 case CONST:
3150 case SYMBOL_REF:
3151 case LABEL_REF:
3152 /* Load literal: pc-relative constant pool entry. Only supported
3153 for SI mode or larger. */
3154 info->type = ADDRESS_SYMBOLIC;
3155 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3156 {
3157 rtx sym, addend;
3158
3159 split_const (x, &sym, &addend);
3160 return (GET_CODE (sym) == LABEL_REF
3161 || (GET_CODE (sym) == SYMBOL_REF
3162 && CONSTANT_POOL_ADDRESS_P (sym)));
3163 }
3164 return false;
3165
3166 case LO_SUM:
3167 info->type = ADDRESS_LO_SUM;
3168 info->base = XEXP (x, 0);
3169 info->offset = XEXP (x, 1);
3170 if (allow_reg_index_p
3171 && aarch64_base_register_rtx_p (info->base, strict_p))
3172 {
3173 rtx sym, offs;
3174 split_const (info->offset, &sym, &offs);
3175 if (GET_CODE (sym) == SYMBOL_REF
3176 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3177 == SYMBOL_SMALL_ABSOLUTE))
3178 {
3179 /* The symbol and offset must be aligned to the access size. */
3180 unsigned int align;
3181 unsigned int ref_size;
3182
3183 if (CONSTANT_POOL_ADDRESS_P (sym))
3184 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3185 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3186 {
3187 tree exp = SYMBOL_REF_DECL (sym);
3188 align = TYPE_ALIGN (TREE_TYPE (exp));
3189 align = CONSTANT_ALIGNMENT (exp, align);
3190 }
3191 else if (SYMBOL_REF_DECL (sym))
3192 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3193 else
3194 align = BITS_PER_UNIT;
3195
3196 ref_size = GET_MODE_SIZE (mode);
3197 if (ref_size == 0)
3198 ref_size = GET_MODE_SIZE (DImode);
3199
3200 return ((INTVAL (offs) & (ref_size - 1)) == 0
3201 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3202 }
3203 }
3204 return false;
3205
3206 default:
3207 return false;
3208 }
3209 }
3210
3211 bool
3212 aarch64_symbolic_address_p (rtx x)
3213 {
3214 rtx offset;
3215
3216 split_const (x, &x, &offset);
3217 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3218 }
3219
3220 /* Classify the base of symbolic expression X, given that X appears in
3221 context CONTEXT. */
3222
3223 enum aarch64_symbol_type
3224 aarch64_classify_symbolic_expression (rtx x,
3225 enum aarch64_symbol_context context)
3226 {
3227 rtx offset;
3228
3229 split_const (x, &x, &offset);
3230 return aarch64_classify_symbol (x, context);
3231 }
3232
3233
3234 /* Return TRUE if X is a legitimate address for accessing memory in
3235 mode MODE. */
3236 static bool
3237 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3238 {
3239 struct aarch64_address_info addr;
3240
3241 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3242 }
3243
3244 /* Return TRUE if X is a legitimate address for accessing memory in
3245 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3246 pair operation. */
3247 bool
3248 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3249 RTX_CODE outer_code, bool strict_p)
3250 {
3251 struct aarch64_address_info addr;
3252
3253 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3254 }
3255
3256 /* Return TRUE if rtx X is the immediate constant 0.0. */
3257 bool
3258 aarch64_float_const_zero_rtx_p (rtx x)
3259 {
3260 REAL_VALUE_TYPE r;
3261
3262 if (GET_MODE (x) == VOIDmode)
3263 return false;
3264
3265 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3266 if (REAL_VALUE_MINUS_ZERO (r))
3267 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3268 return REAL_VALUES_EQUAL (r, dconst0);
3269 }
3270
3271 /* Return the fixed registers used for condition codes. */
3272
3273 static bool
3274 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3275 {
3276 *p1 = CC_REGNUM;
3277 *p2 = INVALID_REGNUM;
3278 return true;
3279 }
3280
3281 enum machine_mode
3282 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3283 {
3284 /* All floating point compares return CCFP if it is an equality
3285 comparison, and CCFPE otherwise. */
3286 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3287 {
3288 switch (code)
3289 {
3290 case EQ:
3291 case NE:
3292 case UNORDERED:
3293 case ORDERED:
3294 case UNLT:
3295 case UNLE:
3296 case UNGT:
3297 case UNGE:
3298 case UNEQ:
3299 case LTGT:
3300 return CCFPmode;
3301
3302 case LT:
3303 case LE:
3304 case GT:
3305 case GE:
3306 return CCFPEmode;
3307
3308 default:
3309 gcc_unreachable ();
3310 }
3311 }
3312
3313 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3314 && y == const0_rtx
3315 && (code == EQ || code == NE || code == LT || code == GE)
3316 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3317 || GET_CODE (x) == NEG))
3318 return CC_NZmode;
3319
3320 /* A compare with a shifted or negated operand. Because of canonicalization,
3321 the comparison will have to be swapped when we emit the assembly
3322 code. */
3323 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3324 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3325 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3326 || GET_CODE (x) == LSHIFTRT
3327 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND
3328 || GET_CODE (x) == NEG))
3329 return CC_SWPmode;
3330
3331 /* A compare of a mode narrower than SI mode against zero can be done
3332 by extending the value in the comparison. */
3333 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3334 && y == const0_rtx)
3335 /* Only use sign-extension if we really need it. */
3336 return ((code == GT || code == GE || code == LE || code == LT)
3337 ? CC_SESWPmode : CC_ZESWPmode);
3338
3339 /* For everything else, return CCmode. */
3340 return CCmode;
3341 }
3342
3343 static unsigned
3344 aarch64_get_condition_code (rtx x)
3345 {
3346 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3347 enum rtx_code comp_code = GET_CODE (x);
3348
3349 if (GET_MODE_CLASS (mode) != MODE_CC)
3350 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3351
3352 switch (mode)
3353 {
3354 case CCFPmode:
3355 case CCFPEmode:
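/* For floating-point compares the chosen condition must also be false
when the operands are unordered: FCMP reports an unordered result by
setting C and V, which is why e.g. LE maps to LS and LT to MI rather
than to the integer LE/LT encodings. */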
3356 switch (comp_code)
3357 {
3358 case GE: return AARCH64_GE;
3359 case GT: return AARCH64_GT;
3360 case LE: return AARCH64_LS;
3361 case LT: return AARCH64_MI;
3362 case NE: return AARCH64_NE;
3363 case EQ: return AARCH64_EQ;
3364 case ORDERED: return AARCH64_VC;
3365 case UNORDERED: return AARCH64_VS;
3366 case UNLT: return AARCH64_LT;
3367 case UNLE: return AARCH64_LE;
3368 case UNGT: return AARCH64_HI;
3369 case UNGE: return AARCH64_PL;
3370 default: gcc_unreachable ();
3371 }
3372 break;
3373
3374 case CCmode:
3375 switch (comp_code)
3376 {
3377 case NE: return AARCH64_NE;
3378 case EQ: return AARCH64_EQ;
3379 case GE: return AARCH64_GE;
3380 case GT: return AARCH64_GT;
3381 case LE: return AARCH64_LE;
3382 case LT: return AARCH64_LT;
3383 case GEU: return AARCH64_CS;
3384 case GTU: return AARCH64_HI;
3385 case LEU: return AARCH64_LS;
3386 case LTU: return AARCH64_CC;
3387 default: gcc_unreachable ();
3388 }
3389 break;
3390
3391 case CC_SWPmode:
3392 case CC_ZESWPmode:
3393 case CC_SESWPmode:
3394 switch (comp_code)
3395 {
3396 case NE: return AARCH64_NE;
3397 case EQ: return AARCH64_EQ;
3398 case GE: return AARCH64_LE;
3399 case GT: return AARCH64_LT;
3400 case LE: return AARCH64_GE;
3401 case LT: return AARCH64_GT;
3402 case GEU: return AARCH64_LS;
3403 case GTU: return AARCH64_CC;
3404 case LEU: return AARCH64_CS;
3405 case LTU: return AARCH64_HI;
3406 default: gcc_unreachable ();
3407 }
3408 break;
3409
3410 case CC_NZmode:
3411 switch (comp_code)
3412 {
3413 case NE: return AARCH64_NE;
3414 case EQ: return AARCH64_EQ;
3415 case GE: return AARCH64_PL;
3416 case LT: return AARCH64_MI;
3417 default: gcc_unreachable ();
3418 }
3419 break;
3420
3421 default:
3422 gcc_unreachable ();
3423 break;
3424 }
3425 }
3426
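/* Count the set bits in VALUE; each iteration of the loop below clears
the lowest set bit (value &= value - 1). */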
3427 static unsigned
3428 bit_count (unsigned HOST_WIDE_INT value)
3429 {
3430 unsigned count = 0;
3431
3432 while (value)
3433 {
3434 count++;
3435 value &= value - 1;
3436 }
3437
3438 return count;
3439 }
3440
3441 void
3442 aarch64_print_operand (FILE *f, rtx x, char code)
3443 {
3444 switch (code)
3445 {
3446 /* An integer or symbol address without a preceding # sign. */
3447 case 'c':
3448 switch (GET_CODE (x))
3449 {
3450 case CONST_INT:
3451 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3452 break;
3453
3454 case SYMBOL_REF:
3455 output_addr_const (f, x);
3456 break;
3457
3458 case CONST:
3459 if (GET_CODE (XEXP (x, 0)) == PLUS
3460 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3461 {
3462 output_addr_const (f, x);
3463 break;
3464 }
3465 /* Fall through. */
3466
3467 default:
3468 output_operand_lossage ("unsupported operand for code '%c'", code);
3469 }
3470 break;
3471
3472 case 'e':
3473 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3474 {
3475 int n;
3476
3477 if (GET_CODE (x) != CONST_INT
3478 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3479 {
3480 output_operand_lossage ("invalid operand for '%%%c'", code);
3481 return;
3482 }
3483
3484 switch (n)
3485 {
3486 case 3:
3487 fputc ('b', f);
3488 break;
3489 case 4:
3490 fputc ('h', f);
3491 break;
3492 case 5:
3493 fputc ('w', f);
3494 break;
3495 default:
3496 output_operand_lossage ("invalid operand for '%%%c'", code);
3497 return;
3498 }
3499 }
3500 break;
3501
3502 case 'p':
3503 {
3504 int n;
3505
3506 /* Print N such that 2^N == X. */
3507 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3508 {
3509 output_operand_lossage ("invalid operand for '%%%c'", code);
3510 return;
3511 }
3512
3513 asm_fprintf (f, "%d", n);
3514 }
3515 break;
3516
3517 case 'P':
3518 /* Print the number of non-zero bits in X (a const_int). */
3519 if (GET_CODE (x) != CONST_INT)
3520 {
3521 output_operand_lossage ("invalid operand for '%%%c'", code);
3522 return;
3523 }
3524
3525 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3526 break;
3527
3528 case 'H':
3529 /* Print the higher numbered register of a pair (TImode) of regs. */
3530 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3531 {
3532 output_operand_lossage ("invalid operand for '%%%c'", code);
3533 return;
3534 }
3535
3536 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3537 break;
3538
3539 case 'm':
3540 /* Print a condition (eq, ne, etc). */
3541
3542 /* CONST_TRUE_RTX means always -- that's the default. */
3543 if (x == const_true_rtx)
3544 return;
3545
3546 if (!COMPARISON_P (x))
3547 {
3548 output_operand_lossage ("invalid operand for '%%%c'", code);
3549 return;
3550 }
3551
3552 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3553 break;
3554
3555 case 'M':
3556 /* Print the inverse of a condition (eq <-> ne, etc). */
3557
3558 /* CONST_TRUE_RTX means never -- that's the default. */
3559 if (x == const_true_rtx)
3560 {
3561 fputs ("nv", f);
3562 return;
3563 }
3564
3565 if (!COMPARISON_P (x))
3566 {
3567 output_operand_lossage ("invalid operand for '%%%c'", code);
3568 return;
3569 }
3570
3571 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3572 (aarch64_get_condition_code (x))], f);
3573 break;
3574
3575 case 'b':
3576 case 'h':
3577 case 's':
3578 case 'd':
3579 case 'q':
3580 /* Print a scalar FP/SIMD register name. */
3581 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3582 {
3583 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3584 return;
3585 }
3586 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3587 break;
3588
3589 case 'S':
3590 case 'T':
3591 case 'U':
3592 case 'V':
3593 /* Print the first FP/SIMD register name in a list. */
3594 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3595 {
3596 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3597 return;
3598 }
3599 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3600 break;
3601
3602 case 'X':
3603 /* Print bottom 16 bits of integer constant in hex. */
3604 if (GET_CODE (x) != CONST_INT)
3605 {
3606 output_operand_lossage ("invalid operand for '%%%c'", code);
3607 return;
3608 }
3609 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3610 break;
3611
3612 case 'w':
3613 case 'x':
3614 /* Print a general register name or the zero register (32-bit or
3615 64-bit). */
3616 if (x == const0_rtx
3617 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3618 {
3619 asm_fprintf (f, "%czr", code);
3620 break;
3621 }
3622
3623 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3624 {
3625 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3626 break;
3627 }
3628
3629 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3630 {
3631 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3632 break;
3633 }
3634
3635 /* Fall through */
3636
3637 case 0:
3638 /* Print a normal operand; if it's a general register, then we
3639 assume DImode. */
3640 if (x == NULL)
3641 {
3642 output_operand_lossage ("missing operand");
3643 return;
3644 }
3645
3646 switch (GET_CODE (x))
3647 {
3648 case REG:
3649 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3650 break;
3651
3652 case MEM:
3653 aarch64_memory_reference_mode = GET_MODE (x);
3654 output_address (XEXP (x, 0));
3655 break;
3656
3657 case LABEL_REF:
3658 case SYMBOL_REF:
3659 output_addr_const (asm_out_file, x);
3660 break;
3661
3662 case CONST_INT:
3663 asm_fprintf (f, "%wd", INTVAL (x));
3664 break;
3665
3666 case CONST_VECTOR:
3667 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3668 {
3669 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3670 HOST_WIDE_INT_MIN,
3671 HOST_WIDE_INT_MAX));
3672 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3673 }
3674 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3675 {
3676 fputc ('0', f);
3677 }
3678 else
3679 gcc_unreachable ();
3680 break;
3681
3682 case CONST_DOUBLE:
3683 /* CONST_DOUBLE can represent a double-width integer.
3684 In this case, the mode of x is VOIDmode. */
3685 if (GET_MODE (x) == VOIDmode)
3686 ; /* Do Nothing. */
3687 else if (aarch64_float_const_zero_rtx_p (x))
3688 {
3689 fputc ('0', f);
3690 break;
3691 }
3692 else if (aarch64_float_const_representable_p (x))
3693 {
3694 #define buf_size 20
3695 char float_buf[buf_size] = {'\0'};
3696 REAL_VALUE_TYPE r;
3697 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3698 real_to_decimal_for_mode (float_buf, &r,
3699 buf_size, buf_size,
3700 1, GET_MODE (x));
3701 asm_fprintf (asm_out_file, "%s", float_buf);
3702 break;
3703 #undef buf_size
3704 }
3705 output_operand_lossage ("invalid constant");
3706 return;
3707 default:
3708 output_operand_lossage ("invalid operand");
3709 return;
3710 }
3711 break;
3712
3713 case 'A':
3714 if (GET_CODE (x) == HIGH)
3715 x = XEXP (x, 0);
3716
3717 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3718 {
3719 case SYMBOL_SMALL_GOT:
3720 asm_fprintf (asm_out_file, ":got:");
3721 break;
3722
3723 case SYMBOL_SMALL_TLSGD:
3724 asm_fprintf (asm_out_file, ":tlsgd:");
3725 break;
3726
3727 case SYMBOL_SMALL_TLSDESC:
3728 asm_fprintf (asm_out_file, ":tlsdesc:");
3729 break;
3730
3731 case SYMBOL_SMALL_GOTTPREL:
3732 asm_fprintf (asm_out_file, ":gottprel:");
3733 break;
3734
3735 case SYMBOL_SMALL_TPREL:
3736 asm_fprintf (asm_out_file, ":tprel:");
3737 break;
3738
3739 case SYMBOL_TINY_GOT:
3740 gcc_unreachable ();
3741 break;
3742
3743 default:
3744 break;
3745 }
3746 output_addr_const (asm_out_file, x);
3747 break;
3748
3749 case 'L':
3750 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3751 {
3752 case SYMBOL_SMALL_GOT:
3753 asm_fprintf (asm_out_file, ":lo12:");
3754 break;
3755
3756 case SYMBOL_SMALL_TLSGD:
3757 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3758 break;
3759
3760 case SYMBOL_SMALL_TLSDESC:
3761 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3762 break;
3763
3764 case SYMBOL_SMALL_GOTTPREL:
3765 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3766 break;
3767
3768 case SYMBOL_SMALL_TPREL:
3769 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3770 break;
3771
3772 case SYMBOL_TINY_GOT:
3773 asm_fprintf (asm_out_file, ":got:");
3774 break;
3775
3776 default:
3777 break;
3778 }
3779 output_addr_const (asm_out_file, x);
3780 break;
3781
3782 case 'G':
3783
3784 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3785 {
3786 case SYMBOL_SMALL_TPREL:
3787 asm_fprintf (asm_out_file, ":tprel_hi12:");
3788 break;
3789 default:
3790 break;
3791 }
3792 output_addr_const (asm_out_file, x);
3793 break;
3794
3795 default:
3796 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3797 return;
3798 }
3799 }
3800
3801 void
3802 aarch64_print_operand_address (FILE *f, rtx x)
3803 {
3804 struct aarch64_address_info addr;
3805
3806 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3807 MEM, true))
3808 switch (addr.type)
3809 {
3810 case ADDRESS_REG_IMM:
3811 if (addr.offset == const0_rtx)
3812 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3813 else
3814 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3815 INTVAL (addr.offset));
3816 return;
3817
3818 case ADDRESS_REG_REG:
3819 if (addr.shift == 0)
3820 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3821 reg_names [REGNO (addr.offset)]);
3822 else
3823 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3824 reg_names [REGNO (addr.offset)], addr.shift);
3825 return;
3826
3827 case ADDRESS_REG_UXTW:
3828 if (addr.shift == 0)
3829 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3830 REGNO (addr.offset) - R0_REGNUM);
3831 else
3832 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3833 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3834 return;
3835
3836 case ADDRESS_REG_SXTW:
3837 if (addr.shift == 0)
3838 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3839 REGNO (addr.offset) - R0_REGNUM);
3840 else
3841 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3842 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3843 return;
3844
3845 case ADDRESS_REG_WB:
3846 switch (GET_CODE (x))
3847 {
3848 case PRE_INC:
3849 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3850 GET_MODE_SIZE (aarch64_memory_reference_mode));
3851 return;
3852 case POST_INC:
3853 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3854 GET_MODE_SIZE (aarch64_memory_reference_mode));
3855 return;
3856 case PRE_DEC:
3857 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3858 GET_MODE_SIZE (aarch64_memory_reference_mode));
3859 return;
3860 case POST_DEC:
3861 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3862 GET_MODE_SIZE (aarch64_memory_reference_mode));
3863 return;
3864 case PRE_MODIFY:
3865 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3866 INTVAL (addr.offset));
3867 return;
3868 case POST_MODIFY:
3869 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3870 INTVAL (addr.offset));
3871 return;
3872 default:
3873 break;
3874 }
3875 break;
3876
3877 case ADDRESS_LO_SUM:
3878 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3879 output_addr_const (f, addr.offset);
3880 asm_fprintf (f, "]");
3881 return;
3882
3883 case ADDRESS_SYMBOLIC:
3884 break;
3885 }
3886
3887 output_addr_const (f, x);
3888 }
3889
3890 bool
3891 aarch64_label_mentioned_p (rtx x)
3892 {
3893 const char *fmt;
3894 int i;
3895
3896 if (GET_CODE (x) == LABEL_REF)
3897 return true;
3898
3899 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3900 referencing instruction, but they are constant offsets, not
3901 symbols. */
3902 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3903 return false;
3904
3905 fmt = GET_RTX_FORMAT (GET_CODE (x));
3906 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3907 {
3908 if (fmt[i] == 'E')
3909 {
3910 int j;
3911
3912 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3913 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3914 return 1;
3915 }
3916 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3917 return 1;
3918 }
3919
3920 return 0;
3921 }
3922
3923 /* Implement REGNO_REG_CLASS. */
3924
3925 enum reg_class
3926 aarch64_regno_regclass (unsigned regno)
3927 {
3928 if (GP_REGNUM_P (regno))
3929 return CORE_REGS;
3930
3931 if (regno == SP_REGNUM)
3932 return STACK_REG;
3933
3934 if (regno == FRAME_POINTER_REGNUM
3935 || regno == ARG_POINTER_REGNUM)
3936 return POINTER_REGS;
3937
3938 if (FP_REGNUM_P (regno))
3939 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3940
3941 return NO_REGS;
3942 }
3943
3944 /* Try a machine-dependent way of reloading an illegitimate address
3945 operand. If we find one, push the reload and return the new rtx. */
3946
3947 rtx
3948 aarch64_legitimize_reload_address (rtx *x_p,
3949 enum machine_mode mode,
3950 int opnum, int type,
3951 int ind_levels ATTRIBUTE_UNUSED)
3952 {
3953 rtx x = *x_p;
3954
3955 /* Do not allow mem (plus (reg, const)) if vector mode. */
3956 if (aarch64_vector_mode_p (mode)
3957 && GET_CODE (x) == PLUS
3958 && REG_P (XEXP (x, 0))
3959 && CONST_INT_P (XEXP (x, 1)))
3960 {
3961 rtx orig_rtx = x;
3962 x = copy_rtx (x);
3963 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3964 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3965 opnum, (enum reload_type) type);
3966 return x;
3967 }
3968
3969 /* We must recognize output that we have already generated ourselves. */
3970 if (GET_CODE (x) == PLUS
3971 && GET_CODE (XEXP (x, 0)) == PLUS
3972 && REG_P (XEXP (XEXP (x, 0), 0))
3973 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3974 && CONST_INT_P (XEXP (x, 1)))
3975 {
3976 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3977 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3978 opnum, (enum reload_type) type);
3979 return x;
3980 }
3981
3982 /* We wish to handle large displacements off a base register by splitting
3983 the addend across an add and the mem insn. This can cut the number of
3984 extra insns needed from 3 to 1. It is only useful for load/store of a
3985 single register with 12 bit offset field. */
3986 if (GET_CODE (x) == PLUS
3987 && REG_P (XEXP (x, 0))
3988 && CONST_INT_P (XEXP (x, 1))
3989 && HARD_REGISTER_P (XEXP (x, 0))
3990 && mode != TImode
3991 && mode != TFmode
3992 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
3993 {
3994 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3995 HOST_WIDE_INT low = val & 0xfff;
3996 HOST_WIDE_INT high = val - low;
3997 HOST_WIDE_INT offs;
3998 rtx cst;
3999 enum machine_mode xmode = GET_MODE (x);
4000
4001 /* In ILP32, xmode can be either DImode or SImode. */
4002 gcc_assert (xmode == DImode || xmode == SImode);
4003
4004 /* Punt on BLKmode (size 0) offsets: we cannot ascertain BLKmode
4005 alignment, so leave such addresses to the generic reload code. */
4006 if (GET_MODE_SIZE (mode) == 0)
4007 return NULL_RTX;
4008
4009 offs = low % GET_MODE_SIZE (mode);
4010
4011 /* Align misaligned offset by adjusting high part to compensate. */
4012 if (offs != 0)
4013 {
4014 if (aarch64_uimm12_shift (high + offs))
4015 {
4016 /* Align down. */
4017 low = low - offs;
4018 high = high + offs;
4019 }
4020 else
4021 {
4022 /* Align up. */
4023 offs = GET_MODE_SIZE (mode) - offs;
4024 low = low + offs;
4025 high = high + (low & 0x1000) - offs;
4026 low &= 0xfff;
4027 }
4028 }
4029
4030 /* Check for overflow. */
4031 if (high + low != val)
4032 return NULL_RTX;
4033
4034 cst = GEN_INT (high);
4035 if (!aarch64_uimm12_shift (high))
4036 cst = force_const_mem (xmode, cst);
4037
4038 /* Reload high part into base reg, leaving the low part
4039 in the mem instruction.
4040 Note that replacing this gen_rtx_PLUS with plus_constant is
4041 wrong in this case because we rely on the
4042 (plus (plus reg c1) c2) structure being preserved so that
4043 XEXP (*p, 0) in push_reload below uses the correct term. */
4044 x = gen_rtx_PLUS (xmode,
4045 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4046 GEN_INT (low));
4047
4048 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4049 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4050 opnum, (enum reload_type) type);
4051 return x;
4052 }
4053
4054 return NULL_RTX;
4055 }
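/* A worked example of the splitting above (values chosen for illustration):
   reloading an SImode access to (plus (reg x1) (const_int 0x13008)) gives
   val = 0x13008, low = 0x008, high = 0x13000 and offs = 0x008 % 4 = 0, so no
   realignment is needed.  high = 0x13000 is 0x13 << 12, which
   aarch64_uimm12_shift accepts, so the address is rewritten as
   (plus (plus (reg x1) (const_int 0x13000)) (const_int 8)) and the inner sum
   is reloaded into a base register, giving something like

	add	x2, x1, #0x13, lsl #12
	ldr	w0, [x2, 8]

   i.e. one extra insn instead of materializing the whole constant.  */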
4056
4057
4058 static reg_class_t
4059 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4060 reg_class_t rclass,
4061 enum machine_mode mode,
4062 secondary_reload_info *sri)
4063 {
4064 /* Without the TARGET_SIMD instructions we cannot move a Q register
4065 to a Q register directly. We need a scratch. */
4066 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4067 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4068 && reg_class_subset_p (rclass, FP_REGS))
4069 {
4070 if (mode == TFmode)
4071 sri->icode = CODE_FOR_aarch64_reload_movtf;
4072 else if (mode == TImode)
4073 sri->icode = CODE_FOR_aarch64_reload_movti;
4074 return NO_REGS;
4075 }
4076
4077 /* A TFmode or TImode memory access should be handled via an FP register,
4078 because AArch64 has richer addressing modes for LDR/STR instructions
4079 than for LDP/STP instructions. */
4080 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4081 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4082 return FP_REGS;
4083
4084 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P (x))
4085 return CORE_REGS;
4086
4087 return NO_REGS;
4088 }
4089
4090 static bool
4091 aarch64_can_eliminate (const int from, const int to)
4092 {
4093 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4094 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4095
4096 if (frame_pointer_needed)
4097 {
4098 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4099 return true;
4100 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4101 return false;
4102 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4103 && !cfun->calls_alloca)
4104 return true;
4105 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4106 return true;
4107 return false;
4108 }
4109 else
4110 {
4111 /* If we decided that we didn't need a leaf frame pointer but then used
4112 LR in the function, then we'll want a frame pointer after all, so
4113 prevent this elimination to ensure a frame pointer is used.
4114
4115 NOTE: the original value of flag_omit_frame_pointer gets trashed
4116 IFF flag_omit_leaf_frame_pointer is true, so we check the value
4117 of faked_omit_frame_pointer here (which is true when we always
4118 wish to keep non-leaf frame pointers but only wish to keep leaf frame
4119 pointers when LR is clobbered). */
4120 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4121 && df_regs_ever_live_p (LR_REGNUM)
4122 && faked_omit_frame_pointer)
4123 return false;
4124 }
4125
4126 return true;
4127 }
4128
4129 HOST_WIDE_INT
4130 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4131 {
4132 HOST_WIDE_INT frame_size;
4133 HOST_WIDE_INT offset;
4134
4135 aarch64_layout_frame ();
4136 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4137 + crtl->outgoing_args_size
4138 + cfun->machine->saved_varargs_size);
4139
4140 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4141 offset = frame_size;
4142
4143 if (to == HARD_FRAME_POINTER_REGNUM)
4144 {
4145 if (from == ARG_POINTER_REGNUM)
4146 return offset - crtl->outgoing_args_size;
4147
4148 if (from == FRAME_POINTER_REGNUM)
4149 return cfun->machine->frame.saved_regs_size + get_frame_size ();
4150 }
4151
4152 if (to == STACK_POINTER_REGNUM)
4153 {
4154 if (from == FRAME_POINTER_REGNUM)
4155 {
4156 HOST_WIDE_INT elim = crtl->outgoing_args_size
4157 + cfun->machine->frame.saved_regs_size
4158 + get_frame_size ()
4159 - cfun->machine->frame.fp_lr_offset;
4160 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4161 return elim;
4162 }
4163 }
4164
4165 return offset;
4166 }
4167
4168
4169 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4170 previous frame. */
4171
4172 rtx
4173 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4174 {
4175 if (count != 0)
4176 return const0_rtx;
4177 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4178 }
4179
4180
4181 static void
4182 aarch64_asm_trampoline_template (FILE *f)
4183 {
4184 if (TARGET_ILP32)
4185 {
4186 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4187 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4188 }
4189 else
4190 {
4191 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4192 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4193 }
4194 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4195 assemble_aligned_integer (4, const0_rtx);
4196 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4197 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4198 }
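/* For reference, with the usual register assignments for this port
   (IP1 is x17 and the static chain register is x18), the LP64 template
   above assembles to a 32-byte trampoline laid out roughly as:

	offset  0:  ldr  x17, .+16   // load the target function address
	offset  4:  ldr  x18, .+20   // load the static chain value
	offset  8:  br   x17
	offset 12:  .word 0          // padding
	offset 16:  <function address, filled in by aarch64_trampoline_init>
	offset 24:  <static chain value, filled in by aarch64_trampoline_init>

   so the two PC-relative loads read offsets 16 and 24 respectively.  */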
4199
4200 static void
4201 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4202 {
4203 rtx fnaddr, mem, a_tramp;
4204 const int tramp_code_sz = 16;
4205
4206 /* Don't need to copy the trailing D-words, we fill those in below. */
4207 emit_block_move (m_tramp, assemble_trampoline_template (),
4208 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4209 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4210 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4211 if (GET_MODE (fnaddr) != ptr_mode)
4212 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4213 emit_move_insn (mem, fnaddr);
4214
4215 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4216 emit_move_insn (mem, chain_value);
4217
4218 /* XXX We should really define a "clear_cache" pattern and use
4219 gen_clear_cache(). */
4220 a_tramp = XEXP (m_tramp, 0);
4221 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4222 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4223 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4224 ptr_mode);
4225 }
4226
4227 static unsigned char
4228 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4229 {
4230 switch (regclass)
4231 {
4232 case CORE_REGS:
4233 case POINTER_REGS:
4234 case GENERAL_REGS:
4235 case ALL_REGS:
4236 case FP_REGS:
4237 case FP_LO_REGS:
4238 return
4239 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4240 (GET_MODE_SIZE (mode) + 7) / 8;
4241 case STACK_REG:
4242 return 1;
4243
4244 case NO_REGS:
4245 return 0;
4246
4247 default:
4248 break;
4249 }
4250 gcc_unreachable ();
4251 }
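/* A few illustrative values implied by the formula above (hypothetical
   helper, illustrative only, not built; they assume TARGET_SIMD so that
   V4SImode counts as a vector mode).  */
#if 0
static void
aarch64_class_max_nregs_examples (void)
{
  gcc_assert (aarch64_class_max_nregs (GENERAL_REGS, DImode) == 1);
  gcc_assert (aarch64_class_max_nregs (GENERAL_REGS, TImode) == 2);
  gcc_assert (aarch64_class_max_nregs (FP_REGS, V4SImode) == 1);
  gcc_assert (aarch64_class_max_nregs (STACK_REG, DFmode) == 1);
}
#endif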
4252
4253 static reg_class_t
4254 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4255 {
4256 if (regclass == POINTER_REGS)
4257 return GENERAL_REGS;
4258
4259 if (regclass == STACK_REG)
4260 {
4261 if (REG_P (x)
4262 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4263 return regclass;
4264
4265 return NO_REGS;
4266 }
4267
4268 /* If it's an integer immediate that MOVI can't handle, then
4269 FP_REGS is not an option, so we return NO_REGS instead. */
4270 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4271 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4272 return NO_REGS;
4273
4274 /* Register elimination can result in a request for
4275 SP+constant->FP_REGS. We cannot support such operations, which
4276 use SP as source and an FP_REG as destination, so reject them
4277 outright. */
4278 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4279 {
4280 rtx lhs = XEXP (x, 0);
4281
4282 /* Look through a possible SUBREG introduced by ILP32. */
4283 if (GET_CODE (lhs) == SUBREG)
4284 lhs = SUBREG_REG (lhs);
4285
4286 gcc_assert (REG_P (lhs));
4287 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4288 POINTER_REGS));
4289 return NO_REGS;
4290 }
4291
4292 return regclass;
4293 }
4294
4295 void
4296 aarch64_asm_output_labelref (FILE* f, const char *name)
4297 {
4298 asm_fprintf (f, "%U%s", name);
4299 }
4300
4301 static void
4302 aarch64_elf_asm_constructor (rtx symbol, int priority)
4303 {
4304 if (priority == DEFAULT_INIT_PRIORITY)
4305 default_ctor_section_asm_out_constructor (symbol, priority);
4306 else
4307 {
4308 section *s;
4309 char buf[18];
4310 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4311 s = get_section (buf, SECTION_WRITE, NULL);
4312 switch_to_section (s);
4313 assemble_align (POINTER_SIZE);
4314 assemble_aligned_integer (POINTER_BYTES, symbol);
4315 }
4316 }
4317
4318 static void
4319 aarch64_elf_asm_destructor (rtx symbol, int priority)
4320 {
4321 if (priority == DEFAULT_INIT_PRIORITY)
4322 default_dtor_section_asm_out_destructor (symbol, priority);
4323 else
4324 {
4325 section *s;
4326 char buf[18];
4327 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4328 s = get_section (buf, SECTION_WRITE, NULL);
4329 switch_to_section (s);
4330 assemble_align (POINTER_SIZE);
4331 assemble_aligned_integer (POINTER_BYTES, symbol);
4332 }
4333 }
4334
4335 const char*
4336 aarch64_output_casesi (rtx *operands)
4337 {
4338 char buf[100];
4339 char label[100];
4340 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4341 int index;
4342 static const char *const patterns[4][2] =
4343 {
4344 {
4345 "ldrb\t%w3, [%0,%w1,uxtw]",
4346 "add\t%3, %4, %w3, sxtb #2"
4347 },
4348 {
4349 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4350 "add\t%3, %4, %w3, sxth #2"
4351 },
4352 {
4353 "ldr\t%w3, [%0,%w1,uxtw #2]",
4354 "add\t%3, %4, %w3, sxtw #2"
4355 },
4356 /* We assume that DImode is only generated when not optimizing and
4357 that we don't really need 64-bit address offsets. That would
4358 imply an object file with 8GB of code in a single function! */
4359 {
4360 "ldr\t%w3, [%0,%w1,uxtw #2]",
4361 "add\t%3, %4, %w3, sxtw #2"
4362 }
4363 };
4364
4365 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4366
4367 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4368
4369 gcc_assert (index >= 0 && index <= 3);
4370
4371 /* Need to implement table size reduction, by changing the code below. */
4372 output_asm_insn (patterns[index][0], operands);
4373 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4374 snprintf (buf, sizeof (buf),
4375 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4376 output_asm_insn (buf, operands);
4377 output_asm_insn (patterns[index][1], operands);
4378 output_asm_insn ("br\t%3", operands);
4379 assemble_label (asm_out_file, label);
4380 return "";
4381 }
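/* As an example, for a HImode dispatch table (index == 1 above) and a
   register allocation of x0 = table base, w1 = index, x3/x4 = scratch,
   the emitted sequence is along the lines of:

	ldrh	w3, [x0,w1,uxtw #1]
	adr	x4, .Lrtx<N>
	add	x3, x4, w3, sxth #2
	br	x3
   .Lrtx<N>:

   with the dispatch-table entries expected to hold label differences
   relative to .Lrtx<N>, scaled to match the final shift-by-2 add.  */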
4382
4383
4384 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4385 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4386 operator. */
4387
4388 int
4389 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4390 {
4391 if (shift >= 0 && shift <= 3)
4392 {
4393 int size;
4394 for (size = 8; size <= 32; size *= 2)
4395 {
4396 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4397 if (mask == bits << shift)
4398 return size;
4399 }
4400 }
4401 return 0;
4402 }
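/* Some example results of the search above (hypothetical helper,
   illustrative only, not built): a mask of 0xff at shift 0 or 0x1fe at
   shift 1 selects a UXTB-sized field, 0xffff at shift 0 a UXTH-sized one,
   and anything that is not a contiguous 8/16/32-bit field starting at the
   shift amount yields 0.  */
#if 0
static void
aarch64_uxt_size_examples (void)
{
  gcc_assert (aarch64_uxt_size (0, 0xff) == 8);
  gcc_assert (aarch64_uxt_size (1, 0x1fe) == 8);
  gcc_assert (aarch64_uxt_size (0, 0xffff) == 16);
  gcc_assert (aarch64_uxt_size (3, (HOST_WIDE_INT) 0xffffffff << 3) == 32);
  gcc_assert (aarch64_uxt_size (0, 0x7f) == 0);
}
#endif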
4403
4404 static bool
4405 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4406 const_rtx x ATTRIBUTE_UNUSED)
4407 {
4408 /* We can't use blocks for constants when we're using a per-function
4409 constant pool. */
4410 return false;
4411 }
4412
4413 static section *
4414 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4415 rtx x ATTRIBUTE_UNUSED,
4416 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4417 {
4418 /* Force all constant pool entries into the current function section. */
4419 return function_section (current_function_decl);
4420 }
4421
4422
4423 /* Costs. */
4424
4425 /* Helper function for rtx cost calculation. Strip a shift expression
4426 from X. Returns the inner operand if successful, or the original
4427 expression on failure. */
4428 static rtx
4429 aarch64_strip_shift (rtx x)
4430 {
4431 rtx op = x;
4432
4433 if ((GET_CODE (op) == ASHIFT
4434 || GET_CODE (op) == ASHIFTRT
4435 || GET_CODE (op) == LSHIFTRT)
4436 && CONST_INT_P (XEXP (op, 1)))
4437 return XEXP (op, 0);
4438
4439 if (GET_CODE (op) == MULT
4440 && CONST_INT_P (XEXP (op, 1))
4441 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4442 return XEXP (op, 0);
4443
4444 return x;
4445 }
4446
4447 /* Helper function for rtx cost calculation. Strip a shift or extend
4448 expression from X. Returns the inner operand if successful, or the
4449 original expression on failure. We deal with a number of possible
4450 canonicalization variations here. */
4451 static rtx
4452 aarch64_strip_shift_or_extend (rtx x)
4453 {
4454 rtx op = x;
4455
4456 /* Zero and sign extraction of a widened value. */
4457 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4458 && XEXP (op, 2) == const0_rtx
4459 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4460 XEXP (op, 1)))
4461 return XEXP (XEXP (op, 0), 0);
4462
4463 /* It can also be represented (for zero-extend) as an AND with an
4464 immediate. */
4465 if (GET_CODE (op) == AND
4466 && GET_CODE (XEXP (op, 0)) == MULT
4467 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4468 && CONST_INT_P (XEXP (op, 1))
4469 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4470 INTVAL (XEXP (op, 1))) != 0)
4471 return XEXP (XEXP (op, 0), 0);
4472
4473 /* Now handle extended register, as this may also have an optional
4474 left shift by 1..4. */
4475 if (GET_CODE (op) == ASHIFT
4476 && CONST_INT_P (XEXP (op, 1))
4477 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4478 op = XEXP (op, 0);
4479
4480 if (GET_CODE (op) == ZERO_EXTEND
4481 || GET_CODE (op) == SIGN_EXTEND)
4482 op = XEXP (op, 0);
4483
4484 if (op != x)
4485 return op;
4486
4487 return aarch64_strip_shift (x);
4488 }
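/* Some examples of what the two strip functions above accept, purely for
   illustration:

     (ashift (reg) (const_int 3))                        -> (reg)
     (mult (reg) (const_int 8))                          -> (reg)  [power of two]
     (and (mult (reg) (const_int 4)) (const_int 0x3fc))  -> (reg)  [zero-extend
					form, since 0xff << 2 == 0x3fc]
     (ashift (zero_extend (reg)) (const_int 2))          -> (reg)  [extended reg]

   Anything that does not match comes back unchanged, so callers can simply
   compare the result against the original rtx.  */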
4489
4490 /* Calculate the cost of calculating X, storing it in *COST. Result
4491 is true if the total cost of the operation has now been calculated. */
4492 static bool
4493 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4494 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4495 {
4496 rtx op0, op1;
4497 const struct cpu_cost_table *extra_cost
4498 = aarch64_tune_params->insn_extra_cost;
4499
4500 switch (code)
4501 {
4502 case SET:
4503 op0 = SET_DEST (x);
4504 op1 = SET_SRC (x);
4505
4506 switch (GET_CODE (op0))
4507 {
4508 case MEM:
4509 if (speed)
4510 *cost += extra_cost->ldst.store;
4511
4512 if (op1 != const0_rtx)
4513 *cost += rtx_cost (op1, SET, 1, speed);
4514 return true;
4515
4516 case SUBREG:
4517 if (! REG_P (SUBREG_REG (op0)))
4518 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4519 /* Fall through. */
4520 case REG:
4521 /* Cost is just the cost of the RHS of the set. */
4522 *cost += rtx_cost (op1, SET, 1, true);
4523 return true;
4524
4525 case ZERO_EXTRACT: /* Bit-field insertion. */
4526 case SIGN_EXTRACT:
4527 /* Strip any redundant widening of the RHS to meet the width of
4528 the target. */
4529 if (GET_CODE (op1) == SUBREG)
4530 op1 = SUBREG_REG (op1);
4531 if ((GET_CODE (op1) == ZERO_EXTEND
4532 || GET_CODE (op1) == SIGN_EXTEND)
4533 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4534 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4535 >= INTVAL (XEXP (op0, 1))))
4536 op1 = XEXP (op1, 0);
4537 *cost += rtx_cost (op1, SET, 1, speed);
4538 return true;
4539
4540 default:
4541 break;
4542 }
4543 return false;
4544
4545 case MEM:
4546 if (speed)
4547 *cost += extra_cost->ldst.load;
4548
4549 return true;
4550
4551 case NEG:
4552 op0 = CONST0_RTX (GET_MODE (x));
4553 op1 = XEXP (x, 0);
4554 goto cost_minus;
4555
4556 case COMPARE:
4557 op0 = XEXP (x, 0);
4558 op1 = XEXP (x, 1);
4559
4560 if (op1 == const0_rtx
4561 && GET_CODE (op0) == AND)
4562 {
4563 x = op0;
4564 goto cost_logic;
4565 }
4566
4567 /* Comparisons can work if the order is swapped.
4568 Canonicalization puts the more complex operation first, but
4569 we want it in op1. */
4570 if (! (REG_P (op0)
4571 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4572 {
4573 op0 = XEXP (x, 1);
4574 op1 = XEXP (x, 0);
4575 }
4576 goto cost_minus;
4577
4578 case MINUS:
4579 op0 = XEXP (x, 0);
4580 op1 = XEXP (x, 1);
4581
4582 cost_minus:
4583 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4584 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4585 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4586 {
4587 if (op0 != const0_rtx)
4588 *cost += rtx_cost (op0, MINUS, 0, speed);
4589
4590 if (CONST_INT_P (op1))
4591 {
4592 if (!aarch64_uimm12_shift (INTVAL (op1)))
4593 *cost += rtx_cost (op1, MINUS, 1, speed);
4594 }
4595 else
4596 {
4597 op1 = aarch64_strip_shift_or_extend (op1);
4598 *cost += rtx_cost (op1, MINUS, 1, speed);
4599 }
4600 return true;
4601 }
4602
4603 return false;
4604
4605 case PLUS:
4606 op0 = XEXP (x, 0);
4607 op1 = XEXP (x, 1);
4608
4609 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4610 {
4611 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4612 {
4613 *cost += rtx_cost (op0, PLUS, 0, speed);
4614 }
4615 else
4616 {
4617 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4618
4619 if (new_op0 == op0
4620 && GET_CODE (op0) == MULT)
4621 {
4622 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4623 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4624 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4625 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4626 {
4627 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4628 speed)
4629 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4630 speed)
4631 + rtx_cost (op1, PLUS, 1, speed));
4632 if (speed)
4633 *cost +=
4634 extra_cost->mult[GET_MODE (x) == DImode].extend_add;
4635 return true;
4636 }
4637 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4638 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4639 + rtx_cost (op1, PLUS, 1, speed));
4640
4641 if (speed)
4642 *cost += extra_cost->mult[GET_MODE (x) == DImode].add;
return true;
4643 }
4644
4645 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4646 + rtx_cost (op1, PLUS, 1, speed));
4647 }
4648 return true;
4649 }
4650
4651 return false;
4652
4653 case IOR:
4654 case XOR:
4655 case AND:
4656 cost_logic:
4657 op0 = XEXP (x, 0);
4658 op1 = XEXP (x, 1);
4659
4660 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4661 {
4662 if (CONST_INT_P (op1)
4663 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4664 {
4665 *cost += rtx_cost (op0, AND, 0, speed);
4666 }
4667 else
4668 {
4669 if (GET_CODE (op0) == NOT)
4670 op0 = XEXP (op0, 0);
4671 op0 = aarch64_strip_shift (op0);
4672 *cost += (rtx_cost (op0, AND, 0, speed)
4673 + rtx_cost (op1, AND, 1, speed));
4674 }
4675 return true;
4676 }
4677 return false;
4678
4679 case ZERO_EXTEND:
4680 if ((GET_MODE (x) == DImode
4681 && GET_MODE (XEXP (x, 0)) == SImode)
4682 || GET_CODE (XEXP (x, 0)) == MEM)
4683 {
4684 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4685 return true;
4686 }
4687 return false;
4688
4689 case SIGN_EXTEND:
4690 if (GET_CODE (XEXP (x, 0)) == MEM)
4691 {
4692 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4693 return true;
4694 }
4695 return false;
4696
4697 case ROTATE:
4698 if (!CONST_INT_P (XEXP (x, 1)))
4699 *cost += COSTS_N_INSNS (2);
4700 /* Fall through. */
4701 case ROTATERT:
4702 case LSHIFTRT:
4703 case ASHIFT:
4704 case ASHIFTRT:
4705
4706 /* Shifting by a register often takes an extra cycle. */
4707 if (speed && !CONST_INT_P (XEXP (x, 1)))
4708 *cost += extra_cost->alu.arith_shift_reg;
4709
4710 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4711 return true;
4712
4713 case HIGH:
4714 if (!CONSTANT_P (XEXP (x, 0)))
4715 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4716 return true;
4717
4718 case LO_SUM:
4719 if (!CONSTANT_P (XEXP (x, 1)))
4720 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4721 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4722 return true;
4723
4724 case ZERO_EXTRACT:
4725 case SIGN_EXTRACT:
4726 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4727 return true;
4728
4729 case MULT:
4730 op0 = XEXP (x, 0);
4731 op1 = XEXP (x, 1);
4732
4733 *cost = COSTS_N_INSNS (1);
4734 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4735 {
4736 if (CONST_INT_P (op1)
4737 && exact_log2 (INTVAL (op1)) > 0)
4738 {
4739 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4740 return true;
4741 }
4742
4743 if ((GET_CODE (op0) == ZERO_EXTEND
4744 && GET_CODE (op1) == ZERO_EXTEND)
4745 || (GET_CODE (op0) == SIGN_EXTEND
4746 && GET_CODE (op1) == SIGN_EXTEND))
4747 {
4748 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4749 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4750 if (speed)
4751 *cost += extra_cost->mult[GET_MODE (x) == DImode].extend;
4752 return true;
4753 }
4754
4755 if (speed)
4756 *cost += extra_cost->mult[GET_MODE (x) == DImode].simple;
4757 }
4758 else if (speed)
4759 {
4760 if (GET_MODE (x) == DFmode)
4761 *cost += extra_cost->fp[1].mult;
4762 else if (GET_MODE (x) == SFmode)
4763 *cost += extra_cost->fp[0].mult;
4764 }
4765
4766 return false; /* All arguments need to be in registers. */
4767
4768 case MOD:
4769 case UMOD:
4770 *cost = COSTS_N_INSNS (2);
4771 if (speed)
4772 {
4773 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4774 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
4775 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
4776 else if (GET_MODE (x) == DFmode)
4777 *cost += (extra_cost->fp[1].mult
4778 + extra_cost->fp[1].div);
4779 else if (GET_MODE (x) == SFmode)
4780 *cost += (extra_cost->fp[0].mult
4781 + extra_cost->fp[0].div);
4782 }
4783 return false; /* All arguments need to be in registers. */
4784
4785 case DIV:
4786 case UDIV:
4787 *cost = COSTS_N_INSNS (1);
4788 if (speed)
4789 {
4790 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4791 *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
4792 else if (GET_MODE (x) == DFmode)
4793 *cost += extra_cost->fp[1].div;
4794 else if (GET_MODE (x) == SFmode)
4795 *cost += extra_cost->fp[0].div;
4796 }
4797 return false; /* All arguments need to be in registers. */
4798
4799 default:
4800 break;
4801 }
4802 return false;
4803 }
4804
4805 static int
4806 aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4807 enum machine_mode mode ATTRIBUTE_UNUSED,
4808 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4809 {
4810 enum rtx_code c = GET_CODE (x);
4811 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4812
4813 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4814 return addr_cost->pre_modify;
4815
4816 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4817 return addr_cost->post_modify;
4818
4819 if (c == PLUS)
4820 {
4821 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4822 return addr_cost->imm_offset;
4823 else if (GET_CODE (XEXP (x, 0)) == MULT
4824 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4825 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4826 return addr_cost->register_extend;
4827
4828 return addr_cost->register_offset;
4829 }
4830 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4831 return addr_cost->imm_offset;
4832
4833 return 0;
4834 }
4835
4836 static int
4837 aarch64_register_move_cost (enum machine_mode mode,
4838 reg_class_t from, reg_class_t to)
4839 {
4840 const struct cpu_regmove_cost *regmove_cost
4841 = aarch64_tune_params->regmove_cost;
4842
4843 if (from == GENERAL_REGS && to == GENERAL_REGS)
4844 return regmove_cost->GP2GP;
4845 else if (from == GENERAL_REGS)
4846 return regmove_cost->GP2FP;
4847 else if (to == GENERAL_REGS)
4848 return regmove_cost->FP2GP;
4849
4850 /* When AdvSIMD instructions are disabled it is not possible to move
4851 a 128-bit value directly between Q registers. This is handled in
4852 secondary reload. A general register is used as a scratch to move
4853 the upper DI value and the lower DI value is moved directly,
4854 hence the cost is the sum of three moves. */
4855
4856 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
4857 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4858
4859 return regmove_cost->FP2FP;
4860 }
4861
4862 static int
4863 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4864 reg_class_t rclass ATTRIBUTE_UNUSED,
4865 bool in ATTRIBUTE_UNUSED)
4866 {
4867 return aarch64_tune_params->memmov_cost;
4868 }
4869
4870 /* Vectorizer cost model target hooks. */
4871
4872 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4873 static int
4874 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4875 tree vectype,
4876 int misalign ATTRIBUTE_UNUSED)
4877 {
4878 unsigned elements;
4879
4880 switch (type_of_cost)
4881 {
4882 case scalar_stmt:
4883 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
4884
4885 case scalar_load:
4886 return aarch64_tune_params->vec_costs->scalar_load_cost;
4887
4888 case scalar_store:
4889 return aarch64_tune_params->vec_costs->scalar_store_cost;
4890
4891 case vector_stmt:
4892 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4893
4894 case vector_load:
4895 return aarch64_tune_params->vec_costs->vec_align_load_cost;
4896
4897 case vector_store:
4898 return aarch64_tune_params->vec_costs->vec_store_cost;
4899
4900 case vec_to_scalar:
4901 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
4902
4903 case scalar_to_vec:
4904 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
4905
4906 case unaligned_load:
4907 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
4908
4909 case unaligned_store:
4910 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
4911
4912 case cond_branch_taken:
4913 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
4914
4915 case cond_branch_not_taken:
4916 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
4917
4918 case vec_perm:
4919 case vec_promote_demote:
4920 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4921
4922 case vec_construct:
4923 elements = TYPE_VECTOR_SUBPARTS (vectype);
4924 return elements / 2 + 1;
4925
4926 default:
4927 gcc_unreachable ();
4928 }
4929 }
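/* By way of example, most of the entries above simply forward to the
   per-CPU vector cost table, while vec_construct is synthesised from the
   vector width: constructing a V4SI vector gives TYPE_VECTOR_SUBPARTS == 4
   and hence a cost of 4 / 2 + 1 = 3.  */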
4930
4931 /* Implement targetm.vectorize.add_stmt_cost. */
4932 static unsigned
4933 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4934 struct _stmt_vec_info *stmt_info, int misalign,
4935 enum vect_cost_model_location where)
4936 {
4937 unsigned *cost = (unsigned *) data;
4938 unsigned retval = 0;
4939
4940 if (flag_vect_cost_model)
4941 {
4942 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4943 int stmt_cost =
4944 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
4945
4946 /* Statements in an inner loop relative to the loop being
4947 vectorized are weighted more heavily. The value here is
4948 a function (linear for now) of the loop nest level. */
4949 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4950 {
4951 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4952 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
4953 unsigned nest_level = loop_depth (loop);
4954
4955 count *= nest_level;
4956 }
4957
4958 retval = (unsigned) (count * stmt_cost);
4959 cost[where] += retval;
4960 }
4961
4962 return retval;
4963 }
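/* A small worked example of the weighting above, with illustrative numbers:
   if a vector statement whose table cost is 1 appears twice (COUNT == 2) in
   the body of a loop at depth 2 inside the loop being vectorized, COUNT
   becomes 2 * 2 = 4 and 4 * 1 = 4 is accumulated into cost[vect_body].
   Statements not in an inner loop keep their plain count * stmt_cost
   contribution.  */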
4964
4965 static void initialize_aarch64_code_model (void);
4966
4967 /* Parse the architecture extension string. */
4968
4969 static void
4970 aarch64_parse_extension (char *str)
4971 {
4972 /* The extension string is parsed left to right. */
4973 const struct aarch64_option_extension *opt = NULL;
4974
4975 /* Flag to say whether we are adding or removing an extension. */
4976 int adding_ext = -1;
4977
4978 while (str != NULL && *str != 0)
4979 {
4980 char *ext;
4981 size_t len;
4982
4983 str++;
4984 ext = strchr (str, '+');
4985
4986 if (ext != NULL)
4987 len = ext - str;
4988 else
4989 len = strlen (str);
4990
4991 if (len >= 2 && strncmp (str, "no", 2) == 0)
4992 {
4993 adding_ext = 0;
4994 len -= 2;
4995 str += 2;
4996 }
4997 else if (len > 0)
4998 adding_ext = 1;
4999
5000 if (len == 0)
5001 {
5002 error ("missing feature modifier after %qs", "+no");
5003 return;
5004 }
5005
5006 /* Scan over the extensions table trying to find an exact match. */
5007 for (opt = all_extensions; opt->name != NULL; opt++)
5008 {
5009 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
5010 {
5011 /* Add or remove the extension. */
5012 if (adding_ext)
5013 aarch64_isa_flags |= opt->flags_on;
5014 else
5015 aarch64_isa_flags &= ~(opt->flags_off);
5016 break;
5017 }
5018 }
5019
5020 if (opt->name == NULL)
5021 {
5022 /* Extension not found in list. */
5023 error ("unknown feature modifier %qs", str);
5024 return;
5025 }
5026
5027 str = ext;
5028 }
5029
5030 return;
5031 }
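/* A worked example of the parse above, assuming "fp" and "simd" are entries
   in all_extensions: given the trailing string "+fp+nosimd" from an -march
   or -mcpu option, the first iteration skips the '+', matches "fp" and ORs
   in its flags_on bits; the second iteration skips the '+', strips the
   leading "no", matches "simd" and clears its flags_off bits.  An empty or
   unknown modifier is reported with error () and parsing stops.  */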
5032
5033 /* Parse the ARCH string. */
5034
5035 static void
5036 aarch64_parse_arch (void)
5037 {
5038 char *ext;
5039 const struct processor *arch;
5040 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
5041 size_t len;
5042
5043 strcpy (str, aarch64_arch_string);
5044
5045 ext = strchr (str, '+');
5046
5047 if (ext != NULL)
5048 len = ext - str;
5049 else
5050 len = strlen (str);
5051
5052 if (len == 0)
5053 {
5054 error ("missing arch name in -march=%qs", str);
5055 return;
5056 }
5057
5058 /* Loop through the list of supported ARCHs to find a match. */
5059 for (arch = all_architectures; arch->name != NULL; arch++)
5060 {
5061 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5062 {
5063 selected_arch = arch;
5064 aarch64_isa_flags = selected_arch->flags;
5065 selected_cpu = &all_cores[selected_arch->core];
5066
5067 if (ext != NULL)
5068 {
5069 /* ARCH string contains at least one extension. */
5070 aarch64_parse_extension (ext);
5071 }
5072
5073 return;
5074 }
5075 }
5076
5077 /* ARCH name not found in list. */
5078 error ("unknown value %qs for -march", str);
5079 return;
5080 }
5081
5082 /* Parse the CPU string. */
5083
5084 static void
5085 aarch64_parse_cpu (void)
5086 {
5087 char *ext;
5088 const struct processor *cpu;
5089 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5090 size_t len;
5091
5092 strcpy (str, aarch64_cpu_string);
5093
5094 ext = strchr (str, '+');
5095
5096 if (ext != NULL)
5097 len = ext - str;
5098 else
5099 len = strlen (str);
5100
5101 if (len == 0)
5102 {
5103 error ("missing cpu name in -mcpu=%qs", str);
5104 return;
5105 }
5106
5107 /* Loop through the list of supported CPUs to find a match. */
5108 for (cpu = all_cores; cpu->name != NULL; cpu++)
5109 {
5110 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5111 {
5112 selected_cpu = cpu;
5113 aarch64_isa_flags = selected_cpu->flags;
5114
5115 if (ext != NULL)
5116 {
5117 /* CPU string contains at least one extension. */
5118 aarch64_parse_extension (ext);
5119 }
5120
5121 return;
5122 }
5123 }
5124
5125 /* CPU name not found in list. */
5126 error ("unknown value %qs for -mcpu", str);
5127 return;
5128 }
5129
5130 /* Parse the TUNE string. */
5131
5132 static void
5133 aarch64_parse_tune (void)
5134 {
5135 const struct processor *cpu;
5136 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5137 strcpy (str, aarch64_tune_string);
5138
5139 /* Loop through the list of supported CPUs to find a match. */
5140 for (cpu = all_cores; cpu->name != NULL; cpu++)
5141 {
5142 if (strcmp (cpu->name, str) == 0)
5143 {
5144 selected_tune = cpu;
5145 return;
5146 }
5147 }
5148
5149 /* CPU name not found in list. */
5150 error ("unknown value %qs for -mtune", str);
5151 return;
5152 }
5153
5154
5155 /* Implement TARGET_OPTION_OVERRIDE. */
5156
5157 static void
5158 aarch64_override_options (void)
5159 {
5160 /* -march wins over -mcpu, so when -march is defined the -mcpu setting is
5161 ignored and the CPU is derived from the architecture; otherwise -march
5162 remains undefined. -mtune can be used with either -march or -mcpu. */
5163
5164 if (aarch64_arch_string)
5165 {
5166 aarch64_parse_arch ();
5167 aarch64_cpu_string = NULL;
5168 }
5169
5170 if (aarch64_cpu_string)
5171 {
5172 aarch64_parse_cpu ();
5173 selected_arch = NULL;
5174 }
5175
5176 if (aarch64_tune_string)
5177 {
5178 aarch64_parse_tune ();
5179 }
5180
5181 initialize_aarch64_code_model ();
5182
5183 aarch64_build_bitmask_table ();
5184
5185 /* This target defaults to strict volatile bitfields. */
5186 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5187 flag_strict_volatile_bitfields = 1;
5188
5189 /* If the user did not specify a processor, choose the default
5190 one for them. This will be the CPU set during configuration using
5191 --with-cpu, otherwise it is "cortex-a53". */
5192 if (!selected_cpu)
5193 {
5194 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5195 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5196 }
5197
5198 gcc_assert (selected_cpu);
5199
5200 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
5201 if (!selected_tune)
5202 selected_tune = &all_cores[selected_cpu->core];
5203
5204 aarch64_tune_flags = selected_tune->flags;
5205 aarch64_tune = selected_tune->core;
5206 aarch64_tune_params = selected_tune->tune;
5207
5208 aarch64_override_options_after_change ();
5209 }
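/* As an example of the precedence above, assuming "armv8-a", "cortex-a57"
   and "cortex-a53" all appear in the tables: with
   "-mcpu=cortex-a57 -march=armv8-a -mtune=cortex-a53" the -march string is
   parsed first and the -mcpu string is then discarded, so the CPU and ISA
   flags come from the architecture entry, while -mtune still selects the
   cortex-a53 tuning parameters.  */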
5210
5211 /* Implement targetm.override_options_after_change. */
5212
5213 static void
5214 aarch64_override_options_after_change (void)
5215 {
5216 faked_omit_frame_pointer = false;
5217
5218 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
5219 that aarch64_frame_pointer_required will be called. We need to remember
5220 whether flag_omit_frame_pointer was turned on normally or just faked. */
5221
5222 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
5223 {
5224 flag_omit_frame_pointer = true;
5225 faked_omit_frame_pointer = true;
5226 }
5227 }
5228
5229 static struct machine_function *
5230 aarch64_init_machine_status (void)
5231 {
5232 struct machine_function *machine;
5233 machine = ggc_alloc_cleared_machine_function ();
5234 return machine;
5235 }
5236
5237 void
5238 aarch64_init_expanders (void)
5239 {
5240 init_machine_status = aarch64_init_machine_status;
5241 }
5242
5243 /* Work out which code model to use, reconciling the requested model
with -fpic/-fPIC. */
5244 static void
5245 initialize_aarch64_code_model (void)
5246 {
5247 if (flag_pic)
5248 {
5249 switch (aarch64_cmodel_var)
5250 {
5251 case AARCH64_CMODEL_TINY:
5252 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5253 break;
5254 case AARCH64_CMODEL_SMALL:
5255 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5256 break;
5257 case AARCH64_CMODEL_LARGE:
5258 sorry ("code model %qs with -f%s", "large",
5259 flag_pic > 1 ? "PIC" : "pic");
5260 default:
5261 gcc_unreachable ();
5262 }
5263 }
5264 else
5265 aarch64_cmodel = aarch64_cmodel_var;
5266 }
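/* For example, "-mcmodel=small -fPIC" arrives here with aarch64_cmodel_var
   set to AARCH64_CMODEL_SMALL and flag_pic nonzero, and ends up selecting
   AARCH64_CMODEL_SMALL_PIC; without any -fpic/-fPIC option the requested
   model is used unchanged.  The large model is rejected with sorry () when
   PIC code is requested.  */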
5267
5268 /* Return true if SYMBOL_REF X binds locally. */
5269
5270 static bool
5271 aarch64_symbol_binds_local_p (const_rtx x)
5272 {
5273 return (SYMBOL_REF_DECL (x)
5274 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5275 : SYMBOL_REF_LOCAL_P (x));
5276 }
5277
5278 /* Return true if SYMBOL_REF X is thread-local. */
5279 static bool
5280 aarch64_tls_symbol_p (rtx x)
5281 {
5282 if (! TARGET_HAVE_TLS)
5283 return false;
5284
5285 if (GET_CODE (x) != SYMBOL_REF)
5286 return false;
5287
5288 return SYMBOL_REF_TLS_MODEL (x) != 0;
5289 }
5290
5291 /* Classify a TLS symbol into one of the TLS kinds. */
5292 enum aarch64_symbol_type
5293 aarch64_classify_tls_symbol (rtx x)
5294 {
5295 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5296
5297 switch (tls_kind)
5298 {
5299 case TLS_MODEL_GLOBAL_DYNAMIC:
5300 case TLS_MODEL_LOCAL_DYNAMIC:
5301 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5302
5303 case TLS_MODEL_INITIAL_EXEC:
5304 return SYMBOL_SMALL_GOTTPREL;
5305
5306 case TLS_MODEL_LOCAL_EXEC:
5307 return SYMBOL_SMALL_TPREL;
5308
5309 case TLS_MODEL_EMULATED:
5310 case TLS_MODEL_NONE:
5311 return SYMBOL_FORCE_TO_MEM;
5312
5313 default:
5314 gcc_unreachable ();
5315 }
5316 }
5317
5318 /* Return the method that should be used to access SYMBOL_REF or
5319 LABEL_REF X in context CONTEXT. */
5320
5321 enum aarch64_symbol_type
5322 aarch64_classify_symbol (rtx x,
5323 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5324 {
5325 if (GET_CODE (x) == LABEL_REF)
5326 {
5327 switch (aarch64_cmodel)
5328 {
5329 case AARCH64_CMODEL_LARGE:
5330 return SYMBOL_FORCE_TO_MEM;
5331
5332 case AARCH64_CMODEL_TINY_PIC:
5333 case AARCH64_CMODEL_TINY:
5334 return SYMBOL_TINY_ABSOLUTE;
5335
5336 case AARCH64_CMODEL_SMALL_PIC:
5337 case AARCH64_CMODEL_SMALL:
5338 return SYMBOL_SMALL_ABSOLUTE;
5339
5340 default:
5341 gcc_unreachable ();
5342 }
5343 }
5344
5345 if (GET_CODE (x) == SYMBOL_REF)
5346 {
5347 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
5348 || CONSTANT_POOL_ADDRESS_P (x))
5349 return SYMBOL_FORCE_TO_MEM;
5350
5351 if (aarch64_tls_symbol_p (x))
5352 return aarch64_classify_tls_symbol (x);
5353
5354 switch (aarch64_cmodel)
5355 {
5356 case AARCH64_CMODEL_TINY:
5357 if (SYMBOL_REF_WEAK (x))
5358 return SYMBOL_FORCE_TO_MEM;
5359 return SYMBOL_TINY_ABSOLUTE;
5360
5361 case AARCH64_CMODEL_SMALL:
5362 if (SYMBOL_REF_WEAK (x))
5363 return SYMBOL_FORCE_TO_MEM;
5364 return SYMBOL_SMALL_ABSOLUTE;
5365
5366 case AARCH64_CMODEL_TINY_PIC:
5367 if (!aarch64_symbol_binds_local_p (x))
5368 return SYMBOL_TINY_GOT;
5369 return SYMBOL_TINY_ABSOLUTE;
5370
5371 case AARCH64_CMODEL_SMALL_PIC:
5372 if (!aarch64_symbol_binds_local_p (x))
5373 return SYMBOL_SMALL_GOT;
5374 return SYMBOL_SMALL_ABSOLUTE;
5375
5376 default:
5377 gcc_unreachable ();
5378 }
5379 }
5380
5381 /* By default push everything into the constant pool. */
5382 return SYMBOL_FORCE_TO_MEM;
5383 }
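/* Some representative classifications produced by the function above:
   under the small code model a non-weak SYMBOL_REF is SYMBOL_SMALL_ABSOLUTE
   while a weak one is forced to the literal pool (SYMBOL_FORCE_TO_MEM);
   under the small PIC model a symbol that does not bind locally becomes
   SYMBOL_SMALL_GOT; constant-pool addresses and everything under the large
   model are SYMBOL_FORCE_TO_MEM; TLS symbols are dispatched to
   aarch64_classify_tls_symbol above.  */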
5384
5385 bool
5386 aarch64_constant_address_p (rtx x)
5387 {
5388 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5389 }
5390
5391 bool
5392 aarch64_legitimate_pic_operand_p (rtx x)
5393 {
5394 if (GET_CODE (x) == SYMBOL_REF
5395 || (GET_CODE (x) == CONST
5396 && GET_CODE (XEXP (x, 0)) == PLUS
5397 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5398 return false;
5399
5400 return true;
5401 }
5402
5403 /* Return true if X holds either a quarter-precision or
5404 floating-point +0.0 constant. */
5405 static bool
5406 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5407 {
5408 if (!CONST_DOUBLE_P (x))
5409 return false;
5410
5411 /* TODO: We could handle moving 0.0 to a TFmode register,
5412 but first we would like to refactor the movtf_aarch64
5413 pattern to be more amenable to splitting moves properly and
5414 to gating correctly on TARGET_SIMD. For now, reject all
5415 constants that are not destined for SFmode or DFmode registers. */
5416 if (!(mode == SFmode || mode == DFmode))
5417 return false;
5418
5419 if (aarch64_float_const_zero_rtx_p (x))
5420 return true;
5421 return aarch64_float_const_representable_p (x);
5422 }
5423
5424 static bool
5425 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5426 {
5427 /* Do not allow vector struct mode constants. We could support
5428 0 and -1 easily, but they need support in aarch64-simd.md. */
5429 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5430 return false;
5431
5432 /* This could probably go away because
5433 we now decompose CONST_INTs according to expand_mov_immediate. */
5434 if ((GET_CODE (x) == CONST_VECTOR
5435 && aarch64_simd_valid_immediate (x, mode, false, NULL))
5436 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5437 return !targetm.cannot_force_const_mem (mode, x);
5438
5439 if (GET_CODE (x) == HIGH
5440 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5441 return true;
5442
5443 return aarch64_constant_address_p (x);
5444 }
5445
5446 rtx
5447 aarch64_load_tp (rtx target)
5448 {
5449 if (!target
5450 || GET_MODE (target) != Pmode
5451 || !register_operand (target, Pmode))
5452 target = gen_reg_rtx (Pmode);
5453
5454 /* Can return in any reg. */
5455 emit_insn (gen_aarch64_load_tp_hard (target));
5456 return target;
5457 }
5458
5459 /* On AAPCS systems, this is the "struct __va_list". */
5460 static GTY(()) tree va_list_type;
5461
5462 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5463 Return the type to use as __builtin_va_list.
5464
5465 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5466
5467 struct __va_list
5468 {
5469 void *__stack;
5470 void *__gr_top;
5471 void *__vr_top;
5472 int __gr_offs;
5473 int __vr_offs;
5474 }; */
5475
5476 static tree
5477 aarch64_build_builtin_va_list (void)
5478 {
5479 tree va_list_name;
5480 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5481
5482 /* Create the type. */
5483 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5484 /* Give it the required name. */
5485 va_list_name = build_decl (BUILTINS_LOCATION,
5486 TYPE_DECL,
5487 get_identifier ("__va_list"),
5488 va_list_type);
5489 DECL_ARTIFICIAL (va_list_name) = 1;
5490 TYPE_NAME (va_list_type) = va_list_name;
5491 TYPE_STUB_DECL (va_list_type) = va_list_name;
5492
5493 /* Create the fields. */
5494 f_stack = build_decl (BUILTINS_LOCATION,
5495 FIELD_DECL, get_identifier ("__stack"),
5496 ptr_type_node);
5497 f_grtop = build_decl (BUILTINS_LOCATION,
5498 FIELD_DECL, get_identifier ("__gr_top"),
5499 ptr_type_node);
5500 f_vrtop = build_decl (BUILTINS_LOCATION,
5501 FIELD_DECL, get_identifier ("__vr_top"),
5502 ptr_type_node);
5503 f_groff = build_decl (BUILTINS_LOCATION,
5504 FIELD_DECL, get_identifier ("__gr_offs"),
5505 integer_type_node);
5506 f_vroff = build_decl (BUILTINS_LOCATION,
5507 FIELD_DECL, get_identifier ("__vr_offs"),
5508 integer_type_node);
5509
5510 DECL_ARTIFICIAL (f_stack) = 1;
5511 DECL_ARTIFICIAL (f_grtop) = 1;
5512 DECL_ARTIFICIAL (f_vrtop) = 1;
5513 DECL_ARTIFICIAL (f_groff) = 1;
5514 DECL_ARTIFICIAL (f_vroff) = 1;
5515
5516 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5517 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5518 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5519 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5520 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5521
5522 TYPE_FIELDS (va_list_type) = f_stack;
5523 DECL_CHAIN (f_stack) = f_grtop;
5524 DECL_CHAIN (f_grtop) = f_vrtop;
5525 DECL_CHAIN (f_vrtop) = f_groff;
5526 DECL_CHAIN (f_groff) = f_vroff;
5527
5528 /* Compute its layout. */
5529 layout_type (va_list_type);
5530
5531 return va_list_type;
5532 }
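/* With LP64 field sizes (8-byte pointers, 4-byte ints) the record built
   above is therefore 3 * 8 + 2 * 4 = 32 bytes with 8-byte alignment,
   matching the AAPCS64 definition of struct __va_list.  */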
5533
5534 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5535 static void
5536 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5537 {
5538 const CUMULATIVE_ARGS *cum;
5539 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5540 tree stack, grtop, vrtop, groff, vroff;
5541 tree t;
5542 int gr_save_area_size;
5543 int vr_save_area_size;
5544 int vr_offset;
5545
5546 cum = &crtl->args.info;
5547 gr_save_area_size
5548 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5549 vr_save_area_size
5550 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5551
5552 if (TARGET_GENERAL_REGS_ONLY)
5553 {
5554 if (cum->aapcs_nvrn > 0)
5555 sorry ("%qs and floating point or vector arguments",
5556 "-mgeneral-regs-only");
5557 vr_save_area_size = 0;
5558 }
5559
5560 f_stack = TYPE_FIELDS (va_list_type_node);
5561 f_grtop = DECL_CHAIN (f_stack);
5562 f_vrtop = DECL_CHAIN (f_grtop);
5563 f_groff = DECL_CHAIN (f_vrtop);
5564 f_vroff = DECL_CHAIN (f_groff);
5565
5566 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5567 NULL_TREE);
5568 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5569 NULL_TREE);
5570 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5571 NULL_TREE);
5572 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5573 NULL_TREE);
5574 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5575 NULL_TREE);
5576
5577 /* Emit code to initialize STACK, which points to the next varargs stack
5578 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5579 by named arguments. STACK is 8-byte aligned. */
5580 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5581 if (cum->aapcs_stack_size > 0)
5582 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5583 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5584 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5585
5586 /* Emit code to initialize GRTOP, the top of the GR save area.
5587 virtual_incoming_args_rtx should have been 16 byte aligned. */
5588 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5589 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5590 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5591
5592 /* Emit code to initialize VRTOP, the top of the VR save area.
5593 This address is gr_save_area_bytes below GRTOP, rounded
5594 down to the next 16-byte boundary. */
5595 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5596 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5597 STACK_BOUNDARY / BITS_PER_UNIT);
5598
5599 if (vr_offset)
5600 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5601 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5602 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5603
5604 /* Emit code to initialize GROFF, the offset from GRTOP of the
5605 next GPR argument. */
5606 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5607 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5608 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5609
5610 /* Likewise emit code to initialize VROFF, the offset from VRTOP
5611 of the next VR argument. */
5612 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5613 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5614 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5615 }
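/* A worked example of the initialization above: for a variadic callee such
   as "int f (int a, ...)" the single named argument consumes one core
   register and no vector registers, so gr_save_area_size = (8 - 1) * 8 = 56
   and vr_save_area_size = (8 - 0) * 16 = 128.  __stack then points at the
   first anonymous stack argument, __gr_top at the incoming-arguments
   pointer, __vr_top at __gr_top - ROUND_UP (56, 16) = __gr_top - 64, and
   the offsets are initialized to __gr_offs = -56 and __vr_offs = -128.  */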
5616
5617 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5618
5619 static tree
5620 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5621 gimple_seq *post_p ATTRIBUTE_UNUSED)
5622 {
5623 tree addr;
5624 bool indirect_p;
5625 bool is_ha; /* is HFA or HVA. */
5626 bool dw_align; /* double-word align. */
5627 enum machine_mode ag_mode = VOIDmode;
5628 int nregs;
5629 enum machine_mode mode;
5630
5631 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5632 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5633 HOST_WIDE_INT size, rsize, adjust, align;
5634 tree t, u, cond1, cond2;
5635
5636 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5637 if (indirect_p)
5638 type = build_pointer_type (type);
5639
5640 mode = TYPE_MODE (type);
5641
5642 f_stack = TYPE_FIELDS (va_list_type_node);
5643 f_grtop = DECL_CHAIN (f_stack);
5644 f_vrtop = DECL_CHAIN (f_grtop);
5645 f_groff = DECL_CHAIN (f_vrtop);
5646 f_vroff = DECL_CHAIN (f_groff);
5647
5648 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5649 f_stack, NULL_TREE);
5650 size = int_size_in_bytes (type);
5651 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5652
5653 dw_align = false;
5654 adjust = 0;
5655 if (aarch64_vfp_is_call_or_return_candidate (mode,
5656 type,
5657 &ag_mode,
5658 &nregs,
5659 &is_ha))
5660 {
5661 /* TYPE passed in fp/simd registers. */
5662 if (TARGET_GENERAL_REGS_ONLY)
5663 sorry ("%qs and floating point or vector arguments",
5664 "-mgeneral-regs-only");
5665
5666 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5667 unshare_expr (valist), f_vrtop, NULL_TREE);
5668 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5669 unshare_expr (valist), f_vroff, NULL_TREE);
5670
5671 rsize = nregs * UNITS_PER_VREG;
5672
5673 if (is_ha)
5674 {
5675 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5676 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5677 }
5678 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5679 && size < UNITS_PER_VREG)
5680 {
5681 adjust = UNITS_PER_VREG - size;
5682 }
5683 }
5684 else
5685 {
5686 /* TYPE passed in general registers. */
5687 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5688 unshare_expr (valist), f_grtop, NULL_TREE);
5689 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5690 unshare_expr (valist), f_groff, NULL_TREE);
5691 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5692 nregs = rsize / UNITS_PER_WORD;
5693
5694 if (align > 8)
5695 dw_align = true;
5696
5697 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5698 && size < UNITS_PER_WORD)
5699 {
5700 adjust = UNITS_PER_WORD - size;
5701 }
5702 }
5703
5704 /* Get a local temporary for the field value. */
5705 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5706
5707 /* Emit code to branch if off >= 0. */
5708 t = build2 (GE_EXPR, boolean_type_node, off,
5709 build_int_cst (TREE_TYPE (off), 0));
5710 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5711
5712 if (dw_align)
5713 {
5714 /* Emit: offs = (offs + 15) & -16. */
5715 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5716 build_int_cst (TREE_TYPE (off), 15));
5717 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5718 build_int_cst (TREE_TYPE (off), -16));
5719 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5720 }
5721 else
5722 roundup = NULL;
5723
5724 /* Update ap.__[g|v]r_offs */
5725 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5726 build_int_cst (TREE_TYPE (off), rsize));
5727 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5728
5729 /* String up. */
5730 if (roundup)
5731 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5732
5733 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5734 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5735 build_int_cst (TREE_TYPE (f_off), 0));
5736 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5737
5738 /* String up: make sure the assignment happens before the use. */
5739 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5740 COND_EXPR_ELSE (cond1) = t;
5741
5742 /* Prepare the trees handling the argument that is passed on the stack;
5743 the top level node will store in ON_STACK. */
5744 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5745 if (align > 8)
5746 {
5747 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5748 t = fold_convert (intDI_type_node, arg);
5749 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5750 build_int_cst (TREE_TYPE (t), 15));
5751 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5752 build_int_cst (TREE_TYPE (t), -16));
5753 t = fold_convert (TREE_TYPE (arg), t);
5754 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5755 }
5756 else
5757 roundup = NULL;
5758 /* Advance ap.__stack */
5759 t = fold_convert (intDI_type_node, arg);
5760 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5761 build_int_cst (TREE_TYPE (t), size + 7));
5762 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5763 build_int_cst (TREE_TYPE (t), -8));
5764 t = fold_convert (TREE_TYPE (arg), t);
5765 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5766 /* String up roundup and advance. */
5767 if (roundup)
5768 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5769 /* String up with arg */
5770 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5771 /* Big-endianness related address adjustment. */
5772 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5773 && size < UNITS_PER_WORD)
5774 {
5775 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5776 size_int (UNITS_PER_WORD - size));
5777 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5778 }
5779
5780 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5781 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5782
5783 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5784 t = off;
5785 if (adjust)
5786 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5787 build_int_cst (TREE_TYPE (off), adjust));
5788
5789 t = fold_convert (sizetype, t);
5790 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5791
5792 if (is_ha)
5793 {
5794 /* type ha; // treat as "struct {ftype field[n];}"
5795 ... [computing offs]
5796 for (i = 0; i <nregs; ++i, offs += 16)
5797 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5798 return ha; */
5799 int i;
5800 tree tmp_ha, field_t, field_ptr_t;
5801
5802 /* Declare a local variable. */
5803 tmp_ha = create_tmp_var_raw (type, "ha");
5804 gimple_add_tmp_var (tmp_ha);
5805
5806 /* Establish the base type. */
5807 switch (ag_mode)
5808 {
5809 case SFmode:
5810 field_t = float_type_node;
5811 field_ptr_t = float_ptr_type_node;
5812 break;
5813 case DFmode:
5814 field_t = double_type_node;
5815 field_ptr_t = double_ptr_type_node;
5816 break;
5817 case TFmode:
5818 field_t = long_double_type_node;
5819 field_ptr_t = long_double_ptr_type_node;
5820 break;
5821 /* The half precision and quad precision are not fully supported yet. Enable
5822 the following code after the support is complete. Need to find the correct
5823 type node for __fp16 *. */
5824 #if 0
5825 case HFmode:
5826 field_t = float_type_node;
5827 field_ptr_t = float_ptr_type_node;
5828 break;
5829 #endif
5830 case V2SImode:
5831 case V4SImode:
5832 {
5833 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5834 field_t = build_vector_type_for_mode (innertype, ag_mode);
5835 field_ptr_t = build_pointer_type (field_t);
5836 }
5837 break;
5838 default:
5839 gcc_assert (0);
5840 }
5841
5842 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area). */
5843 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5844 addr = t;
5845 t = fold_convert (field_ptr_t, addr);
5846 t = build2 (MODIFY_EXPR, field_t,
5847 build1 (INDIRECT_REF, field_t, tmp_ha),
5848 build1 (INDIRECT_REF, field_t, t));
5849
5850 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5851 for (i = 1; i < nregs; ++i)
5852 {
5853 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5854 u = fold_convert (field_ptr_t, addr);
5855 u = build2 (MODIFY_EXPR, field_t,
5856 build2 (MEM_REF, field_t, tmp_ha,
5857 build_int_cst (field_ptr_t,
5858 (i *
5859 int_size_in_bytes (field_t)))),
5860 build1 (INDIRECT_REF, field_t, u));
5861 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5862 }
5863
5864 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5865 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5866 }
5867
5868 COND_EXPR_ELSE (cond2) = t;
5869 addr = fold_convert (build_pointer_type (type), cond1);
5870 addr = build_va_arg_indirect_ref (addr);
5871
5872 if (indirect_p)
5873 addr = build_va_arg_indirect_ref (addr);
5874
5875 return addr;
5876 }
5877
5878 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5879
5880 static void
5881 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5882 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5883 int no_rtl)
5884 {
5885 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5886 CUMULATIVE_ARGS local_cum;
5887 int gr_saved, vr_saved;
5888
5889 /* The caller has advanced CUM up to, but not beyond, the last named
5890 argument. Advance a local copy of CUM past the last "real" named
5891 argument, to find out how many registers are left over. */
5892 local_cum = *cum;
5893 aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
5894
5895 /* Find out how many registers we need to save. */
5896 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5897 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5898
5899 if (TARGET_GENERAL_REGS_ONLY)
5900 {
5901 if (local_cum.aapcs_nvrn > 0)
5902 sorry ("%qs and floating point or vector arguments",
5903 "-mgeneral-regs-only");
5904 vr_saved = 0;
5905 }
5906
5907 if (!no_rtl)
5908 {
5909 if (gr_saved > 0)
5910 {
5911 rtx ptr, mem;
5912
5913 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5914 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5915 - gr_saved * UNITS_PER_WORD);
5916 mem = gen_frame_mem (BLKmode, ptr);
5917 set_mem_alias_set (mem, get_varargs_alias_set ());
5918
5919 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5920 mem, gr_saved);
5921 }
5922 if (vr_saved > 0)
5923 {
5924 /* We can't use move_block_from_reg, because it will use
5925 the wrong mode, storing D regs only. */
5926 enum machine_mode mode = TImode;
5927 int off, i;
5928
5929 /* Set OFF to the offset from virtual_incoming_args_rtx of
5930 the first vector register. The VR save area lies below
5931 the GR one, and is aligned to 16 bytes. */
5932 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5933 STACK_BOUNDARY / BITS_PER_UNIT);
5934 off -= vr_saved * UNITS_PER_VREG;
5935
5936 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5937 {
5938 rtx ptr, mem;
5939
5940 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5941 mem = gen_frame_mem (mode, ptr);
5942 set_mem_alias_set (mem, get_varargs_alias_set ());
5943 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5944 off += UNITS_PER_VREG;
5945 }
5946 }
5947 }
5948
5949 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5950 any complication of having crtl->args.pretend_args_size changed. */
5951 cfun->machine->saved_varargs_size
5952 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5953 STACK_BOUNDARY / BITS_PER_UNIT)
5954 + vr_saved * UNITS_PER_VREG);
5955 }
5956
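/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  When the FP/SIMD unit is
   not available (!TARGET_FLOAT), mark every vector register as fixed so
   that the register allocator never uses it.  */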
5957 static void
5958 aarch64_conditional_register_usage (void)
5959 {
5960 int i;
5961 if (!TARGET_FLOAT)
5962 {
5963 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5964 {
5965 fixed_regs[i] = 1;
5966 call_used_regs[i] = 1;
5967 }
5968 }
5969 }
5970
5971 /* Walk down the type tree of TYPE counting consecutive base elements.
5972 If *MODEP is VOIDmode, then set it to the first valid floating point
5973 type. If a non-floating point type is found, or if a floating point
5974 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5975 otherwise return the count in the sub-tree. */
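/* For example, given
     struct hfa { double a, b, c; };
   the walk below finds three consecutive DFmode elements and returns 3,
   whereas a structure mixing double and float fields fails the *MODEP
   check and yields -1.  */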
5976 static int
5977 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5978 {
5979 enum machine_mode mode;
5980 HOST_WIDE_INT size;
5981
5982 switch (TREE_CODE (type))
5983 {
5984 case REAL_TYPE:
5985 mode = TYPE_MODE (type);
5986 if (mode != DFmode && mode != SFmode && mode != TFmode)
5987 return -1;
5988
5989 if (*modep == VOIDmode)
5990 *modep = mode;
5991
5992 if (*modep == mode)
5993 return 1;
5994
5995 break;
5996
5997 case COMPLEX_TYPE:
5998 mode = TYPE_MODE (TREE_TYPE (type));
5999 if (mode != DFmode && mode != SFmode && mode != TFmode)
6000 return -1;
6001
6002 if (*modep == VOIDmode)
6003 *modep = mode;
6004
6005 if (*modep == mode)
6006 return 2;
6007
6008 break;
6009
6010 case VECTOR_TYPE:
6011 /* Use V2SImode and V4SImode as representatives of all 64-bit
6012 and 128-bit vector types. */
6013 size = int_size_in_bytes (type);
6014 switch (size)
6015 {
6016 case 8:
6017 mode = V2SImode;
6018 break;
6019 case 16:
6020 mode = V4SImode;
6021 break;
6022 default:
6023 return -1;
6024 }
6025
6026 if (*modep == VOIDmode)
6027 *modep = mode;
6028
6029 /* Vector modes are considered to be opaque: two vectors are
6030 equivalent for the purposes of being homogeneous aggregates
6031 if they are the same size. */
6032 if (*modep == mode)
6033 return 1;
6034
6035 break;
6036
6037 case ARRAY_TYPE:
6038 {
6039 int count;
6040 tree index = TYPE_DOMAIN (type);
6041
6042 /* Can't handle incomplete types. */
6043 if (!COMPLETE_TYPE_P (type))
6044 return -1;
6045
6046 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6047 if (count == -1
6048 || !index
6049 || !TYPE_MAX_VALUE (index)
6050 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6051 || !TYPE_MIN_VALUE (index)
6052 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6053 || count < 0)
6054 return -1;
6055
6056 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6057 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6058
6059 /* There must be no padding. */
6060 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6061 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6062 != count * GET_MODE_BITSIZE (*modep)))
6063 return -1;
6064
6065 return count;
6066 }
6067
6068 case RECORD_TYPE:
6069 {
6070 int count = 0;
6071 int sub_count;
6072 tree field;
6073
6074 /* Can't handle incomplete types. */
6075 if (!COMPLETE_TYPE_P (type))
6076 return -1;
6077
6078 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6079 {
6080 if (TREE_CODE (field) != FIELD_DECL)
6081 continue;
6082
6083 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6084 if (sub_count < 0)
6085 return -1;
6086 count += sub_count;
6087 }
6088
6089 /* There must be no padding. */
6090 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6091 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6092 != count * GET_MODE_BITSIZE (*modep)))
6093 return -1;
6094
6095 return count;
6096 }
6097
6098 case UNION_TYPE:
6099 case QUAL_UNION_TYPE:
6100 {
6101 /* These aren't very interesting except in a degenerate case. */
6102 int count = 0;
6103 int sub_count;
6104 tree field;
6105
6106 /* Can't handle incomplete types. */
6107 if (!COMPLETE_TYPE_P (type))
6108 return -1;
6109
6110 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6111 {
6112 if (TREE_CODE (field) != FIELD_DECL)
6113 continue;
6114
6115 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6116 if (sub_count < 0)
6117 return -1;
6118 count = count > sub_count ? count : sub_count;
6119 }
6120
6121 /* There must be no padding. */
6122 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6123 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6124 != count * GET_MODE_BITSIZE (*modep)))
6125 return -1;
6126
6127 return count;
6128 }
6129
6130 default:
6131 break;
6132 }
6133
6134 return -1;
6135 }
6136
6137 /* Return true if we use LRA instead of reload pass. */
6138 static bool
6139 aarch64_lra_p (void)
6140 {
6141 return aarch64_lra_flag;
6142 }
6143
6144 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6145 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6146 array types. The C99 floating-point complex types are also considered
6147 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6148 types, which are GCC extensions and out of the scope of AAPCS64, are
6149 treated as composite types here as well.
6150
6151 Note that MODE itself is not sufficient in determining whether a type
6152 is such a composite type or not. This is because
6153 stor-layout.c:compute_record_mode may have already changed the MODE
6154 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6155 structure with only one field may have its MODE set to the mode of the
6156 field. Also an integer mode whose size matches the size of the
6157 RECORD_TYPE type may be used to substitute the original mode
6158 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6159 solely relied on. */
6160
6161 static bool
6162 aarch64_composite_type_p (const_tree type,
6163 enum machine_mode mode)
6164 {
6165 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6166 return true;
6167
6168 if (mode == BLKmode
6169 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6170 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6171 return true;
6172
6173 return false;
6174 }
6175
6176 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6177 type as described in AAPCS64 \S 4.1.2.
6178
6179 See the comment above aarch64_composite_type_p for the notes on MODE. */
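/* For instance, the 8-byte int32x2_t and 16-byte float32x4_t AdvSIMD types
   are short vectors in this sense; vector types of any other size are
   not.  */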
6180
6181 static bool
6182 aarch64_short_vector_p (const_tree type,
6183 enum machine_mode mode)
6184 {
6185 HOST_WIDE_INT size = -1;
6186
6187 if (type && TREE_CODE (type) == VECTOR_TYPE)
6188 size = int_size_in_bytes (type);
6189 else if (!aarch64_composite_type_p (type, mode)
6190 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6191 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6192 size = GET_MODE_SIZE (mode);
6193
6194 return size == 8 || size == 16;
6195 }
6196
6197 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6198 shall be passed or returned in simd/fp register(s) (providing these
6199 parameter passing registers are available).
6200
6201 Upon successful return, *COUNT returns the number of needed registers,
6202 *BASE_MODE returns the mode of the individual register and when IS_HA
6203 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6204 floating-point aggregate or a homogeneous short-vector aggregate. */
6205
6206 static bool
6207 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6208 const_tree type,
6209 enum machine_mode *base_mode,
6210 int *count,
6211 bool *is_ha)
6212 {
6213 enum machine_mode new_mode = VOIDmode;
6214 bool composite_p = aarch64_composite_type_p (type, mode);
6215
6216 if (is_ha != NULL) *is_ha = false;
6217
6218 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6219 || aarch64_short_vector_p (type, mode))
6220 {
6221 *count = 1;
6222 new_mode = mode;
6223 }
6224 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6225 {
6226 if (is_ha != NULL) *is_ha = true;
6227 *count = 2;
6228 new_mode = GET_MODE_INNER (mode);
6229 }
6230 else if (type && composite_p)
6231 {
6232 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6233
6234 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6235 {
6236 if (is_ha != NULL) *is_ha = true;
6237 *count = ag_count;
6238 }
6239 else
6240 return false;
6241 }
6242 else
6243 return false;
6244
6245 *base_mode = new_mode;
6246 return true;
6247 }
6248
6249 /* Implement TARGET_STRUCT_VALUE_RTX. */
6250
6251 static rtx
6252 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6253 int incoming ATTRIBUTE_UNUSED)
6254 {
6255 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6256 }
6257
6258 /* Implements target hook vector_mode_supported_p. */
6259 static bool
6260 aarch64_vector_mode_supported_p (enum machine_mode mode)
6261 {
6262 if (TARGET_SIMD
6263 && (mode == V4SImode || mode == V8HImode
6264 || mode == V16QImode || mode == V2DImode
6265 || mode == V2SImode || mode == V4HImode
6266 || mode == V8QImode || mode == V2SFmode
6267 || mode == V4SFmode || mode == V2DFmode))
6268 return true;
6269
6270 return false;
6271 }
6272
6273 /* Return appropriate SIMD container
6274 for MODE within a vector of WIDTH bits. */
6275 static enum machine_mode
6276 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
6277 {
6278 gcc_assert (width == 64 || width == 128);
6279 if (TARGET_SIMD)
6280 {
6281 if (width == 128)
6282 switch (mode)
6283 {
6284 case DFmode:
6285 return V2DFmode;
6286 case SFmode:
6287 return V4SFmode;
6288 case SImode:
6289 return V4SImode;
6290 case HImode:
6291 return V8HImode;
6292 case QImode:
6293 return V16QImode;
6294 case DImode:
6295 return V2DImode;
6296 default:
6297 break;
6298 }
6299 else
6300 switch (mode)
6301 {
6302 case SFmode:
6303 return V2SFmode;
6304 case SImode:
6305 return V2SImode;
6306 case HImode:
6307 return V4HImode;
6308 case QImode:
6309 return V8QImode;
6310 default:
6311 break;
6312 }
6313 }
6314 return word_mode;
6315 }
6316
6317 /* Return 128-bit container as the preferred SIMD mode for MODE. */
6318 static enum machine_mode
6319 aarch64_preferred_simd_mode (enum machine_mode mode)
6320 {
6321 return aarch64_simd_container_mode (mode, 128);
6322 }
6323
6324 /* Return the bitmask of possible vector sizes for the vectorizer
6325 to iterate over. */
6326 static unsigned int
6327 aarch64_autovectorize_vector_sizes (void)
6328 {
6329 return (16 | 8);
6330 }
6331
6332 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6333 vector types in order to conform to the AAPCS64 (see "Procedure
6334 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6335 qualify for emission with the mangled names defined in that document,
6336 a vector type must not only be of the correct mode but also be
6337 composed of AdvSIMD vector element types (e.g.
6338 __builtin_aarch64_simd_qi); these types are registered by
6339 aarch64_init_simd_builtins (). In other words, vector types defined
6340 in other ways e.g. via vector_size attribute will get default
6341 mangled names. */
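/* For example, per the table below, a 64-bit int8x8_t vector (V8QImode
   built on __builtin_aarch64_simd_qi) is emitted with the mangled name
   "10__Int8x8_t", so a C++ declaration such as void f (int8x8_t) mangles
   as _Z1f10__Int8x8_t rather than with the default vector mangling.  */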
6342 typedef struct
6343 {
6344 enum machine_mode mode;
6345 const char *element_type_name;
6346 const char *mangled_name;
6347 } aarch64_simd_mangle_map_entry;
6348
6349 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6350 /* 64-bit containerized types. */
6351 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6352 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6353 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6354 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6355 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6356 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6357 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6358 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6359 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6360 /* 128-bit containerized types. */
6361 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6362 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6363 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6364 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6365 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6366 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6367 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6368 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6369 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6370 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6371 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6372 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6373 { VOIDmode, NULL, NULL }
6374 };
6375
6376 /* Implement TARGET_MANGLE_TYPE. */
6377
6378 static const char *
6379 aarch64_mangle_type (const_tree type)
6380 {
6381 /* The AArch64 ABI documents say that "__va_list" has to be
6382 mangled as if it were in the "std" namespace. */
6383 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6384 return "St9__va_list";
6385
6386 /* Check the mode of the vector type, and the name of the vector
6387 element type, against the table. */
6388 if (TREE_CODE (type) == VECTOR_TYPE)
6389 {
6390 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6391
6392 while (pos->mode != VOIDmode)
6393 {
6394 tree elt_type = TREE_TYPE (type);
6395
6396 if (pos->mode == TYPE_MODE (type)
6397 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6398 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6399 pos->element_type_name))
6400 return pos->mangled_name;
6401
6402 pos++;
6403 }
6404 }
6405
6406 /* Use the default mangling. */
6407 return NULL;
6408 }
6409
6410 /* Return the equivalent letter for size. */
6411 static char
6412 sizetochar (int size)
6413 {
6414 switch (size)
6415 {
6416 case 64: return 'd';
6417 case 32: return 's';
6418 case 16: return 'h';
6419 case 8 : return 'b';
6420 default: gcc_unreachable ();
6421 }
6422 }
6423
6424 /* Return true iff X is a uniform vector of floating-point
6425 constants, and the constant can be represented in
6426 quarter-precision form. Note, as aarch64_float_const_representable_p
6427 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
6428 static bool
6429 aarch64_vect_float_const_representable_p (rtx x)
6430 {
6431 int i = 0;
6432 REAL_VALUE_TYPE r0, ri;
6433 rtx x0, xi;
6434
6435 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6436 return false;
6437
6438 x0 = CONST_VECTOR_ELT (x, 0);
6439 if (!CONST_DOUBLE_P (x0))
6440 return false;
6441
6442 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6443
6444 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6445 {
6446 xi = CONST_VECTOR_ELT (x, i);
6447 if (!CONST_DOUBLE_P (xi))
6448 return false;
6449
6450 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6451 if (!REAL_VALUES_EQUAL (r0, ri))
6452 return false;
6453 }
6454
6455 return aarch64_float_const_representable_p (x0);
6456 }
6457
6458 /* Return true if OP is a valid AdvSIMD immediate for vector mode MODE
6458 (with its bytes inverted first if INVERSE), filling in *INFO with the
6458 encoding details when INFO is nonnull; return false otherwise. */
6459 bool
6460 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
6461 struct simd_immediate_info *info)
6462 {
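/* The CHECK macro below evaluates TEST at every STRIDE-th byte position of
   the splatted byte vector; if TEST holds at every position it records the
   immediate class CLASS, the element size ELSIZE in bits, the left shift
   SHIFT and the MVN flag NEG in immtype/elsize/eshift/emvn, then breaks
   out of the enclosing do-while.  */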
6463 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6464 matches = 1; \
6465 for (i = 0; i < idx; i += (STRIDE)) \
6466 if (!(TEST)) \
6467 matches = 0; \
6468 if (matches) \
6469 { \
6470 immtype = (CLASS); \
6471 elsize = (ELSIZE); \
6472 eshift = (SHIFT); \
6473 emvn = (NEG); \
6474 break; \
6475 }
6476
6477 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6478 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6479 unsigned char bytes[16];
6480 int immtype = -1, matches;
6481 unsigned int invmask = inverse ? 0xff : 0;
6482 int eshift, emvn;
6483
6484 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6485 {
6486 if (! (aarch64_simd_imm_zero_p (op, mode)
6487 || aarch64_vect_float_const_representable_p (op)))
6488 return false;
6489
6490 if (info)
6491 {
6492 info->value = CONST_VECTOR_ELT (op, 0);
6493 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
6494 info->mvn = false;
6495 info->shift = 0;
6496 }
6497
6498 return true;
6499 }
6500
6501 /* Splat vector constant out into a byte vector. */
6502 for (i = 0; i < n_elts; i++)
6503 {
6504 rtx el = CONST_VECTOR_ELT (op, i);
6505 unsigned HOST_WIDE_INT elpart;
6506 unsigned int part, parts;
6507
6508 if (GET_CODE (el) == CONST_INT)
6509 {
6510 elpart = INTVAL (el);
6511 parts = 1;
6512 }
6513 else if (GET_CODE (el) == CONST_DOUBLE)
6514 {
6515 elpart = CONST_DOUBLE_LOW (el);
6516 parts = 2;
6517 }
6518 else
6519 gcc_unreachable ();
6520
6521 for (part = 0; part < parts; part++)
6522 {
6523 unsigned int byte;
6524 for (byte = 0; byte < innersize; byte++)
6525 {
6526 bytes[idx++] = (elpart & 0xff) ^ invmask;
6527 elpart >>= BITS_PER_UNIT;
6528 }
6529 if (GET_CODE (el) == CONST_DOUBLE)
6530 elpart = CONST_DOUBLE_HIGH (el);
6531 }
6532 }
6533
6534 /* Sanity check. */
6535 gcc_assert (idx == GET_MODE_SIZE (mode));
6536
6537 do
6538 {
6539 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6540 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6541
6542 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6543 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6544
6545 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6546 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6547
6548 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6549 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6550
6551 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6552
6553 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6554
6555 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6556 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6557
6558 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6559 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6560
6561 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6562 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6563
6564 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6565 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6566
6567 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6568
6569 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6570
6571 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6572 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6573
6574 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6575 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6576
6577 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6578 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6579
6580 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6581 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6582
6583 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6584
6585 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6586 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6587 }
6588 while (0);
6589
6590 if (immtype == -1)
6591 return false;
6592
6593 if (info)
6594 {
6595 info->element_width = elsize;
6596 info->mvn = emvn != 0;
6597 info->shift = eshift;
6598
6599 unsigned HOST_WIDE_INT imm = 0;
6600
6601 if (immtype >= 12 && immtype <= 15)
6602 info->msl = true;
6603
6604 /* Un-invert bytes of recognized vector, if necessary. */
6605 if (invmask != 0)
6606 for (i = 0; i < idx; i++)
6607 bytes[i] ^= invmask;
6608
6609 if (immtype == 17)
6610 {
6611 /* FIXME: Broken on 32-bit H_W_I hosts. */
6612 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6613
6614 for (i = 0; i < 8; i++)
6615 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6616 << (i * BITS_PER_UNIT);
6617
6618
6619 info->value = GEN_INT (imm);
6620 }
6621 else
6622 {
6623 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6624 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6625
6626 /* Construct 'abcdefgh' because the assembler cannot handle
6627 generic constants. */
6628 if (info->mvn)
6629 imm = ~imm;
6630 imm = (imm >> info->shift) & 0xff;
6631 info->value = GEN_INT (imm);
6632 }
6633 }
6634
6635 return true;
6636 #undef CHECK
6637 }
6638
6639 static bool
6640 aarch64_const_vec_all_same_int_p (rtx x,
6641 HOST_WIDE_INT minval,
6642 HOST_WIDE_INT maxval)
6643 {
6644 HOST_WIDE_INT firstval;
6645 int count, i;
6646
6647 if (GET_CODE (x) != CONST_VECTOR
6648 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6649 return false;
6650
6651 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6652 if (firstval < minval || firstval > maxval)
6653 return false;
6654
6655 count = CONST_VECTOR_NUNITS (x);
6656 for (i = 1; i < count; i++)
6657 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6658 return false;
6659
6660 return true;
6661 }
6662
6663 /* Check that immediate shift constants are within range. */
6664 bool
6665 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6666 {
6667 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6668 if (left)
6669 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6670 else
6671 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6672 }
6673
6674 /* Return true if X is a uniform vector where all elements
6675 are either the floating-point constant 0.0 or the
6676 integer constant 0. */
6677 bool
6678 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6679 {
6680 return x == CONST0_RTX (mode);
6681 }
6682
6683 bool
6684 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6685 {
6686 HOST_WIDE_INT imm = INTVAL (x);
6687 int i;
6688
6689 for (i = 0; i < 8; i++)
6690 {
6691 unsigned int byte = imm & 0xff;
6692 if (byte != 0xff && byte != 0)
6693 return false;
6694 imm >>= 8;
6695 }
6696
6697 return true;
6698 }
6699
6700 bool
6701 aarch64_mov_operand_p (rtx x,
6702 enum aarch64_symbol_context context,
6703 enum machine_mode mode)
6704 {
6705 if (GET_CODE (x) == HIGH
6706 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6707 return true;
6708
6709 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
6710 return true;
6711
6712 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
6713 return true;
6714
6715 return aarch64_classify_symbolic_expression (x, context)
6716 == SYMBOL_TINY_ABSOLUTE;
6717 }
6718
6719 /* Return a const_int vector of VAL. */
6720 rtx
6721 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6722 {
6723 int nunits = GET_MODE_NUNITS (mode);
6724 rtvec v = rtvec_alloc (nunits);
6725 int i;
6726
6727 for (i = 0; i < nunits; i++)
6728 RTVEC_ELT (v, i) = GEN_INT (val);
6729
6730 return gen_rtx_CONST_VECTOR (mode, v);
6731 }
6732
6733 /* Check OP is a legal scalar immediate for the MOVI instruction. */
6734
6735 bool
6736 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
6737 {
6738 enum machine_mode vmode;
6739
6740 gcc_assert (!VECTOR_MODE_P (mode));
6741 vmode = aarch64_preferred_simd_mode (mode);
6742 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
6743 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
6744 }
6745
6746 /* Construct and return a PARALLEL RTX vector containing the lane
6746 indices of the HIGH (upper) or LOW (lower) half of vector mode MODE. */
6747 rtx
6748 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6749 {
6750 int nunits = GET_MODE_NUNITS (mode);
6751 rtvec v = rtvec_alloc (nunits / 2);
6752 int base = high ? nunits / 2 : 0;
6753 rtx t1;
6754 int i;
6755
6756 for (i = 0; i < nunits / 2; i++)
6757 RTVEC_ELT (v, i) = GEN_INT (base + i);
6758
6759 t1 = gen_rtx_PARALLEL (mode, v);
6760 return t1;
6761 }
6762
6763 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6764 HIGH (exclusive). */
6765 void
6766 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6767 {
6768 HOST_WIDE_INT lane;
6769 gcc_assert (GET_CODE (operand) == CONST_INT);
6770 lane = INTVAL (operand);
6771
6772 if (lane < low || lane >= high)
6773 error ("lane out of range");
6774 }
6775
6776 void
6777 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6778 {
6779 gcc_assert (GET_CODE (operand) == CONST_INT);
6780 HOST_WIDE_INT lane = INTVAL (operand);
6781
6782 if (lane < low || lane >= high)
6783 error ("constant out of range");
6784 }
6785
6786 /* Emit code to reinterpret one AdvSIMD type as another,
6787 without altering bits. */
6788 void
6789 aarch64_simd_reinterpret (rtx dest, rtx src)
6790 {
6791 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6792 }
6793
6794 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
6795 registers). */
6796 void
6797 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6798 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6799 rtx op1)
6800 {
6801 rtx mem = gen_rtx_MEM (mode, destaddr);
6802 rtx tmp1 = gen_reg_rtx (mode);
6803 rtx tmp2 = gen_reg_rtx (mode);
6804
6805 emit_insn (intfn (tmp1, op1, tmp2));
6806
6807 emit_move_insn (mem, tmp1);
6808 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6809 emit_move_insn (mem, tmp2);
6810 }
6811
6812 /* Return TRUE if OP is a valid vector addressing mode. */
6813 bool
6814 aarch64_simd_mem_operand_p (rtx op)
6815 {
6816 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6817 || GET_CODE (XEXP (op, 0)) == REG);
6818 }
6819
6820 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
6821 not to early-clobber SRC registers in the process.
6822
6823 We assume that the operands described by SRC and DEST represent a
6824 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6825 number of components into which the copy has been decomposed. */
6826 void
6827 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6828 rtx *src, unsigned int count)
6829 {
6830 unsigned int i;
6831
6832 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6833 || REGNO (operands[0]) < REGNO (operands[1]))
6834 {
6835 for (i = 0; i < count; i++)
6836 {
6837 operands[2 * i] = dest[i];
6838 operands[2 * i + 1] = src[i];
6839 }
6840 }
6841 else
6842 {
6843 for (i = 0; i < count; i++)
6844 {
6845 operands[2 * i] = dest[count - i - 1];
6846 operands[2 * i + 1] = src[count - i - 1];
6847 }
6848 }
6849 }
6850
6851 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6852 one of VSTRUCT modes: OI, CI or XI. */
6853 int
6854 aarch64_simd_attr_length_move (rtx insn)
6855 {
6856 enum machine_mode mode;
6857
6858 extract_insn_cached (insn);
6859
6860 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6861 {
6862 mode = GET_MODE (recog_data.operand[0]);
6863 switch (mode)
6864 {
6865 case OImode:
6866 return 8;
6867 case CImode:
6868 return 12;
6869 case XImode:
6870 return 16;
6871 default:
6872 gcc_unreachable ();
6873 }
6874 }
6875 return 4;
6876 }
6877
6878 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6879 alignment of a vector to 128 bits. */
6880 static HOST_WIDE_INT
6881 aarch64_simd_vector_alignment (const_tree type)
6882 {
6883 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
6884 return MIN (align, 128);
6885 }
6886
6887 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6888 static bool
6889 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6890 {
6891 if (is_packed)
6892 return false;
6893
6894 /* We guarantee alignment for vectors up to 128-bits. */
6895 if (tree_int_cst_compare (TYPE_SIZE (type),
6896 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6897 return false;
6898
6899 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6900 return true;
6901 }
6902
6903 /* If VALS is a vector constant that can be loaded into a register
6904 using DUP, generate instructions to do so and return an RTX to
6905 assign to the register. Otherwise return NULL_RTX. */
6906 static rtx
6907 aarch64_simd_dup_constant (rtx vals)
6908 {
6909 enum machine_mode mode = GET_MODE (vals);
6910 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6911 int n_elts = GET_MODE_NUNITS (mode);
6912 bool all_same = true;
6913 rtx x;
6914 int i;
6915
6916 if (GET_CODE (vals) != CONST_VECTOR)
6917 return NULL_RTX;
6918
6919 for (i = 1; i < n_elts; ++i)
6920 {
6921 x = CONST_VECTOR_ELT (vals, i);
6922 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6923 all_same = false;
6924 }
6925
6926 if (!all_same)
6927 return NULL_RTX;
6928
6929 /* We can load this constant by using DUP and a constant in a
6930 single general-purpose register. This will be cheaper than a vector
6931 load. */
6932 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6933 return gen_rtx_VEC_DUPLICATE (mode, x);
6934 }
6935
6936
6937 /* Generate code to load VALS, which is a PARALLEL containing only
6938 constants (for vec_init) or CONST_VECTOR, efficiently into a
6939 register. Returns an RTX to copy into the register, or NULL_RTX
6940 for a PARALLEL that can not be converted into a CONST_VECTOR. */
6941 static rtx
6942 aarch64_simd_make_constant (rtx vals)
6943 {
6944 enum machine_mode mode = GET_MODE (vals);
6945 rtx const_dup;
6946 rtx const_vec = NULL_RTX;
6947 int n_elts = GET_MODE_NUNITS (mode);
6948 int n_const = 0;
6949 int i;
6950
6951 if (GET_CODE (vals) == CONST_VECTOR)
6952 const_vec = vals;
6953 else if (GET_CODE (vals) == PARALLEL)
6954 {
6955 /* A CONST_VECTOR must contain only CONST_INTs and
6956 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6957 Only store valid constants in a CONST_VECTOR. */
6958 for (i = 0; i < n_elts; ++i)
6959 {
6960 rtx x = XVECEXP (vals, 0, i);
6961 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
6962 n_const++;
6963 }
6964 if (n_const == n_elts)
6965 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6966 }
6967 else
6968 gcc_unreachable ();
6969
6970 if (const_vec != NULL_RTX
6971 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
6972 /* Load using MOVI/MVNI. */
6973 return const_vec;
6974 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
6975 /* Loaded using DUP. */
6976 return const_dup;
6977 else if (const_vec != NULL_RTX)
6978 /* Load from constant pool. We can not take advantage of single-cycle
6979 LD1 because we need a PC-relative addressing mode. */
6980 return const_vec;
6981 else
6982 /* A PARALLEL containing something not valid inside CONST_VECTOR.
6983 We can not construct an initializer. */
6984 return NULL_RTX;
6985 }
6986
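/* Expand the vector initializer VALS into register TARGET: use MOVI/MVNI,
   DUP or a literal-pool load when all elements are constant, splat with
   DUP when all elements are equal, load the constant part and insert the
   single varying lane when exactly one element is non-constant, and
   otherwise build the vector in memory one element at a time.  */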
6987 void
6988 aarch64_expand_vector_init (rtx target, rtx vals)
6989 {
6990 enum machine_mode mode = GET_MODE (target);
6991 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6992 int n_elts = GET_MODE_NUNITS (mode);
6993 int n_var = 0, one_var = -1;
6994 bool all_same = true;
6995 rtx x, mem;
6996 int i;
6997
6998 x = XVECEXP (vals, 0, 0);
6999 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7000 n_var = 1, one_var = 0;
7001
7002 for (i = 1; i < n_elts; ++i)
7003 {
7004 x = XVECEXP (vals, 0, i);
7005 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7006 ++n_var, one_var = i;
7007
7008 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7009 all_same = false;
7010 }
7011
7012 if (n_var == 0)
7013 {
7014 rtx constant = aarch64_simd_make_constant (vals);
7015 if (constant != NULL_RTX)
7016 {
7017 emit_move_insn (target, constant);
7018 return;
7019 }
7020 }
7021
7022 /* Splat a single non-constant element if we can. */
7023 if (all_same)
7024 {
7025 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
7026 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
7027 return;
7028 }
7029
7030 /* One field is non-constant. Load constant then overwrite varying
7031 field. This is more efficient than using the stack. */
7032 if (n_var == 1)
7033 {
7034 rtx copy = copy_rtx (vals);
7035 rtx index = GEN_INT (one_var);
7036 enum insn_code icode;
7037
7038 /* Load constant part of vector, substitute neighboring value for
7039 varying element. */
7040 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
7041 aarch64_expand_vector_init (target, copy);
7042
7043 /* Insert variable. */
7044 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
7045 icode = optab_handler (vec_set_optab, mode);
7046 gcc_assert (icode != CODE_FOR_nothing);
7047 emit_insn (GEN_FCN (icode) (target, x, index));
7048 return;
7049 }
7050
7051 /* Construct the vector in memory one field at a time
7052 and load the whole vector. */
7053 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7054 for (i = 0; i < n_elts; i++)
7055 emit_move_insn (adjust_address_nv (mem, inner_mode,
7056 i * GET_MODE_SIZE (inner_mode)),
7057 XVECEXP (vals, 0, i));
7058 emit_move_insn (target, mem);
7059
7060 }
7061
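/* Implement TARGET_SHIFT_TRUNCATION_MASK.  Scalar shift counts are
   truncated to the width of the mode, so return GET_MODE_BITSIZE (mode) - 1
   for scalar modes; vector and vector-struct shifts do not truncate, so
   return 0 for them.  */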
7062 static unsigned HOST_WIDE_INT
7063 aarch64_shift_truncation_mask (enum machine_mode mode)
7064 {
7065 return
7066 (aarch64_vector_mode_supported_p (mode)
7067 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
7068 }
7069
7070 #ifndef TLS_SECTION_ASM_FLAG
7071 #define TLS_SECTION_ASM_FLAG 'T'
7072 #endif
7073
7074 void
7075 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7076 tree decl ATTRIBUTE_UNUSED)
7077 {
7078 char flagchars[10], *f = flagchars;
7079
7080 /* If we have already declared this section, we can use an
7081 abbreviated form to switch back to it -- unless this section is
7082 part of a COMDAT group, in which case GAS requires the full
7083 declaration every time. */
7084 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7085 && (flags & SECTION_DECLARED))
7086 {
7087 fprintf (asm_out_file, "\t.section\t%s\n", name);
7088 return;
7089 }
7090
7091 if (!(flags & SECTION_DEBUG))
7092 *f++ = 'a';
7093 if (flags & SECTION_WRITE)
7094 *f++ = 'w';
7095 if (flags & SECTION_CODE)
7096 *f++ = 'x';
7097 if (flags & SECTION_SMALL)
7098 *f++ = 's';
7099 if (flags & SECTION_MERGE)
7100 *f++ = 'M';
7101 if (flags & SECTION_STRINGS)
7102 *f++ = 'S';
7103 if (flags & SECTION_TLS)
7104 *f++ = TLS_SECTION_ASM_FLAG;
7105 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7106 *f++ = 'G';
7107 *f = '\0';
7108
7109 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7110
7111 if (!(flags & SECTION_NOTYPE))
7112 {
7113 const char *type;
7114 const char *format;
7115
7116 if (flags & SECTION_BSS)
7117 type = "nobits";
7118 else
7119 type = "progbits";
7120
7121 #ifdef TYPE_OPERAND_FMT
7122 format = "," TYPE_OPERAND_FMT;
7123 #else
7124 format = ",@%s";
7125 #endif
7126
7127 fprintf (asm_out_file, format, type);
7128
7129 if (flags & SECTION_ENTSIZE)
7130 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7131 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7132 {
7133 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7134 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7135 else
7136 fprintf (asm_out_file, ",%s,comdat",
7137 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7138 }
7139 }
7140
7141 putc ('\n', asm_out_file);
7142 }
7143
7144 /* Select a format to encode pointers in exception handling data. */
7145 int
7146 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7147 {
7148 int type;
7149 switch (aarch64_cmodel)
7150 {
7151 case AARCH64_CMODEL_TINY:
7152 case AARCH64_CMODEL_TINY_PIC:
7153 case AARCH64_CMODEL_SMALL:
7154 case AARCH64_CMODEL_SMALL_PIC:
7155 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
7156 for everything. */
7157 type = DW_EH_PE_sdata4;
7158 break;
7159 default:
7160 /* No assumptions here. 8-byte relocs required. */
7161 type = DW_EH_PE_sdata8;
7162 break;
7163 }
7164 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7165 }
7166
7167 /* Emit load exclusive. */
7168
7169 static void
7170 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7171 rtx mem, rtx model_rtx)
7172 {
7173 rtx (*gen) (rtx, rtx, rtx);
7174
7175 switch (mode)
7176 {
7177 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7178 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7179 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7180 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7181 default:
7182 gcc_unreachable ();
7183 }
7184
7185 emit_insn (gen (rval, mem, model_rtx));
7186 }
7187
7188 /* Emit store exclusive. */
7189
7190 static void
7191 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7192 rtx rval, rtx mem, rtx model_rtx)
7193 {
7194 rtx (*gen) (rtx, rtx, rtx, rtx);
7195
7196 switch (mode)
7197 {
7198 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7199 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7200 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7201 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7202 default:
7203 gcc_unreachable ();
7204 }
7205
7206 emit_insn (gen (bval, rval, mem, model_rtx));
7207 }
7208
7209 /* Mark the previous jump instruction as unlikely. */
7210
7211 static void
7212 aarch64_emit_unlikely_jump (rtx insn)
7213 {
7214 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
7215
7216 insn = emit_jump_insn (insn);
7217 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
7218 }
7219
7220 /* Expand a compare and swap pattern. */
7221
7222 void
7223 aarch64_expand_compare_and_swap (rtx operands[])
7224 {
7225 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7226 enum machine_mode mode, cmp_mode;
7227 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7228
7229 bval = operands[0];
7230 rval = operands[1];
7231 mem = operands[2];
7232 oldval = operands[3];
7233 newval = operands[4];
7234 is_weak = operands[5];
7235 mod_s = operands[6];
7236 mod_f = operands[7];
7237 mode = GET_MODE (mem);
7238 cmp_mode = mode;
7239
7240 /* Normally the succ memory model must be stronger than fail, but in the
7241 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7242 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7243
7244 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7245 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7246 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7247
7248 switch (mode)
7249 {
7250 case QImode:
7251 case HImode:
7252 /* For short modes, we're going to perform the comparison in SImode,
7253 so do the zero-extension now. */
7254 cmp_mode = SImode;
7255 rval = gen_reg_rtx (SImode);
7256 oldval = convert_modes (SImode, mode, oldval, true);
7257 /* Fall through. */
7258
7259 case SImode:
7260 case DImode:
7261 /* Force the value into a register if needed. */
7262 if (!aarch64_plus_operand (oldval, mode))
7263 oldval = force_reg (cmp_mode, oldval);
7264 break;
7265
7266 default:
7267 gcc_unreachable ();
7268 }
7269
7270 switch (mode)
7271 {
7272 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7273 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7274 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7275 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7276 default:
7277 gcc_unreachable ();
7278 }
7279
7280 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7281
7282 if (mode == QImode || mode == HImode)
7283 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7284
7285 x = gen_rtx_REG (CCmode, CC_REGNUM);
7286 x = gen_rtx_EQ (SImode, x, const0_rtx);
7287 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7288 }
7289
7290 /* Split a compare and swap pattern. */
7291
7292 void
7293 aarch64_split_compare_and_swap (rtx operands[])
7294 {
7295 rtx rval, mem, oldval, newval, scratch;
7296 enum machine_mode mode;
7297 bool is_weak;
7298 rtx label1, label2, x, cond;
7299
7300 rval = operands[0];
7301 mem = operands[1];
7302 oldval = operands[2];
7303 newval = operands[3];
7304 is_weak = (operands[4] != const0_rtx);
7305 scratch = operands[7];
7306 mode = GET_MODE (mem);
7307
7308 label1 = NULL_RTX;
7309 if (!is_weak)
7310 {
7311 label1 = gen_label_rtx ();
7312 emit_label (label1);
7313 }
7314 label2 = gen_label_rtx ();
7315
7316 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7317
7318 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7319 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7320 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7321 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7322 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7323
7324 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7325
7326 if (!is_weak)
7327 {
7328 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7329 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7330 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7331 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7332 }
7333 else
7334 {
7335 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7336 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7337 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7338 }
7339
7340 emit_label (label2);
7341 }
7342
7343 /* Split an atomic read-modify-write operation into a
7343 load-exclusive / store-exclusive retry loop. */
7344
7345 void
7346 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7347 rtx value, rtx model_rtx, rtx cond)
7348 {
7349 enum machine_mode mode = GET_MODE (mem);
7350 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7351 rtx label, x;
7352
7353 label = gen_label_rtx ();
7354 emit_label (label);
7355
7356 if (new_out)
7357 new_out = gen_lowpart (wmode, new_out);
7358 if (old_out)
7359 old_out = gen_lowpart (wmode, old_out);
7360 else
7361 old_out = new_out;
7362 value = simplify_gen_subreg (wmode, value, mode, 0);
7363
7364 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7365
7366 switch (code)
7367 {
7368 case SET:
7369 new_out = value;
7370 break;
7371
7372 case NOT:
7373 x = gen_rtx_AND (wmode, old_out, value);
7374 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7375 x = gen_rtx_NOT (wmode, new_out);
7376 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7377 break;
7378
7379 case MINUS:
7380 if (CONST_INT_P (value))
7381 {
7382 value = GEN_INT (-INTVAL (value));
7383 code = PLUS;
7384 }
7385 /* Fall through. */
7386
7387 default:
7388 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7389 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7390 break;
7391 }
7392
7393 aarch64_emit_store_exclusive (mode, cond, mem,
7394 gen_lowpart (mode, new_out), model_rtx);
7395
7396 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7397 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7398 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7399 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7400 }
7401
7402 static void
7403 aarch64_print_extension (void)
7404 {
7405 const struct aarch64_option_extension *opt = NULL;
7406
7407 for (opt = all_extensions; opt->name != NULL; opt++)
7408 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7409 asm_fprintf (asm_out_file, "+%s", opt->name);
7410
7411 asm_fprintf (asm_out_file, "\n");
7412 }
7413
7414 static void
7415 aarch64_start_file (void)
7416 {
7417 if (selected_arch)
7418 {
7419 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7420 aarch64_print_extension ();
7421 }
7422 else if (selected_cpu)
7423 {
7424 asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
7425 aarch64_print_extension ();
7426 }
7427 default_file_start ();
7428 }
7429
7430 /* Target hook for c_mode_for_suffix. */
7431 static enum machine_mode
7432 aarch64_c_mode_for_suffix (char suffix)
7433 {
7434 if (suffix == 'q')
7435 return TFmode;
7436
7437 return VOIDmode;
7438 }
7439
7440 /* We can only represent floating point constants which will fit in
7441 "quarter-precision" values. These values are characterised by
7442 a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
7443 by:
7444
7445 (-1)^s * (n/16) * 2^r
7446
7447 Where:
7448 's' is the sign bit.
7449 'n' is an integer in the range 16 <= n <= 31.
7450 'r' is an integer in the range -3 <= r <= 4. */
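/* For example, with s = 0, n = 16 and r = 0 the value is 1.0; the smallest
   positive representable value is 16/16 * 2^-3 = 0.125 and the largest is
   31/16 * 2^4 = 31.0.  */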
7451
7452 /* Return true iff X can be represented by a quarter-precision
7453 floating point immediate operand. Note, we cannot represent 0.0. */
7454 bool
7455 aarch64_float_const_representable_p (rtx x)
7456 {
7457 /* This represents our current view of how many bits
7458 make up the mantissa. */
7459 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7460 int exponent;
7461 unsigned HOST_WIDE_INT mantissa, mask;
7462 HOST_WIDE_INT m1, m2;
7463 REAL_VALUE_TYPE r, m;
7464
7465 if (!CONST_DOUBLE_P (x))
7466 return false;
7467
7468 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7469
7470 /* We cannot represent infinities, NaNs or +/-zero. We won't
7471 know if we have +zero until we analyse the mantissa, but we
7472 can reject the other invalid values. */
7473 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7474 || REAL_VALUE_MINUS_ZERO (r))
7475 return false;
7476
7477 /* Extract exponent. */
7478 r = real_value_abs (&r);
7479 exponent = REAL_EXP (&r);
7480
7481 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7482 highest (sign) bit, with a fixed binary point at bit point_pos.
7483 m1 holds the low part of the mantissa, m2 the high part.
7484 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7485 bits for the mantissa, this can fail (low bits will be lost). */
7486 real_ldexp (&m, &r, point_pos - exponent);
7487 REAL_VALUE_TO_INT (&m1, &m2, m);
7488
7489 /* If the low part of the mantissa has bits set we cannot represent
7490 the value. */
7491 if (m1 != 0)
7492 return false;
7493 /* We have rejected the lower HOST_WIDE_INT, so update our
7494 understanding of how many bits lie in the mantissa and
7495 look only at the high HOST_WIDE_INT. */
7496 mantissa = m2;
7497 point_pos -= HOST_BITS_PER_WIDE_INT;
7498
7499 /* We can only represent values with a mantissa of the form 1.xxxx. */
7500 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7501 if ((mantissa & mask) != 0)
7502 return false;
7503
7504 /* Having filtered unrepresentable values, we may now remove all
7505 but the highest 5 bits. */
7506 mantissa >>= point_pos - 5;
7507
7508 /* We cannot represent the value 0.0, so reject it. This is handled
7509 elsewhere. */
7510 if (mantissa == 0)
7511 return false;
7512
7513 /* Then, as bit 4 is always set, we can mask it off, leaving
7514 the mantissa in the range [0, 15]. */
7515 mantissa &= ~(1 << 4);
7516 gcc_assert (mantissa <= 15);
7517
7518 /* GCC internally does not use IEEE754-like encoding (where normalized
7519 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
7520 Our mantissa values are shifted 4 places to the left relative to
7521 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7522 by 5 places to correct for GCC's representation. */
7523 exponent = 5 - exponent;
7524
7525 return (exponent >= 0 && exponent <= 7);
7526 }
7527
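/* Output the assembly template for moving the immediate CONST_VECTOR, in
   vector mode MODE with a register width of WIDTH bits, using the
   MOVI/MVNI/FMOV encodings validated by aarch64_simd_valid_immediate.
   For example, a V4SI vector with every lane equal to 0x100 yields a
   template of the form "movi\t%0.4s, 0x1, lsl 8".  */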
7528 char*
7529 aarch64_output_simd_mov_immediate (rtx const_vector,
7530 enum machine_mode mode,
7531 unsigned width)
7532 {
7533 bool is_valid;
7534 static char templ[40];
7535 const char *mnemonic;
7536 const char *shift_op;
7537 unsigned int lane_count = 0;
7538 char element_char;
7539
7540 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
7541
7542 /* This will return true to show const_vector is legal for use as either
7543 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
7544 also update INFO to show how the immediate should be generated. */
7545 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
7546 gcc_assert (is_valid);
7547
7548 element_char = sizetochar (info.element_width);
7549 lane_count = width / info.element_width;
7550
7551 mode = GET_MODE_INNER (mode);
7552 if (mode == SFmode || mode == DFmode)
7553 {
7554 gcc_assert (info.shift == 0 && ! info.mvn);
7555 if (aarch64_float_const_zero_rtx_p (info.value))
7556 info.value = GEN_INT (0);
7557 else
7558 {
7559 #define buf_size 20
7560 REAL_VALUE_TYPE r;
7561 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
7562 char float_buf[buf_size] = {'\0'};
7563 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
7564 #undef buf_size
7565
7566 if (lane_count == 1)
7567 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
7568 else
7569 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
7570 lane_count, element_char, float_buf);
7571 return templ;
7572 }
7573 }
7574
7575 mnemonic = info.mvn ? "mvni" : "movi";
7576 shift_op = info.msl ? "msl" : "lsl";
7577
7578 if (lane_count == 1)
7579 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
7580 mnemonic, UINTVAL (info.value));
7581 else if (info.shift)
7582 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
7583 ", %s %d", mnemonic, lane_count, element_char,
7584 UINTVAL (info.value), shift_op, info.shift);
7585 else
7586 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
7587 mnemonic, lane_count, element_char, UINTVAL (info.value));
7588 return templ;
7589 }
7590
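/* As above, but for a scalar IMMEDIATE in integer mode MODE: the value is
   broadcast into a 64-bit vector constant and printed with the vector
   MOVI template.  */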
7591 char*
7592 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
7593 enum machine_mode mode)
7594 {
7595 enum machine_mode vmode;
7596
7597 gcc_assert (!VECTOR_MODE_P (mode));
7598 vmode = aarch64_simd_container_mode (mode, 64);
7599 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
7600 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
7601 }
7602
7603 /* Split operands into moves from op[1] + op[2] into op[0]. */
7604
7605 void
7606 aarch64_split_combinev16qi (rtx operands[3])
7607 {
7608 unsigned int dest = REGNO (operands[0]);
7609 unsigned int src1 = REGNO (operands[1]);
7610 unsigned int src2 = REGNO (operands[2]);
7611 enum machine_mode halfmode = GET_MODE (operands[1]);
7612 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7613 rtx destlo, desthi;
7614
7615 gcc_assert (halfmode == V16QImode);
7616
7617 if (src1 == dest && src2 == dest + halfregs)
7618 {
7619 /* No-op move. Can't split to nothing; emit something. */
7620 emit_note (NOTE_INSN_DELETED);
7621 return;
7622 }
7623
7624 /* Preserve register attributes for variable tracking. */
7625 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7626 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7627 GET_MODE_SIZE (halfmode));
7628
7629 /* Special case of reversed high/low parts. */
7630 if (reg_overlap_mentioned_p (operands[2], destlo)
7631 && reg_overlap_mentioned_p (operands[1], desthi))
7632 {
7633 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7634 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7635 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7636 }
7637 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7638 {
7639 /* Try to avoid unnecessary moves if part of the result
7640 is in the right place already. */
7641 if (src1 != dest)
7642 emit_move_insn (destlo, operands[1]);
7643 if (src2 != dest + halfregs)
7644 emit_move_insn (desthi, operands[2]);
7645 }
7646 else
7647 {
7648 if (src2 != dest + halfregs)
7649 emit_move_insn (desthi, operands[2]);
7650 if (src1 != dest)
7651 emit_move_insn (destlo, operands[1]);
7652 }
7653 }
7654
7655 /* vec_perm support. */
7656
7657 #define MAX_VECT_LEN 16
7658
7659 struct expand_vec_perm_d
7660 {
7661 rtx target, op0, op1;
7662 unsigned char perm[MAX_VECT_LEN];
7663 enum machine_mode vmode;
7664 unsigned char nelt;
7665 bool one_vector_p;
7666 bool testing_p;
7667 };
7668
7669 /* Generate a variable permutation. */
7670
7671 static void
7672 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7673 {
7674 enum machine_mode vmode = GET_MODE (target);
7675 bool one_vector_p = rtx_equal_p (op0, op1);
7676
7677 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7678 gcc_checking_assert (GET_MODE (op0) == vmode);
7679 gcc_checking_assert (GET_MODE (op1) == vmode);
7680 gcc_checking_assert (GET_MODE (sel) == vmode);
7681 gcc_checking_assert (TARGET_SIMD);
7682
7683 if (one_vector_p)
7684 {
7685 if (vmode == V8QImode)
7686 {
7687 /* Expand the argument to a V16QI mode by duplicating it. */
7688 rtx pair = gen_reg_rtx (V16QImode);
7689 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7690 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7691 }
7692 else
7693 {
7694 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7695 }
7696 }
7697 else
7698 {
7699 rtx pair;
7700
7701 if (vmode == V8QImode)
7702 {
7703 pair = gen_reg_rtx (V16QImode);
7704 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7705 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7706 }
7707 else
7708 {
7709 pair = gen_reg_rtx (OImode);
7710 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7711 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7712 }
7713 }
7714 }
7715
7716 void
7717 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7718 {
7719 enum machine_mode vmode = GET_MODE (target);
7720 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7721 bool one_vector_p = rtx_equal_p (op0, op1);
7722 rtx rmask[MAX_VECT_LEN], mask;
7723
7724 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7725
7726 /* The TBL instruction does not use a modulo index, so we must take care
7727 of that ourselves. */
7728 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7729 for (i = 0; i < nelt; ++i)
7730 rmask[i] = mask;
7731 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7732 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7733
7734 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7735 }
7736
7737 /* Recognize patterns suitable for the TRN instructions. */
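/* For instance, with V4SImode operands a TRN1 permutation corresponds to
   the selector { 0, 4, 2, 6 } and TRN2 to { 1, 5, 3, 7 } in the
   little-endian numbering checked below.  */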
7738 static bool
7739 aarch64_evpc_trn (struct expand_vec_perm_d *d)
7740 {
7741 unsigned int i, odd, mask, nelt = d->nelt;
7742 rtx out, in0, in1, x;
7743 rtx (*gen) (rtx, rtx, rtx);
7744 enum machine_mode vmode = d->vmode;
7745
7746 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7747 return false;
7748
7749 /* Note that these are little-endian tests.
7750 We correct for big-endian later. */
7751 if (d->perm[0] == 0)
7752 odd = 0;
7753 else if (d->perm[0] == 1)
7754 odd = 1;
7755 else
7756 return false;
7757 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7758
7759 for (i = 0; i < nelt; i += 2)
7760 {
7761 if (d->perm[i] != i + odd)
7762 return false;
7763 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7764 return false;
7765 }
7766
7767 /* Success! */
7768 if (d->testing_p)
7769 return true;
7770
7771 in0 = d->op0;
7772 in1 = d->op1;
7773 if (BYTES_BIG_ENDIAN)
7774 {
7775 x = in0, in0 = in1, in1 = x;
7776 odd = !odd;
7777 }
7778 out = d->target;
7779
7780 if (odd)
7781 {
7782 switch (vmode)
7783 {
7784 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7785 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7786 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7787 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7788 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7789 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7790 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7791 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7792 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7793 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7794 default:
7795 return false;
7796 }
7797 }
7798 else
7799 {
7800 switch (vmode)
7801 {
7802 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7803 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7804 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7805 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7806 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7807 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7808 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7809 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7810 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7811 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7812 default:
7813 return false;
7814 }
7815 }
7816
7817 emit_insn (gen (out, in0, in1));
7818 return true;
7819 }
7820
7821 /* Recognize patterns suitable for the UZP instructions. */
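/* UZP1/UZP2 concatenate the even- or odd-numbered elements of the two
   inputs, e.g. for V4SImode UZP1 matches the selector { 0, 2, 4, 6 } and
   UZP2 matches { 1, 3, 5, 7 }. */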
7822 static bool
7823 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7824 {
7825 unsigned int i, odd, mask, nelt = d->nelt;
7826 rtx out, in0, in1, x;
7827 rtx (*gen) (rtx, rtx, rtx);
7828 enum machine_mode vmode = d->vmode;
7829
7830 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7831 return false;
7832
7833 /* Note that these are little-endian tests.
7834 We correct for big-endian later. */
7835 if (d->perm[0] == 0)
7836 odd = 0;
7837 else if (d->perm[0] == 1)
7838 odd = 1;
7839 else
7840 return false;
7841 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7842
7843 for (i = 0; i < nelt; i++)
7844 {
7845 unsigned elt = (i * 2 + odd) & mask;
7846 if (d->perm[i] != elt)
7847 return false;
7848 }
7849
7850 /* Success! */
7851 if (d->testing_p)
7852 return true;
7853
7854 in0 = d->op0;
7855 in1 = d->op1;
7856 if (BYTES_BIG_ENDIAN)
7857 {
7858 x = in0, in0 = in1, in1 = x;
7859 odd = !odd;
7860 }
7861 out = d->target;
7862
7863 if (odd)
7864 {
7865 switch (vmode)
7866 {
7867 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7868 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7869 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7870 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7871 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7872 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7873 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7874 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7875 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7876 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7877 default:
7878 return false;
7879 }
7880 }
7881 else
7882 {
7883 switch (vmode)
7884 {
7885 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7886 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7887 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7888 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7889 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7890 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7891 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7892 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7893 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7894 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7895 default:
7896 return false;
7897 }
7898 }
7899
7900 emit_insn (gen (out, in0, in1));
7901 return true;
7902 }
7903
7904 /* Recognize patterns suitable for the ZIP instructions. */
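/* ZIP1/ZIP2 interleave the low or high halves of the two inputs, e.g. for
   V4SImode ZIP1 matches the selector { 0, 4, 1, 5 } and ZIP2 matches
   { 2, 6, 3, 7 }. */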
7905 static bool
7906 aarch64_evpc_zip (struct expand_vec_perm_d *d)
7907 {
7908 unsigned int i, high, mask, nelt = d->nelt;
7909 rtx out, in0, in1, x;
7910 rtx (*gen) (rtx, rtx, rtx);
7911 enum machine_mode vmode = d->vmode;
7912
7913 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7914 return false;
7915
7916 /* Note that these are little-endian tests.
7917 We correct for big-endian later. */
7918 high = nelt / 2;
7919 if (d->perm[0] == high)
7920 /* Do Nothing. */
7921 ;
7922 else if (d->perm[0] == 0)
7923 high = 0;
7924 else
7925 return false;
7926 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7927
7928 for (i = 0; i < nelt / 2; i++)
7929 {
7930 unsigned elt = (i + high) & mask;
7931 if (d->perm[i * 2] != elt)
7932 return false;
7933 elt = (elt + nelt) & mask;
7934 if (d->perm[i * 2 + 1] != elt)
7935 return false;
7936 }
7937
7938 /* Success! */
7939 if (d->testing_p)
7940 return true;
7941
7942 in0 = d->op0;
7943 in1 = d->op1;
7944 if (BYTES_BIG_ENDIAN)
7945 {
7946 x = in0, in0 = in1, in1 = x;
7947 high = !high;
7948 }
7949 out = d->target;
7950
7951 if (high)
7952 {
7953 switch (vmode)
7954 {
7955 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7956 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7957 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7958 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
7959 case V4SImode: gen = gen_aarch64_zip2v4si; break;
7960 case V2SImode: gen = gen_aarch64_zip2v2si; break;
7961 case V2DImode: gen = gen_aarch64_zip2v2di; break;
7962 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
7963 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
7964 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
7965 default:
7966 return false;
7967 }
7968 }
7969 else
7970 {
7971 switch (vmode)
7972 {
7973 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
7974 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
7975 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
7976 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
7977 case V4SImode: gen = gen_aarch64_zip1v4si; break;
7978 case V2SImode: gen = gen_aarch64_zip1v2si; break;
7979 case V2DImode: gen = gen_aarch64_zip1v2di; break;
7980 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
7981 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
7982 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
7983 default:
7984 return false;
7985 }
7986 }
7987
7988 emit_insn (gen (out, in0, in1));
7989 return true;
7990 }
7991
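/* Recognize a broadcast of a single element, e.g. the selector
   { 2, 2, 2, 2 } for V4SImode, which can be implemented with a DUP
   (element) instruction. */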
7992 static bool
7993 aarch64_evpc_dup (struct expand_vec_perm_d *d)
7994 {
7995 rtx (*gen) (rtx, rtx, rtx);
7996 rtx out = d->target;
7997 rtx in0;
7998 enum machine_mode vmode = d->vmode;
7999 unsigned int i, elt, nelt = d->nelt;
8000 rtx lane;
8001
8002 /* TODO: This may not be big-endian safe. */
8003 if (BYTES_BIG_ENDIAN)
8004 return false;
8005
8006 elt = d->perm[0];
8007 for (i = 1; i < nelt; i++)
8008 {
8009 if (elt != d->perm[i])
8010 return false;
8011 }
8012
8013 /* The generic preparation in aarch64_expand_vec_perm_const_1
8014 swaps the operand order and the permute indices if it finds
8015 d->perm[0] to be in the second operand. Thus, we can always
8016 use d->op0 and need not do any extra arithmetic to get the
8017 correct lane number. */
8018 in0 = d->op0;
8019 lane = GEN_INT (elt);
8020
8021 switch (vmode)
8022 {
8023 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
8024 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
8025 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
8026 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
8027 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
8028 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
8029 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
8030 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
8031 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
8032 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
8033 default:
8034 return false;
8035 }
8036
8037 emit_insn (gen (out, in0, lane));
8038 return true;
8039 }
8040
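/* Fall back to a general TBL-based permutation: force the constant
   selector into a register and reuse aarch64_expand_vec_perm_1. */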
8041 static bool
8042 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
8043 {
8044 rtx rperm[MAX_VECT_LEN], sel;
8045 enum machine_mode vmode = d->vmode;
8046 unsigned int i, nelt = d->nelt;
8047
8048 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
8049 numbering of elements for big-endian, we must reverse the order. */
8050 if (BYTES_BIG_ENDIAN)
8051 return false;
8052
8053 if (d->testing_p)
8054 return true;
8055
8056 /* Generic code will try constant permutation twice. Once with the
8057 original mode and again with the elements lowered to QImode.
8058 So wait and don't do the selector expansion ourselves. */
8059 if (vmode != V8QImode && vmode != V16QImode)
8060 return false;
8061
8062 for (i = 0; i < nelt; ++i)
8063 rperm[i] = GEN_INT (d->perm[i]);
8064 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
8065 sel = force_reg (vmode, sel);
8066
8067 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
8068 return true;
8069 }
8070
8071 static bool
8072 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
8073 {
8074 /* The pattern matching functions above are written to look for a small
8075 number to begin the sequence (0, 1, N/2). If we begin with an index
8076 from the second operand, we can swap the operands. */
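/* For example, for V4SImode the selector { 4, 0, 5, 1 } becomes
   { 0, 4, 1, 5 } with the operands swapped, which is then matched as
   a ZIP1. */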
8077 if (d->perm[0] >= d->nelt)
8078 {
8079 unsigned i, nelt = d->nelt;
8080 rtx x;
8081
8082 for (i = 0; i < nelt; ++i)
8083 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
8084
8085 x = d->op0;
8086 d->op0 = d->op1;
8087 d->op1 = x;
8088 }
8089
8090 if (TARGET_SIMD)
8091 {
8092 if (aarch64_evpc_zip (d))
8093 return true;
8094 else if (aarch64_evpc_uzp (d))
8095 return true;
8096 else if (aarch64_evpc_trn (d))
8097 return true;
8098 else if (aarch64_evpc_dup (d))
8099 return true;
8100 return aarch64_evpc_tbl (d);
8101 }
8102 return false;
8103 }
8104
8105 /* Expand a vec_perm_const pattern. */
8106
8107 bool
8108 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
8109 {
8110 struct expand_vec_perm_d d;
8111 int i, nelt, which;
8112
8113 d.target = target;
8114 d.op0 = op0;
8115 d.op1 = op1;
8116
8117 d.vmode = GET_MODE (target);
8118 gcc_assert (VECTOR_MODE_P (d.vmode));
8119 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8120 d.testing_p = false;
8121
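/* Record in WHICH whether any selector element refers to the first
   operand (bit 0) and/or the second (bit 1), reducing each index modulo
   2 * NELT, since the selector is interpreted modulo the total number of
   input elements. */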
8122 for (i = which = 0; i < nelt; ++i)
8123 {
8124 rtx e = XVECEXP (sel, 0, i);
8125 int ei = INTVAL (e) & (2 * nelt - 1);
8126 which |= (ei < nelt ? 1 : 2);
8127 d.perm[i] = ei;
8128 }
8129
8130 switch (which)
8131 {
8132 default:
8133 gcc_unreachable ();
8134
8135 case 3:
8136 d.one_vector_p = false;
8137 if (!rtx_equal_p (op0, op1))
8138 break;
8139
8140 /* The elements of PERM do not suggest that only the first operand
8141 is used, but both operands are identical. Make the permutation
8142 easier to match by folding it onto the
8143 single input vector. */
8144 /* Fall Through. */
8145 case 2:
8146 for (i = 0; i < nelt; ++i)
8147 d.perm[i] &= nelt - 1;
8148 d.op0 = op1;
8149 d.one_vector_p = true;
8150 break;
8151
8152 case 1:
8153 d.op1 = op0;
8154 d.one_vector_p = true;
8155 break;
8156 }
8157
8158 return aarch64_expand_vec_perm_const_1 (&d);
8159 }
8160
8161 static bool
8162 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8163 const unsigned char *sel)
8164 {
8165 struct expand_vec_perm_d d;
8166 unsigned int i, nelt, which;
8167 bool ret;
8168
8169 d.vmode = vmode;
8170 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8171 d.testing_p = true;
8172 memcpy (d.perm, sel, nelt);
8173
8174 /* Calculate whether all elements are in one vector. */
8175 for (i = which = 0; i < nelt; ++i)
8176 {
8177 unsigned char e = d.perm[i];
8178 gcc_assert (e < 2 * nelt);
8179 which |= (e < nelt ? 1 : 2);
8180 }
8181
8182 /* If all elements are from the second vector, reindex as if from the
8183 first vector. */
8184 if (which == 2)
8185 for (i = 0; i < nelt; ++i)
8186 d.perm[i] -= nelt;
8187
8188 /* Check whether the mask can be applied to a single vector. */
8189 d.one_vector_p = (which != 3);
8190
8191 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8192 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8193 if (!d.one_vector_p)
8194 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8195
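/* Expand inside a throw-away insn sequence: with TESTING_P set most of
   the recognizers return before emitting, and anything that does get
   emitted is discarded along with the sequence, so this only reports
   whether the permutation is supported. */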
8196 start_sequence ();
8197 ret = aarch64_expand_vec_perm_const_1 (&d);
8198 end_sequence ();
8199
8200 return ret;
8201 }
8202
8203 #undef TARGET_ADDRESS_COST
8204 #define TARGET_ADDRESS_COST aarch64_address_cost
8205
8206 /* This hook determines whether unnamed bitfields affect the alignment
8207 of the containing structure. The hook returns true if the structure
8208 should inherit the alignment requirements of an unnamed bitfield's
8209 type. */
8210 #undef TARGET_ALIGN_ANON_BITFIELD
8211 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
8212
8213 #undef TARGET_ASM_ALIGNED_DI_OP
8214 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8215
8216 #undef TARGET_ASM_ALIGNED_HI_OP
8217 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8218
8219 #undef TARGET_ASM_ALIGNED_SI_OP
8220 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8221
8222 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8223 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8224 hook_bool_const_tree_hwi_hwi_const_tree_true
8225
8226 #undef TARGET_ASM_FILE_START
8227 #define TARGET_ASM_FILE_START aarch64_start_file
8228
8229 #undef TARGET_ASM_OUTPUT_MI_THUNK
8230 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8231
8232 #undef TARGET_ASM_SELECT_RTX_SECTION
8233 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8234
8235 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8236 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8237
8238 #undef TARGET_BUILD_BUILTIN_VA_LIST
8239 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8240
8241 #undef TARGET_CALLEE_COPIES
8242 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8243
8244 #undef TARGET_CAN_ELIMINATE
8245 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8246
8247 #undef TARGET_CANNOT_FORCE_CONST_MEM
8248 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
8249
8250 #undef TARGET_CONDITIONAL_REGISTER_USAGE
8251 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
8252
8253 /* Only the least significant bit is used for initialization guard
8254 variables. */
8255 #undef TARGET_CXX_GUARD_MASK_BIT
8256 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
8257
8258 #undef TARGET_C_MODE_FOR_SUFFIX
8259 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
8260
8261 #ifdef TARGET_BIG_ENDIAN_DEFAULT
8262 #undef TARGET_DEFAULT_TARGET_FLAGS
8263 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
8264 #endif
8265
8266 #undef TARGET_CLASS_MAX_NREGS
8267 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
8268
8269 #undef TARGET_BUILTIN_DECL
8270 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
8271
8272 #undef TARGET_EXPAND_BUILTIN
8273 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
8274
8275 #undef TARGET_EXPAND_BUILTIN_VA_START
8276 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
8277
8278 #undef TARGET_FOLD_BUILTIN
8279 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
8280
8281 #undef TARGET_FUNCTION_ARG
8282 #define TARGET_FUNCTION_ARG aarch64_function_arg
8283
8284 #undef TARGET_FUNCTION_ARG_ADVANCE
8285 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
8286
8287 #undef TARGET_FUNCTION_ARG_BOUNDARY
8288 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
8289
8290 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
8291 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
8292
8293 #undef TARGET_FUNCTION_VALUE
8294 #define TARGET_FUNCTION_VALUE aarch64_function_value
8295
8296 #undef TARGET_FUNCTION_VALUE_REGNO_P
8297 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
8298
8299 #undef TARGET_FRAME_POINTER_REQUIRED
8300 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
8301
8302 #undef TARGET_GIMPLE_FOLD_BUILTIN
8303 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
8304
8305 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
8306 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
8307
8308 #undef TARGET_INIT_BUILTINS
8309 #define TARGET_INIT_BUILTINS aarch64_init_builtins
8310
8311 #undef TARGET_LEGITIMATE_ADDRESS_P
8312 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
8313
8314 #undef TARGET_LEGITIMATE_CONSTANT_P
8315 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
8316
8317 #undef TARGET_LIBGCC_CMP_RETURN_MODE
8318 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
8319
8320 #undef TARGET_LRA_P
8321 #define TARGET_LRA_P aarch64_lra_p
8322
8323 #undef TARGET_MANGLE_TYPE
8324 #define TARGET_MANGLE_TYPE aarch64_mangle_type
8325
8326 #undef TARGET_MEMORY_MOVE_COST
8327 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
8328
8329 #undef TARGET_MUST_PASS_IN_STACK
8330 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8331
8332 /* This target hook should return true if accesses to volatile bitfields
8333 should use the narrowest mode possible. It should return false if these
8334 accesses should use the bitfield container type. */
8335 #undef TARGET_NARROW_VOLATILE_BITFIELD
8336 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
8337
8338 #undef TARGET_OPTION_OVERRIDE
8339 #define TARGET_OPTION_OVERRIDE aarch64_override_options
8340
8341 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
8342 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
8343 aarch64_override_options_after_change
8344
8345 #undef TARGET_PASS_BY_REFERENCE
8346 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
8347
8348 #undef TARGET_PREFERRED_RELOAD_CLASS
8349 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
8350
8351 #undef TARGET_SECONDARY_RELOAD
8352 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
8353
8354 #undef TARGET_SHIFT_TRUNCATION_MASK
8355 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
8356
8357 #undef TARGET_SETUP_INCOMING_VARARGS
8358 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
8359
8360 #undef TARGET_STRUCT_VALUE_RTX
8361 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
8362
8363 #undef TARGET_REGISTER_MOVE_COST
8364 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
8365
8366 #undef TARGET_RETURN_IN_MEMORY
8367 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
8368
8369 #undef TARGET_RETURN_IN_MSB
8370 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
8371
8372 #undef TARGET_RTX_COSTS
8373 #define TARGET_RTX_COSTS aarch64_rtx_costs
8374
8375 #undef TARGET_TRAMPOLINE_INIT
8376 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
8377
8378 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
8379 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
8380
8381 #undef TARGET_VECTOR_MODE_SUPPORTED_P
8382 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
8383
8384 #undef TARGET_ARRAY_MODE_SUPPORTED_P
8385 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
8386
8387 #undef TARGET_VECTORIZE_ADD_STMT_COST
8388 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
8389
8390 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
8391 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
8392 aarch64_builtin_vectorization_cost
8393
8394 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
8395 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
8396
8397 #undef TARGET_VECTORIZE_BUILTINS
8398 #define TARGET_VECTORIZE_BUILTINS
8399
8400 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
8401 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
8402 aarch64_builtin_vectorized_function
8403
8404 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
8405 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
8406 aarch64_autovectorize_vector_sizes
8407
8408 /* Section anchor support. */
8409
8410 #undef TARGET_MIN_ANCHOR_OFFSET
8411 #define TARGET_MIN_ANCHOR_OFFSET -256
8412
8413 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
8414 byte offset; we can do much more for larger data types, but have no way
8415 to determine the size of the access. We assume accesses are aligned. */
8416 #undef TARGET_MAX_ANCHOR_OFFSET
8417 #define TARGET_MAX_ANCHOR_OFFSET 4095
8418
8419 #undef TARGET_VECTOR_ALIGNMENT
8420 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
8421
8422 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
8423 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
8424 aarch64_simd_vector_alignment_reachable
8425
8426 /* vec_perm support. */
8427
8428 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
8429 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
8430 aarch64_vectorize_vec_perm_const_ok
8431
8432
8433 #undef TARGET_FIXED_CONDITION_CODE_REGS
8434 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
8435
8436 struct gcc_target targetm = TARGET_INITIALIZER;
8437
8438 #include "gt-aarch64.h"