 1/* Machine description for AArch64 architecture.
 2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
25#include "insn-codes.h"
26#include "rtl.h"
27#include "insn-attr.h"
28#include "tree.h"
29#include "stringpool.h"
30#include "stor-layout.h"
31#include "calls.h"
32#include "varasm.h"
33#include "regs.h"
34#include "df.h"
35#include "hard-reg-set.h"
36#include "output.h"
37#include "expr.h"
38#include "reload.h"
39#include "toplev.h"
40#include "target.h"
41#include "target-def.h"
42#include "targhooks.h"
43#include "ggc.h"
44#include "hashtab.h"
45#include "hash-set.h"
46#include "vec.h"
47#include "machmode.h"
48#include "input.h"
49#include "function.h"
50#include "tm_p.h"
51#include "recog.h"
52#include "langhooks.h"
53#include "diagnostic-core.h"
 54#include "hash-table.h"
55#include "basic-block.h"
56#include "tree-ssa-alias.h"
57#include "internal-fn.h"
58#include "gimple-fold.h"
59#include "tree-eh.h"
60#include "gimple-expr.h"
61#include "is-a.h"
 62#include "gimple.h"
 63#include "gimplify.h"
64#include "optabs.h"
65#include "dwarf2.h"
66#include "cfgloop.h"
67#include "tree-vectorizer.h"
 68#include "aarch64-cost-tables.h"
 69#include "dumpfile.h"
 70#include "builtins.h"
 71#include "rtl-iter.h"
 72
73/* Defined for convenience. */
74#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
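/* POINTER_BYTES is 8 under LP64 and 4 under ILP32.  */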
75
76/* Classifies an address.
77
78 ADDRESS_REG_IMM
79 A simple base register plus immediate offset.
80
81 ADDRESS_REG_WB
82 A base register indexed by immediate offset with writeback.
83
84 ADDRESS_REG_REG
85 A base register indexed by (optionally scaled) register.
86
87 ADDRESS_REG_UXTW
88 A base register indexed by (optionally scaled) zero-extended register.
89
90 ADDRESS_REG_SXTW
91 A base register indexed by (optionally scaled) sign-extended register.
92
93 ADDRESS_LO_SUM
94 A LO_SUM rtx with a base register and "LO12" symbol relocation.
95
96 ADDRESS_SYMBOLIC:
97 A constant symbolic address, in pc-relative literal pool. */
98
99enum aarch64_address_type {
100 ADDRESS_REG_IMM,
101 ADDRESS_REG_WB,
102 ADDRESS_REG_REG,
103 ADDRESS_REG_UXTW,
104 ADDRESS_REG_SXTW,
105 ADDRESS_LO_SUM,
106 ADDRESS_SYMBOLIC
107};
108
109struct aarch64_address_info {
110 enum aarch64_address_type type;
111 rtx base;
112 rtx offset;
113 int shift;
114 enum aarch64_symbol_type symbol_type;
115};
116
117struct simd_immediate_info
118{
119 rtx value;
120 int shift;
121 int element_width;
 122 bool mvn;
 123 bool msl;
124};
125
126/* The current code model. */
127enum aarch64_code_model aarch64_cmodel;
128
129#ifdef HAVE_AS_TLS
130#undef TARGET_HAVE_TLS
131#define TARGET_HAVE_TLS 1
132#endif
133
 134static bool aarch64_lra_p (void);
135static bool aarch64_composite_type_p (const_tree, enum machine_mode);
136static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
137 const_tree,
138 enum machine_mode *, int *,
139 bool *);
140static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
141static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
 142static void aarch64_override_options_after_change (void);
143static bool aarch64_vector_mode_supported_p (enum machine_mode);
144static unsigned bit_count (unsigned HOST_WIDE_INT);
145static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
146 const unsigned char *sel);
 147static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);
 148
 149/* The processor for which instructions should be scheduled. */
 150enum aarch64_processor aarch64_tune = cortexa53;
151
152/* The current tuning set. */
153const struct tune_params *aarch64_tune_params;
154
155/* Mask to specify which instructions we are allowed to generate. */
156unsigned long aarch64_isa_flags = 0;
157
158/* Mask to specify which instruction scheduling options should be used. */
159unsigned long aarch64_tune_flags = 0;
160
161/* Tuning parameters. */
162
163#if HAVE_DESIGNATED_INITIALIZERS
164#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
165#else
166#define NAMED_PARAM(NAME, VAL) (VAL)
167#endif
168
169#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
170__extension__
171#endif
172
173#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
174__extension__
175#endif
176static const struct cpu_addrcost_table generic_addrcost_table =
177{
178#if HAVE_DESIGNATED_INITIALIZERS
179 .addr_scale_costs =
180#endif
181 {
182 NAMED_PARAM (hi, 0),
183 NAMED_PARAM (si, 0),
 184 NAMED_PARAM (di, 0),
185 NAMED_PARAM (ti, 0),
186 },
187 NAMED_PARAM (pre_modify, 0),
188 NAMED_PARAM (post_modify, 0),
189 NAMED_PARAM (register_offset, 0),
190 NAMED_PARAM (register_extend, 0),
191 NAMED_PARAM (imm_offset, 0)
192};
193
194#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
195__extension__
196#endif
197static const struct cpu_addrcost_table cortexa57_addrcost_table =
198{
199#if HAVE_DESIGNATED_INITIALIZERS
200 .addr_scale_costs =
201#endif
202 {
203 NAMED_PARAM (hi, 1),
204 NAMED_PARAM (si, 0),
 205 NAMED_PARAM (di, 0),
206 NAMED_PARAM (ti, 1),
207 },
208 NAMED_PARAM (pre_modify, 0),
209 NAMED_PARAM (post_modify, 0),
210 NAMED_PARAM (register_offset, 0),
211 NAMED_PARAM (register_extend, 0),
212 NAMED_PARAM (imm_offset, 0),
213};
214
215#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
216__extension__
217#endif
218static const struct cpu_regmove_cost generic_regmove_cost =
219{
220 NAMED_PARAM (GP2GP, 1),
221 NAMED_PARAM (GP2FP, 2),
222 NAMED_PARAM (FP2GP, 2),
 223 NAMED_PARAM (FP2FP, 2)
224};
225
226static const struct cpu_regmove_cost cortexa57_regmove_cost =
227{
228 NAMED_PARAM (GP2GP, 1),
229 /* Avoid the use of slow int<->fp moves for spilling by setting
230 their cost higher than memmov_cost. */
231 NAMED_PARAM (GP2FP, 5),
232 NAMED_PARAM (FP2GP, 5),
233 NAMED_PARAM (FP2FP, 2)
234};
235
236static const struct cpu_regmove_cost cortexa53_regmove_cost =
237{
238 NAMED_PARAM (GP2GP, 1),
239 /* Avoid the use of slow int<->fp moves for spilling by setting
240 their cost higher than memmov_cost. */
241 NAMED_PARAM (GP2FP, 5),
242 NAMED_PARAM (FP2GP, 5),
243 NAMED_PARAM (FP2FP, 2)
244};
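/* Note: memmov_cost is 4 in the cortexa53/cortexa57 tunings below, so the
   GP2FP/FP2GP cost of 5 used above does exceed it, and the allocator will
   prefer a spill to memory over bouncing a value through the other
   register file.  */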
245
246static const struct cpu_regmove_cost thunderx_regmove_cost =
247{
248 NAMED_PARAM (GP2GP, 2),
249 NAMED_PARAM (GP2FP, 2),
250 NAMED_PARAM (FP2GP, 6),
251 NAMED_PARAM (FP2FP, 4)
252};
253
254/* Generic costs for vector insn classes. */
255#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
256__extension__
257#endif
258static const struct cpu_vector_cost generic_vector_cost =
259{
260 NAMED_PARAM (scalar_stmt_cost, 1),
261 NAMED_PARAM (scalar_load_cost, 1),
262 NAMED_PARAM (scalar_store_cost, 1),
263 NAMED_PARAM (vec_stmt_cost, 1),
264 NAMED_PARAM (vec_to_scalar_cost, 1),
265 NAMED_PARAM (scalar_to_vec_cost, 1),
266 NAMED_PARAM (vec_align_load_cost, 1),
267 NAMED_PARAM (vec_unalign_load_cost, 1),
268 NAMED_PARAM (vec_unalign_store_cost, 1),
269 NAMED_PARAM (vec_store_cost, 1),
270 NAMED_PARAM (cond_taken_branch_cost, 3),
271 NAMED_PARAM (cond_not_taken_branch_cost, 1)
272};
273
274/* Generic costs for vector insn classes. */
275#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
276__extension__
277#endif
278static const struct cpu_vector_cost cortexa57_vector_cost =
279{
280 NAMED_PARAM (scalar_stmt_cost, 1),
281 NAMED_PARAM (scalar_load_cost, 4),
282 NAMED_PARAM (scalar_store_cost, 1),
283 NAMED_PARAM (vec_stmt_cost, 3),
284 NAMED_PARAM (vec_to_scalar_cost, 8),
285 NAMED_PARAM (scalar_to_vec_cost, 8),
286 NAMED_PARAM (vec_align_load_cost, 5),
287 NAMED_PARAM (vec_unalign_load_cost, 5),
288 NAMED_PARAM (vec_unalign_store_cost, 1),
289 NAMED_PARAM (vec_store_cost, 1),
290 NAMED_PARAM (cond_taken_branch_cost, 1),
291 NAMED_PARAM (cond_not_taken_branch_cost, 1)
292};
293
294#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
295__extension__
296#endif
297static const struct tune_params generic_tunings =
298{
 299 &cortexa57_extra_costs,
300 &generic_addrcost_table,
301 &generic_regmove_cost,
 302 &generic_vector_cost,
303 NAMED_PARAM (memmov_cost, 4),
304 NAMED_PARAM (issue_rate, 2)
305};
306
307static const struct tune_params cortexa53_tunings =
308{
309 &cortexa53_extra_costs,
310 &generic_addrcost_table,
 311 &cortexa53_regmove_cost,
 312 &generic_vector_cost,
313 NAMED_PARAM (memmov_cost, 4),
314 NAMED_PARAM (issue_rate, 2)
315};
316
317static const struct tune_params cortexa57_tunings =
318{
319 &cortexa57_extra_costs,
 320 &cortexa57_addrcost_table,
 321 &cortexa57_regmove_cost,
 322 &cortexa57_vector_cost,
323 NAMED_PARAM (memmov_cost, 4),
324 NAMED_PARAM (issue_rate, 3)
325};
326
327static const struct tune_params thunderx_tunings =
328{
329 &thunderx_extra_costs,
330 &generic_addrcost_table,
331 &thunderx_regmove_cost,
332 &generic_vector_cost,
333 NAMED_PARAM (memmov_cost, 6),
334 NAMED_PARAM (issue_rate, 2)
335};
336
337/* A processor implementing AArch64. */
338struct processor
339{
340 const char *const name;
341 enum aarch64_processor core;
342 const char *arch;
343 const unsigned long flags;
344 const struct tune_params *const tune;
345};
346
347/* Processor cores implementing AArch64. */
348static const struct processor all_cores[] =
349{
 350#define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
351 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
352#include "aarch64-cores.def"
353#undef AARCH64_CORE
 354 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
355 {NULL, aarch64_none, NULL, 0, NULL}
356};
357
358/* Architectures implementing AArch64. */
359static const struct processor all_architectures[] =
360{
361#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
362 {NAME, CORE, #ARCH, FLAGS, NULL},
363#include "aarch64-arches.def"
364#undef AARCH64_ARCH
365 {NULL, aarch64_none, NULL, 0, NULL}
366};
367
368/* Target specification. These are populated as commandline arguments
369 are processed, or NULL if not specified. */
370static const struct processor *selected_arch;
371static const struct processor *selected_cpu;
372static const struct processor *selected_tune;
373
374#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
375
376/* An ISA extension in the co-processor and main instruction set space. */
377struct aarch64_option_extension
378{
379 const char *const name;
380 const unsigned long flags_on;
381 const unsigned long flags_off;
382};
383
384/* ISA extensions in AArch64. */
385static const struct aarch64_option_extension all_extensions[] =
386{
387#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
388 {NAME, FLAGS_ON, FLAGS_OFF},
389#include "aarch64-option-extensions.def"
390#undef AARCH64_OPT_EXTENSION
391 {NULL, 0, 0}
392};
393
394/* Used to track the size of an address when generating a pre/post
395 increment address. */
396static enum machine_mode aarch64_memory_reference_mode;
397
398/* Used to force GTY into this file. */
399static GTY(()) int gty_dummy;
400
401/* A table of valid AArch64 "bitmask immediate" values for
402 logical instructions. */
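/* There are 5334 such values in total, hence the table size below.  */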
403
404#define AARCH64_NUM_BITMASKS 5334
405static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
406
407typedef enum aarch64_cond_code
408{
409 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
410 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
411 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
412}
413aarch64_cc;
414
415#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
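/* The aarch64_cc enum above is laid out so that each condition and its
   inverse differ only in bit 0 (e.g. AARCH64_EQ == 0, AARCH64_NE == 1),
   which is what makes the simple XOR above correct.  */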
416
417/* The condition codes of the processor, and the inverse function. */
418static const char * const aarch64_condition_codes[] =
419{
420 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
421 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
422};
423
424/* Provide a mapping from gcc register numbers to dwarf register numbers. */
425unsigned
426aarch64_dbx_register_number (unsigned regno)
427{
428 if (GP_REGNUM_P (regno))
429 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
430 else if (regno == SP_REGNUM)
431 return AARCH64_DWARF_SP;
432 else if (FP_REGNUM_P (regno))
433 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
434
435 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
436 equivalent DWARF register. */
437 return DWARF_FRAME_REGISTERS;
438}
439
440/* Return TRUE if MODE is any of the large INT modes. */
441static bool
442aarch64_vect_struct_mode_p (enum machine_mode mode)
443{
444 return mode == OImode || mode == CImode || mode == XImode;
445}
446
447/* Return TRUE if MODE is any of the vector modes. */
448static bool
449aarch64_vector_mode_p (enum machine_mode mode)
450{
451 return aarch64_vector_mode_supported_p (mode)
452 || aarch64_vect_struct_mode_p (mode);
453}
454
455/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
456static bool
457aarch64_array_mode_supported_p (enum machine_mode mode,
458 unsigned HOST_WIDE_INT nelems)
459{
460 if (TARGET_SIMD
461 && AARCH64_VALID_SIMD_QREG_MODE (mode)
462 && (nelems >= 2 && nelems <= 4))
463 return true;
464
465 return false;
466}
467
468/* Implement HARD_REGNO_NREGS. */
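/* For example, a 16-byte TFmode value needs a single FP/SIMD register
   (UNITS_PER_VREG is 16) but two general registers (UNITS_PER_WORD is 8).  */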
469
470int
471aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
472{
473 switch (aarch64_regno_regclass (regno))
474 {
475 case FP_REGS:
476 case FP_LO_REGS:
477 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
478 default:
479 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
480 }
481 gcc_unreachable ();
482}
483
484/* Implement HARD_REGNO_MODE_OK. */
485
486int
487aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
488{
489 if (GET_MODE_CLASS (mode) == MODE_CC)
490 return regno == CC_REGNUM;
491
492 if (regno == SP_REGNUM)
493 /* The purpose of comparing with ptr_mode is to support the
494 global register variable associated with the stack pointer
495 register via the syntax of asm ("wsp") in ILP32. */
496 return mode == Pmode || mode == ptr_mode;
497
498 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
499 return mode == Pmode;
500
501 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
502 return 1;
503
504 if (FP_REGNUM_P (regno))
505 {
506 if (aarch64_vect_struct_mode_p (mode))
507 return
508 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
509 else
510 return 1;
511 }
512
513 return 0;
514}
515
516/* Implement HARD_REGNO_CALLER_SAVE_MODE. */
517enum machine_mode
518aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
519 enum machine_mode mode)
520{
521 /* Handle modes that fit within single registers. */
522 if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
523 {
524 if (GET_MODE_SIZE (mode) >= 4)
525 return mode;
526 else
527 return SImode;
528 }
529 /* Fall back to generic for multi-reg and very large modes. */
530 else
531 return choose_hard_reg_mode (regno, nregs, false);
532}
533
534/* Return true if calls to DECL should be treated as
535 long-calls (ie called via a register). */
536static bool
537aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
538{
539 return false;
540}
541
542/* Return true if calls to symbol-ref SYM should be treated as
543 long-calls (ie called via a register). */
544bool
545aarch64_is_long_call_p (rtx sym)
546{
547 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
548}
549
 550/* Return true if the offsets to a zero/sign-extract operation
 551 represent an expression that matches an extend operation. The
 552 operands represent the parameters from
 553
 554 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
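/* For example, in DImode the check below accepts EXTRACT_IMM == 34 with
   MULT_IMM == 4: 34 & ~7 == 32 is a power of two, 34 & 7 == 2 <= 4, and
   4 == 1 << 2.  */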
555bool
556aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
557 rtx extract_imm)
558{
559 HOST_WIDE_INT mult_val, extract_val;
560
561 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
562 return false;
563
564 mult_val = INTVAL (mult_imm);
565 extract_val = INTVAL (extract_imm);
566
567 if (extract_val > 8
568 && extract_val < GET_MODE_BITSIZE (mode)
569 && exact_log2 (extract_val & ~7) > 0
570 && (extract_val & 7) <= 4
571 && mult_val == (1 << (extract_val & 7)))
572 return true;
573
574 return false;
575}
576
577/* Emit an insn that's a simple single-set. Both the operands must be
578 known to be valid. */
579inline static rtx
580emit_set_insn (rtx x, rtx y)
581{
582 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
583}
584
585/* X and Y are two things to compare using CODE. Emit the compare insn and
586 return the rtx for register 0 in the proper mode. */
587rtx
588aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
589{
590 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
591 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
592
593 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
594 return cc_reg;
595}
596
597/* Build the SYMBOL_REF for __tls_get_addr. */
598
599static GTY(()) rtx tls_get_addr_libfunc;
600
601rtx
602aarch64_tls_get_addr (void)
603{
604 if (!tls_get_addr_libfunc)
605 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
606 return tls_get_addr_libfunc;
607}
608
609/* Return the TLS model to use for ADDR. */
610
611static enum tls_model
612tls_symbolic_operand_type (rtx addr)
613{
614 enum tls_model tls_kind = TLS_MODEL_NONE;
615 rtx sym, addend;
616
617 if (GET_CODE (addr) == CONST)
618 {
619 split_const (addr, &sym, &addend);
620 if (GET_CODE (sym) == SYMBOL_REF)
621 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
622 }
623 else if (GET_CODE (addr) == SYMBOL_REF)
624 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
625
626 return tls_kind;
627}
628
 629/* We allow lo_sum's in addresses among our legitimate addresses
 630 so that combine can take care of combining addresses where
 631 necessary, but for generation purposes we generate the address
 632 as follows:
633 RTL Absolute
634 tmp = hi (symbol_ref); adrp x1, foo
635 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
636 nop
637
638 PIC TLS
639 adrp x1, :got:foo adrp tmp, :tlsgd:foo
640 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
641 bl __tls_get_addr
642 nop
643
644 Load TLS symbol, depending on TLS mechanism and TLS access model.
645
646 Global Dynamic - Traditional TLS:
647 adrp tmp, :tlsgd:imm
648 add dest, tmp, #:tlsgd_lo12:imm
649 bl __tls_get_addr
650
651 Global Dynamic - TLS Descriptors:
652 adrp dest, :tlsdesc:imm
653 ldr tmp, [dest, #:tlsdesc_lo12:imm]
654 add dest, dest, #:tlsdesc_lo12:imm
655 blr tmp
656 mrs tp, tpidr_el0
657 add dest, dest, tp
658
659 Initial Exec:
660 mrs tp, tpidr_el0
661 adrp tmp, :gottprel:imm
662 ldr dest, [tmp, #:gottprel_lo12:imm]
663 add dest, dest, tp
664
665 Local Exec:
666 mrs tp, tpidr_el0
667 add t0, tp, #:tprel_hi12:imm
668 add t0, #:tprel_lo12_nc:imm
669*/
670
671static void
672aarch64_load_symref_appropriately (rtx dest, rtx imm,
673 enum aarch64_symbol_type type)
674{
675 switch (type)
676 {
677 case SYMBOL_SMALL_ABSOLUTE:
678 {
 679 /* In ILP32, the mode of dest can be either SImode or DImode. */
 680 rtx tmp_reg = dest;
681 enum machine_mode mode = GET_MODE (dest);
682
683 gcc_assert (mode == Pmode || mode == ptr_mode);
684
 685 if (can_create_pseudo_p ())
 686 tmp_reg = gen_reg_rtx (mode);
 687
 688 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
689 emit_insn (gen_add_losym (dest, tmp_reg, imm));
690 return;
691 }
692
693 case SYMBOL_TINY_ABSOLUTE:
694 emit_insn (gen_rtx_SET (Pmode, dest, imm));
695 return;
696
697 case SYMBOL_SMALL_GOT:
698 {
699 /* In ILP32, the mode of dest can be either SImode or DImode,
700 while the got entry is always of SImode size. The mode of
701 dest depends on how dest is used: if dest is assigned to a
 702 pointer (e.g. in memory), it has SImode; it may have
 703 DImode if dest is dereferenced to access the memory.
704 This is why we have to handle three different ldr_got_small
705 patterns here (two patterns for ILP32). */
 706 rtx tmp_reg = dest;
707 enum machine_mode mode = GET_MODE (dest);
708
 709 if (can_create_pseudo_p ())
710 tmp_reg = gen_reg_rtx (mode);
711
712 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
713 if (mode == ptr_mode)
714 {
715 if (mode == DImode)
716 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
717 else
718 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
719 }
720 else
721 {
722 gcc_assert (mode == Pmode);
723 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
724 }
725
726 return;
727 }
728
729 case SYMBOL_SMALL_TLSGD:
730 {
 731 rtx_insn *insns;
732 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
733
734 start_sequence ();
 735 aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
736 insns = get_insns ();
737 end_sequence ();
738
739 RTL_CONST_CALL_P (insns) = 1;
740 emit_libcall_block (insns, dest, result, imm);
741 return;
742 }
743
744 case SYMBOL_SMALL_TLSDESC:
745 {
746 enum machine_mode mode = GET_MODE (dest);
747 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
748 rtx tp;
749
750 gcc_assert (mode == Pmode || mode == ptr_mode);
751
752 /* In ILP32, the got entry is always of SImode size. Unlike
753 small GOT, the dest is fixed at reg 0. */
754 if (TARGET_ILP32)
755 emit_insn (gen_tlsdesc_small_si (imm));
756 else
757 emit_insn (gen_tlsdesc_small_di (imm));
 758 tp = aarch64_load_tp (NULL);
759
760 if (mode != Pmode)
761 tp = gen_lowpart (mode, tp);
762
763 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
764 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
765 return;
766 }
767
768 case SYMBOL_SMALL_GOTTPREL:
769 {
770 /* In ILP32, the mode of dest can be either SImode or DImode,
771 while the got entry is always of SImode size. The mode of
772 dest depends on how dest is used: if dest is assigned to a
 773 pointer (e.g. in memory), it has SImode; it may have
 774 DImode if dest is dereferenced to access the memory.
775 This is why we have to handle three different tlsie_small
776 patterns here (two patterns for ILP32). */
777 enum machine_mode mode = GET_MODE (dest);
778 rtx tmp_reg = gen_reg_rtx (mode);
 779 rtx tp = aarch64_load_tp (NULL);
780
781 if (mode == ptr_mode)
782 {
783 if (mode == DImode)
784 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
785 else
786 {
787 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
788 tp = gen_lowpart (mode, tp);
789 }
790 }
791 else
792 {
793 gcc_assert (mode == Pmode);
794 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
795 }
796
797 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
798 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
799 return;
800 }
801
802 case SYMBOL_SMALL_TPREL:
803 {
804 rtx tp = aarch64_load_tp (NULL);
805 emit_insn (gen_tlsle_small (dest, tp, imm));
806 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
807 return;
808 }
809
810 case SYMBOL_TINY_GOT:
811 emit_insn (gen_ldr_got_tiny (dest, imm));
812 return;
813
814 default:
815 gcc_unreachable ();
816 }
817}
818
819/* Emit a move from SRC to DEST. Assume that the move expanders can
820 handle all moves if !can_create_pseudo_p (). The distinction is
821 important because, unlike emit_move_insn, the move expanders know
822 how to force Pmode objects into the constant pool even when the
823 constant pool address is not itself legitimate. */
824static rtx
825aarch64_emit_move (rtx dest, rtx src)
826{
827 return (can_create_pseudo_p ()
828 ? emit_move_insn (dest, src)
829 : emit_move_insn_1 (dest, src));
830}
831
832/* Split a 128-bit move operation into two 64-bit move operations,
833 taking care to handle partial overlap of register to register
834 copies. Special cases are needed when moving between GP regs and
835 FP regs. SRC can be a register, constant or memory; DST a register
836 or memory. If either operand is memory it must not have any side
837 effects. */
838void
839aarch64_split_128bit_move (rtx dst, rtx src)
840{
841 rtx dst_lo, dst_hi;
842 rtx src_lo, src_hi;
 843
 844 enum machine_mode mode = GET_MODE (dst);
 845
846 gcc_assert (mode == TImode || mode == TFmode);
847 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
848 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
849
850 if (REG_P (dst) && REG_P (src))
851 {
852 int src_regno = REGNO (src);
853 int dst_regno = REGNO (dst);
 854
 855 /* Handle FP <-> GP regs. */
856 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
857 {
858 src_lo = gen_lowpart (word_mode, src);
859 src_hi = gen_highpart (word_mode, src);
860
861 if (mode == TImode)
862 {
863 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
864 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
865 }
866 else
867 {
868 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
869 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
870 }
871 return;
872 }
873 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
874 {
875 dst_lo = gen_lowpart (word_mode, dst);
876 dst_hi = gen_highpart (word_mode, dst);
877
878 if (mode == TImode)
879 {
880 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
881 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
882 }
883 else
884 {
885 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
886 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
887 }
888 return;
 889 }
890 }
891
892 dst_lo = gen_lowpart (word_mode, dst);
893 dst_hi = gen_highpart (word_mode, dst);
894 src_lo = gen_lowpart (word_mode, src);
895 src_hi = gen_highpart_mode (word_mode, mode, src);
896
897 /* At most one pairing may overlap. */
898 if (reg_overlap_mentioned_p (dst_lo, src_hi))
899 {
900 aarch64_emit_move (dst_hi, src_hi);
901 aarch64_emit_move (dst_lo, src_lo);
902 }
903 else
904 {
905 aarch64_emit_move (dst_lo, src_lo);
906 aarch64_emit_move (dst_hi, src_hi);
907 }
908}
909
910bool
911aarch64_split_128bit_move_p (rtx dst, rtx src)
912{
913 return (! REG_P (src)
914 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
915}
916
917/* Split a complex SIMD combine. */
918
919void
920aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
921{
922 enum machine_mode src_mode = GET_MODE (src1);
923 enum machine_mode dst_mode = GET_MODE (dst);
924
925 gcc_assert (VECTOR_MODE_P (dst_mode));
926
927 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
928 {
929 rtx (*gen) (rtx, rtx, rtx);
930
931 switch (src_mode)
932 {
933 case V8QImode:
934 gen = gen_aarch64_simd_combinev8qi;
935 break;
936 case V4HImode:
937 gen = gen_aarch64_simd_combinev4hi;
938 break;
939 case V2SImode:
940 gen = gen_aarch64_simd_combinev2si;
941 break;
942 case V2SFmode:
943 gen = gen_aarch64_simd_combinev2sf;
944 break;
945 case DImode:
946 gen = gen_aarch64_simd_combinedi;
947 break;
948 case DFmode:
949 gen = gen_aarch64_simd_combinedf;
950 break;
951 default:
952 gcc_unreachable ();
953 }
954
955 emit_insn (gen (dst, src1, src2));
956 return;
957 }
958}
959
960/* Split a complex SIMD move. */
961
962void
963aarch64_split_simd_move (rtx dst, rtx src)
964{
965 enum machine_mode src_mode = GET_MODE (src);
966 enum machine_mode dst_mode = GET_MODE (dst);
967
968 gcc_assert (VECTOR_MODE_P (dst_mode));
969
970 if (REG_P (dst) && REG_P (src))
971 {
972 rtx (*gen) (rtx, rtx);
973
974 gcc_assert (VECTOR_MODE_P (src_mode));
975
976 switch (src_mode)
977 {
978 case V16QImode:
 979 gen = gen_aarch64_split_simd_movv16qi;
980 break;
981 case V8HImode:
 982 gen = gen_aarch64_split_simd_movv8hi;
983 break;
984 case V4SImode:
 985 gen = gen_aarch64_split_simd_movv4si;
986 break;
987 case V2DImode:
 988 gen = gen_aarch64_split_simd_movv2di;
989 break;
990 case V4SFmode:
 991 gen = gen_aarch64_split_simd_movv4sf;
992 break;
993 case V2DFmode:
 994 gen = gen_aarch64_split_simd_movv2df;
995 break;
996 default:
997 gcc_unreachable ();
998 }
999
1000 emit_insn (gen (dst, src));
1001 return;
1002 }
1003}
1004
 1005static rtx
 1006aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
1007{
1008 if (can_create_pseudo_p ())
 1009 return force_reg (mode, value);
1010 else
1011 {
1012 x = aarch64_emit_move (x, value);
1013 return x;
1014 }
1015}
1016
1017
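/* Return an rtx equivalent to REG + OFFSET in MODE.  If OFFSET does not fit
   a single ADD immediate, it is first loaded into a register, using TEMP as
   the temporary when new pseudos cannot be created.  */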
1018static rtx
1019aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
1020{
 1021 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
1022 {
1023 rtx high;
1024 /* Load the full offset into a register. This
1025 might be improvable in the future. */
1026 high = GEN_INT (offset);
1027 offset = 0;
1028 high = aarch64_force_temporary (mode, temp, high);
1029 reg = aarch64_force_temporary (mode, temp,
1030 gen_rtx_PLUS (mode, high, reg));
1031 }
1032 return plus_constant (mode, reg, offset);
1033}
1034
1035void
1036aarch64_expand_mov_immediate (rtx dest, rtx imm)
1037{
1038 enum machine_mode mode = GET_MODE (dest);
1039 unsigned HOST_WIDE_INT mask;
1040 int i;
1041 bool first;
1042 unsigned HOST_WIDE_INT val;
1043 bool subtargets;
1044 rtx subtarget;
 1045 int one_match, zero_match, first_not_ffff_match;
1046
1047 gcc_assert (mode == SImode || mode == DImode);
1048
1049 /* Check on what type of symbol it is. */
1050 if (GET_CODE (imm) == SYMBOL_REF
1051 || GET_CODE (imm) == LABEL_REF
1052 || GET_CODE (imm) == CONST)
1053 {
1054 rtx mem, base, offset;
1055 enum aarch64_symbol_type sty;
1056
1057 /* If we have (const (plus symbol offset)), separate out the offset
1058 before we start classifying the symbol. */
1059 split_const (imm, &base, &offset);
1060
1061 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
1062 switch (sty)
1063 {
1064 case SYMBOL_FORCE_TO_MEM:
1065 if (offset != const0_rtx
1066 && targetm.cannot_force_const_mem (mode, imm))
1067 {
 1068 gcc_assert (can_create_pseudo_p ());
 1069 base = aarch64_force_temporary (mode, dest, base);
1070 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1071 aarch64_emit_move (dest, base);
1072 return;
1073 }
 1074 mem = force_const_mem (ptr_mode, imm);
 1075 gcc_assert (mem);
1076 if (mode != ptr_mode)
1077 mem = gen_rtx_ZERO_EXTEND (mode, mem);
1078 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1079 return;
1080
1081 case SYMBOL_SMALL_TLSGD:
1082 case SYMBOL_SMALL_TLSDESC:
1083 case SYMBOL_SMALL_GOTTPREL:
1084 case SYMBOL_SMALL_GOT:
 1085 case SYMBOL_TINY_GOT:
1086 if (offset != const0_rtx)
1087 {
1088 gcc_assert(can_create_pseudo_p ());
 1089 base = aarch64_force_temporary (mode, dest, base);
1090 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1091 aarch64_emit_move (dest, base);
1092 return;
1093 }
1094 /* FALLTHRU */
1095
1096 case SYMBOL_SMALL_TPREL:
1097 case SYMBOL_SMALL_ABSOLUTE:
 1098 case SYMBOL_TINY_ABSOLUTE:
1099 aarch64_load_symref_appropriately (dest, imm, sty);
1100 return;
1101
1102 default:
1103 gcc_unreachable ();
1104 }
1105 }
1106
1107 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1108 {
1109 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1110 return;
1111 }
1112
1113 if (!CONST_INT_P (imm))
1114 {
1115 if (GET_CODE (imm) == HIGH)
1116 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1117 else
1118 {
1119 rtx mem = force_const_mem (mode, imm);
1120 gcc_assert (mem);
1121 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1122 }
1123
1124 return;
1125 }
1126
1127 if (mode == SImode)
1128 {
1129 /* We know we can't do this in 1 insn, and we must be able to do it
1130 in two; so don't mess around looking for sequences that don't buy
1131 us anything. */
1132 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
1133 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1134 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1135 return;
1136 }
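 /* For example, the SImode path above synthesizes 0x12345678 as a move of
 0x5678 followed by an insertion of 0x1234 into bits [31:16], i.e. a
 MOVZ/MOVK pair.  */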
1137
1138 /* Remaining cases are all for DImode. */
1139
1140 val = INTVAL (imm);
1141 subtargets = optimize && can_create_pseudo_p ();
1142
1143 one_match = 0;
1144 zero_match = 0;
1145 mask = 0xffff;
 1146 first_not_ffff_match = -1;
1147
1148 for (i = 0; i < 64; i += 16, mask <<= 16)
1149 {
 1150 if ((val & mask) == mask)
 1151 one_match++;
1152 else
1153 {
1154 if (first_not_ffff_match < 0)
1155 first_not_ffff_match = i;
1156 if ((val & mask) == 0)
1157 zero_match++;
1158 }
1159 }
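 /* At this point one_match counts the 16-bit chunks of VAL that are all ones,
 zero_match counts the chunks that are all zeros, and first_not_ffff_match
 holds the bit position of the lowest chunk that is not all ones (or -1 if
 every chunk is 0xffff).  */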
1160
1161 if (one_match == 2)
1162 {
1163 /* Set one of the quarters and then insert back into result. */
1164 mask = 0xffffll << first_not_ffff_match;
1165 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1166 emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
1167 GEN_INT ((val >> first_not_ffff_match)
1168 & 0xffff)));
1169 return;
1170 }
1171
1172 if (zero_match == 2)
1173 goto simple_sequence;
1174
1175 mask = 0x0ffff0000UL;
1176 for (i = 16; i < 64; i += 16, mask <<= 16)
1177 {
1178 HOST_WIDE_INT comp = mask & ~(mask - 1);
1179
1180 if (aarch64_uimm12_shift (val - (val & mask)))
1181 {
1182 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1183
1184 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1185 emit_insn (gen_adddi3 (dest, subtarget,
1186 GEN_INT (val - (val & mask))));
1187 return;
1188 }
1189 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1190 {
1191 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1192
1193 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1194 GEN_INT ((val + comp) & mask)));
1195 emit_insn (gen_adddi3 (dest, subtarget,
1196 GEN_INT (val - ((val + comp) & mask))));
1197 return;
1198 }
1199 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1200 {
1201 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1202
1203 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1204 GEN_INT ((val - comp) | ~mask)));
1205 emit_insn (gen_adddi3 (dest, subtarget,
1206 GEN_INT (val - ((val - comp) | ~mask))));
1207 return;
1208 }
1209 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1210 {
1211 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1212
1213 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1214 GEN_INT (val | ~mask)));
1215 emit_insn (gen_adddi3 (dest, subtarget,
1216 GEN_INT (val - (val | ~mask))));
1217 return;
1218 }
1219 }
1220
1221 /* See if we can do it by arithmetically combining two
1222 immediates. */
1223 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1224 {
1225 int j;
1226 mask = 0xffff;
1227
1228 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1229 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1230 {
1231 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1232 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1233 GEN_INT (aarch64_bitmasks[i])));
1234 emit_insn (gen_adddi3 (dest, subtarget,
1235 GEN_INT (val - aarch64_bitmasks[i])));
1236 return;
1237 }
1238
1239 for (j = 0; j < 64; j += 16, mask <<= 16)
1240 {
1241 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1242 {
1243 emit_insn (gen_rtx_SET (VOIDmode, dest,
1244 GEN_INT (aarch64_bitmasks[i])));
1245 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1246 GEN_INT ((val >> j) & 0xffff)));
1247 return;
1248 }
1249 }
1250 }
1251
1252 /* See if we can do it by logically combining two immediates. */
1253 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1254 {
1255 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1256 {
1257 int j;
1258
1259 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1260 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1261 {
1262 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1263 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1264 GEN_INT (aarch64_bitmasks[i])));
1265 emit_insn (gen_iordi3 (dest, subtarget,
1266 GEN_INT (aarch64_bitmasks[j])));
1267 return;
1268 }
1269 }
1270 else if ((val & aarch64_bitmasks[i]) == val)
1271 {
1272 int j;
1273
1274 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1275 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1276 {
1277
1278 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1279 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1280 GEN_INT (aarch64_bitmasks[j])));
1281 emit_insn (gen_anddi3 (dest, subtarget,
1282 GEN_INT (aarch64_bitmasks[i])));
1283 return;
1284 }
1285 }
1286 }
1287
1288 if (one_match > zero_match)
1289 {
1290 /* Set either first three quarters or all but the third. */
1291 mask = 0xffffll << (16 - first_not_ffff_match);
1292 emit_insn (gen_rtx_SET (VOIDmode, dest,
1293 GEN_INT (val | mask | 0xffffffff00000000ull)));
1294
1295 /* Now insert other two quarters. */
1296 for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
1297 i < 64; i += 16, mask <<= 16)
1298 {
1299 if ((val & mask) != mask)
1300 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1301 GEN_INT ((val >> i) & 0xffff)));
1302 }
1303 return;
1304 }
1305
1306 simple_sequence:
1307 first = true;
1308 mask = 0xffff;
1309 for (i = 0; i < 64; i += 16, mask <<= 16)
1310 {
1311 if ((val & mask) != 0)
1312 {
1313 if (first)
1314 {
1315 emit_insn (gen_rtx_SET (VOIDmode, dest,
1316 GEN_INT (val & mask)));
1317 first = false;
1318 }
1319 else
1320 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1321 GEN_INT ((val >> i) & 0xffff)));
1322 }
1323 }
1324}
1325
1326static bool
1327aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
1328 tree exp ATTRIBUTE_UNUSED)
 1329{
 1330 /* Currently, always true. */
1331 return true;
1332}
1333
1334/* Implement TARGET_PASS_BY_REFERENCE. */
1335
1336static bool
1337aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1338 enum machine_mode mode,
1339 const_tree type,
1340 bool named ATTRIBUTE_UNUSED)
1341{
1342 HOST_WIDE_INT size;
1343 enum machine_mode dummymode;
1344 int nregs;
1345
1346 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1347 size = (mode == BLKmode && type)
1348 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1349
1350 /* Aggregates are passed by reference based on their size. */
1351 if (type && AGGREGATE_TYPE_P (type))
 1352 {
 1353 size = int_size_in_bytes (type);
1354 }
1355
 1356 /* Variable sized arguments are always passed by reference. */
1357 if (size < 0)
1358 return true;
1359
1360 /* Can this be a candidate to be passed in fp/simd register(s)? */
1361 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1362 &dummymode, &nregs,
1363 NULL))
1364 return false;
1365
1366 /* Arguments which are variable sized or larger than 2 registers are
 1367 passed by reference unless they are a homogeneous floating point
1368 aggregate. */
1369 return size > 2 * UNITS_PER_WORD;
1370}
1371
1372/* Return TRUE if VALTYPE is padded to its least significant bits. */
1373static bool
1374aarch64_return_in_msb (const_tree valtype)
1375{
1376 enum machine_mode dummy_mode;
1377 int dummy_int;
1378
1379 /* Never happens in little-endian mode. */
1380 if (!BYTES_BIG_ENDIAN)
1381 return false;
1382
1383 /* Only composite types smaller than or equal to 16 bytes can
1384 be potentially returned in registers. */
1385 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1386 || int_size_in_bytes (valtype) <= 0
1387 || int_size_in_bytes (valtype) > 16)
1388 return false;
1389
1390 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1391 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1392 is always passed/returned in the least significant bits of fp/simd
1393 register(s). */
1394 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1395 &dummy_mode, &dummy_int, NULL))
1396 return false;
1397
1398 return true;
1399}
1400
1401/* Implement TARGET_FUNCTION_VALUE.
1402 Define how to find the value returned by a function. */
1403
1404static rtx
1405aarch64_function_value (const_tree type, const_tree func,
1406 bool outgoing ATTRIBUTE_UNUSED)
1407{
1408 enum machine_mode mode;
1409 int unsignedp;
1410 int count;
1411 enum machine_mode ag_mode;
1412
1413 mode = TYPE_MODE (type);
1414 if (INTEGRAL_TYPE_P (type))
1415 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1416
1417 if (aarch64_return_in_msb (type))
1418 {
1419 HOST_WIDE_INT size = int_size_in_bytes (type);
1420
1421 if (size % UNITS_PER_WORD != 0)
1422 {
1423 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1424 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1425 }
1426 }
1427
1428 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1429 &ag_mode, &count, NULL))
1430 {
1431 if (!aarch64_composite_type_p (type, mode))
1432 {
1433 gcc_assert (count == 1 && mode == ag_mode);
1434 return gen_rtx_REG (mode, V0_REGNUM);
1435 }
1436 else
1437 {
1438 int i;
1439 rtx par;
1440
1441 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1442 for (i = 0; i < count; i++)
1443 {
1444 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1445 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1446 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1447 XVECEXP (par, 0, i) = tmp;
1448 }
1449 return par;
1450 }
1451 }
1452 else
1453 return gen_rtx_REG (mode, R0_REGNUM);
1454}
1455
1456/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1457 Return true if REGNO is the number of a hard register in which the values
1458 of called function may come back. */
1459
1460static bool
1461aarch64_function_value_regno_p (const unsigned int regno)
1462{
1463 /* Maximum of 16 bytes can be returned in the general registers. Examples
1464 of 16-byte return values are: 128-bit integers and 16-byte small
1465 structures (excluding homogeneous floating-point aggregates). */
1466 if (regno == R0_REGNUM || regno == R1_REGNUM)
1467 return true;
1468
1469 /* Up to four fp/simd registers can return a function value, e.g. a
1470 homogeneous floating-point aggregate having four members. */
1471 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1472 return !TARGET_GENERAL_REGS_ONLY;
1473
1474 return false;
1475}
1476
1477/* Implement TARGET_RETURN_IN_MEMORY.
1478
1479 If the type T of the result of a function is such that
1480 void func (T arg)
1481 would require that arg be passed as a value in a register (or set of
1482 registers) according to the parameter passing rules, then the result
1483 is returned in the same registers as would be used for such an
1484 argument. */
1485
1486static bool
1487aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1488{
1489 HOST_WIDE_INT size;
1490 enum machine_mode ag_mode;
1491 int count;
1492
1493 if (!AGGREGATE_TYPE_P (type)
1494 && TREE_CODE (type) != COMPLEX_TYPE
1495 && TREE_CODE (type) != VECTOR_TYPE)
 1496 /* Simple scalar types are always returned in registers. */
1497 return false;
1498
1499 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1500 type,
1501 &ag_mode,
1502 &count,
1503 NULL))
1504 return false;
1505
 1506 /* Types larger than 2 registers are returned in memory. */
1507 size = int_size_in_bytes (type);
1508 return (size < 0 || size > 2 * UNITS_PER_WORD);
1509}
1510
1511static bool
1512aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1513 const_tree type, int *nregs)
1514{
1515 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1516 return aarch64_vfp_is_call_or_return_candidate (mode,
1517 type,
1518 &pcum->aapcs_vfp_rmode,
1519 nregs,
1520 NULL);
1521}
1522
1523/* Given MODE and TYPE of a function argument, return the alignment in
1524 bits. The idea is to suppress any stronger alignment requested by
1525 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1526 This is a helper function for local use only. */
1527
1528static unsigned int
1529aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1530{
1531 unsigned int alignment;
1532
1533 if (type)
1534 {
1535 if (!integer_zerop (TYPE_SIZE (type)))
1536 {
1537 if (TYPE_MODE (type) == mode)
1538 alignment = TYPE_ALIGN (type);
1539 else
1540 alignment = GET_MODE_ALIGNMENT (mode);
1541 }
1542 else
1543 alignment = 0;
1544 }
1545 else
1546 alignment = GET_MODE_ALIGNMENT (mode);
1547
1548 return alignment;
1549}
1550
1551/* Layout a function argument according to the AAPCS64 rules. The rule
1552 numbers refer to the rule numbers in the AAPCS64. */
1553
1554static void
1555aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1556 const_tree type,
1557 bool named ATTRIBUTE_UNUSED)
1558{
1559 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1560 int ncrn, nvrn, nregs;
1561 bool allocate_ncrn, allocate_nvrn;
 1562 HOST_WIDE_INT size;
1563
1564 /* We need to do this once per argument. */
1565 if (pcum->aapcs_arg_processed)
1566 return;
1567
1568 pcum->aapcs_arg_processed = true;
1569
1570 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
1571 size
1572 = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1573 UNITS_PER_WORD);
1574
1575 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1576 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1577 mode,
1578 type,
1579 &nregs);
1580
1581 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1582 The following code thus handles passing by SIMD/FP registers first. */
1583
1584 nvrn = pcum->aapcs_nvrn;
1585
 1586 /* C1 - C5 for floating point, homogeneous floating point aggregates (HFA)
 1587 and homogeneous short-vector aggregates (HVA). */
1588 if (allocate_nvrn)
1589 {
1590 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1591 {
1592 pcum->aapcs_nextnvrn = nvrn + nregs;
1593 if (!aarch64_composite_type_p (type, mode))
1594 {
1595 gcc_assert (nregs == 1);
1596 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1597 }
1598 else
1599 {
1600 rtx par;
1601 int i;
1602 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1603 for (i = 0; i < nregs; i++)
1604 {
1605 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1606 V0_REGNUM + nvrn + i);
1607 tmp = gen_rtx_EXPR_LIST
1608 (VOIDmode, tmp,
1609 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1610 XVECEXP (par, 0, i) = tmp;
1611 }
1612 pcum->aapcs_reg = par;
1613 }
1614 return;
1615 }
1616 else
1617 {
1618 /* C.3 NSRN is set to 8. */
1619 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1620 goto on_stack;
1621 }
1622 }
1623
1624 ncrn = pcum->aapcs_ncrn;
3abf17cf 1625 nregs = size / UNITS_PER_WORD;
1626
 1627 /* C6 - C9, though the sign and zero extension semantics are
 1628 handled elsewhere. This is the case where the argument fits
 1629 entirely in general registers. */
1630 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1631 {
1632 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1633
1634 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1635
1636 /* C.8 if the argument has an alignment of 16 then the NGRN is
1637 rounded up to the next even number. */
1638 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1639 {
1640 ++ncrn;
1641 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1642 }
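 /* For example, under rule C.8 a 16-byte-aligned __int128 argument that
 arrives when one core register is already in use goes in x2/x3, leaving
 x1 unused.  */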
1643 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1644 A reg is still generated for it, but the caller should be smart
1645 enough not to use it. */
1646 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1647 {
1648 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1649 }
1650 else
1651 {
1652 rtx par;
1653 int i;
1654
1655 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1656 for (i = 0; i < nregs; i++)
1657 {
1658 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1659 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1660 GEN_INT (i * UNITS_PER_WORD));
1661 XVECEXP (par, 0, i) = tmp;
1662 }
1663 pcum->aapcs_reg = par;
1664 }
1665
1666 pcum->aapcs_nextncrn = ncrn + nregs;
1667 return;
1668 }
1669
1670 /* C.11 */
1671 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1672
1673 /* The argument is passed on stack; record the needed number of words for
 1674 this argument and align the total size if necessary. */
 1675on_stack:
 1676 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
1677 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1678 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
 1679 16 / UNITS_PER_WORD);
1680 return;
1681}
1682
1683/* Implement TARGET_FUNCTION_ARG. */
1684
1685static rtx
1686aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1687 const_tree type, bool named)
1688{
1689 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1690 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1691
1692 if (mode == VOIDmode)
1693 return NULL_RTX;
1694
1695 aarch64_layout_arg (pcum_v, mode, type, named);
1696 return pcum->aapcs_reg;
1697}
1698
1699void
1700aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1701 const_tree fntype ATTRIBUTE_UNUSED,
1702 rtx libname ATTRIBUTE_UNUSED,
1703 const_tree fndecl ATTRIBUTE_UNUSED,
1704 unsigned n_named ATTRIBUTE_UNUSED)
1705{
1706 pcum->aapcs_ncrn = 0;
1707 pcum->aapcs_nvrn = 0;
1708 pcum->aapcs_nextncrn = 0;
1709 pcum->aapcs_nextnvrn = 0;
1710 pcum->pcs_variant = ARM_PCS_AAPCS64;
1711 pcum->aapcs_reg = NULL_RTX;
1712 pcum->aapcs_arg_processed = false;
1713 pcum->aapcs_stack_words = 0;
1714 pcum->aapcs_stack_size = 0;
1715
1716 return;
1717}
1718
1719static void
1720aarch64_function_arg_advance (cumulative_args_t pcum_v,
1721 enum machine_mode mode,
1722 const_tree type,
1723 bool named)
1724{
1725 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1726 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1727 {
1728 aarch64_layout_arg (pcum_v, mode, type, named);
1729 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1730 != (pcum->aapcs_stack_words != 0));
1731 pcum->aapcs_arg_processed = false;
1732 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1733 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1734 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1735 pcum->aapcs_stack_words = 0;
1736 pcum->aapcs_reg = NULL_RTX;
1737 }
1738}
1739
1740bool
1741aarch64_function_arg_regno_p (unsigned regno)
1742{
1743 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1744 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1745}
1746
1747/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1748 PARM_BOUNDARY bits of alignment, but will be given anything up
1749 to STACK_BOUNDARY bits if the type requires it. This makes sure
1750 that both before and after the layout of each argument, the Next
1751 Stacked Argument Address (NSAA) will have a minimum alignment of
1752 8 bytes. */
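/* On AArch64, PARM_BOUNDARY is 64 bits and STACK_BOUNDARY is 128 bits, so
   the result is always in the range [64, 128].  */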
1753
1754static unsigned int
1755aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1756{
1757 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1758
1759 if (alignment < PARM_BOUNDARY)
1760 alignment = PARM_BOUNDARY;
1761 if (alignment > STACK_BOUNDARY)
1762 alignment = STACK_BOUNDARY;
1763 return alignment;
1764}
1765
1766/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1767
1768 Return true if an argument passed on the stack should be padded upwards,
1769 i.e. if the least-significant byte of the stack slot has useful data.
1770
1771 Small aggregate types are placed in the lowest memory address.
1772
1773 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1774
1775bool
1776aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1777{
1778 /* On little-endian targets, the least significant byte of every stack
1779 argument is passed at the lowest byte address of the stack slot. */
1780 if (!BYTES_BIG_ENDIAN)
1781 return true;
1782
 1783 /* Otherwise, integral, floating-point and pointer types are padded downward:
1784 the least significant byte of a stack argument is passed at the highest
1785 byte address of the stack slot. */
1786 if (type
1787 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1788 || POINTER_TYPE_P (type))
1789 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1790 return false;
1791
1792 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1793 return true;
1794}
1795
1796/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1797
1798 It specifies padding for the last (may also be the only)
1799 element of a block move between registers and memory. If
1800 assuming the block is in the memory, padding upward means that
1801 the last element is padded after its highest significant byte,
 1802 while in downward padding, the last element is padded at its
 1803 least significant byte side.
1804
1805 Small aggregates and small complex types are always padded
1806 upwards.
1807
1808 We don't need to worry about homogeneous floating-point or
1809 short-vector aggregates; their move is not affected by the
1810 padding direction determined here. Regardless of endianness,
1811 each element of such an aggregate is put in the least
1812 significant bits of a fp/simd register.
1813
1814 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1815 register has useful data, and return the opposite if the most
1816 significant byte does. */
1817
1818bool
1819aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1820 bool first ATTRIBUTE_UNUSED)
1821{
1822
1823 /* Small composite types are always padded upward. */
1824 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1825 {
1826 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1827 : GET_MODE_SIZE (mode));
1828 if (size < 2 * UNITS_PER_WORD)
1829 return true;
1830 }
1831
1832 /* Otherwise, use the default padding. */
1833 return !BYTES_BIG_ENDIAN;
1834}
1835
1836static enum machine_mode
1837aarch64_libgcc_cmp_return_mode (void)
1838{
1839 return SImode;
1840}
1841
1842static bool
1843aarch64_frame_pointer_required (void)
1844{
1845 /* In aarch64_override_options_after_change
1846 flag_omit_leaf_frame_pointer turns off the frame pointer by
1847 default. Turn it back on now if we've not got a leaf
1848 function. */
1849 if (flag_omit_leaf_frame_pointer
1850 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1851 return true;
 1852
 1853 return false;
1854}
1855
1856/* Mark the registers that need to be saved by the callee and calculate
1857 the size of the callee-saved registers area and frame record (both FP
1858 and LR may be omitted). */
1859static void
1860aarch64_layout_frame (void)
1861{
1862 HOST_WIDE_INT offset = 0;
1863 int regno;
1864
1865 if (reload_completed && cfun->machine->frame.laid_out)
1866 return;
1867
1868#define SLOT_NOT_REQUIRED (-2)
1869#define SLOT_REQUIRED (-1)
1870
1871 cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
1872 cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
1873
1874 /* First mark all the registers that really need to be saved... */
1875 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 1876 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
1877
1878 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 1879 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
1880
1881 /* ... that includes the eh data registers (if needed)... */
1882 if (crtl->calls_eh_return)
1883 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
97826595
MS
1884 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
1885 = SLOT_REQUIRED;
43e9d192
IB
1886
1887 /* ... and any callee saved register that dataflow says is live. */
1888 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1889 if (df_regs_ever_live_p (regno)
1890 && !call_used_regs[regno])
97826595 1891 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
1892
1893 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1894 if (df_regs_ever_live_p (regno)
1895 && !call_used_regs[regno])
97826595 1896 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
43e9d192
IB
1897
1898 if (frame_pointer_needed)
1899 {
2e1cdae5 1900 /* FP and LR are placed in the linkage record. */
43e9d192 1901 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
363ffa50 1902 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2e1cdae5 1903 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
363ffa50 1904 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
43e9d192 1905 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
2e1cdae5 1906 offset += 2 * UNITS_PER_WORD;
43e9d192
IB
1907 }
1908
1909 /* Now assign stack slots for them. */
2e1cdae5 1910 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 1911 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
1912 {
1913 cfun->machine->frame.reg_offset[regno] = offset;
363ffa50
JW
1914 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
1915 cfun->machine->frame.wb_candidate1 = regno;
1916 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
1917 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
1918 offset += UNITS_PER_WORD;
1919 }
1920
1921 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 1922 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
43e9d192
IB
1923 {
1924 cfun->machine->frame.reg_offset[regno] = offset;
363ffa50
JW
1925 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
1926 cfun->machine->frame.wb_candidate1 = regno;
1927 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
1928 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
1929 cfun->machine->frame.wb_candidate2 = regno;
43e9d192
IB
1930 offset += UNITS_PER_WORD;
1931 }
1932
43e9d192
IB
1933 cfun->machine->frame.padding0 =
1934 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1935 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1936
1937 cfun->machine->frame.saved_regs_size = offset;
1c960e02
MS
1938
1939 cfun->machine->frame.hard_fp_offset
1940 = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
1941 + get_frame_size ()
1942 + cfun->machine->frame.saved_regs_size,
1943 STACK_BOUNDARY / BITS_PER_UNIT);
1944
1945 cfun->machine->frame.frame_size
1946 = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
1947 + crtl->outgoing_args_size,
1948 STACK_BOUNDARY / BITS_PER_UNIT);
1949
43e9d192
IB
1950 cfun->machine->frame.laid_out = true;
1951}
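/* Worked example of the layout above (illustrative sizes): a function that
   needs the frame pointer, saves x19, x20 and d8, and has 24 bytes of
   locals gets reg_offset[x29] = 0 and reg_offset[x30] = 8, then x19 at 16,
   x20 at 24 and d8 at 32.  The running offset of 40 is rounded up to 48,
   so padding0 = 8 and saved_regs_size = 48; hard_fp_offset becomes
   AARCH64_ROUND_UP (0 + 24 + 48, 16) = 80 and, with no outgoing
   arguments, frame_size is 80 as well.  */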
1952
43e9d192
IB
1953static bool
1954aarch64_register_saved_on_entry (int regno)
1955{
97826595 1956 return cfun->machine->frame.reg_offset[regno] >= 0;
43e9d192
IB
1957}
1958
64dedd72
JW
1959static unsigned
1960aarch64_next_callee_save (unsigned regno, unsigned limit)
1961{
1962 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
1963 regno ++;
1964 return regno;
1965}
43e9d192 1966
c5e1f66e
JW
1967static void
1968aarch64_pushwb_single_reg (enum machine_mode mode, unsigned regno,
1969 HOST_WIDE_INT adjustment)
1970 {
1971 rtx base_rtx = stack_pointer_rtx;
1972 rtx insn, reg, mem;
1973
1974 reg = gen_rtx_REG (mode, regno);
1975 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
1976 plus_constant (Pmode, base_rtx, -adjustment));
1977 mem = gen_rtx_MEM (mode, mem);
1978
1979 insn = emit_move_insn (mem, reg);
1980 RTX_FRAME_RELATED_P (insn) = 1;
1981}
1982
80c11907
JW
1983static rtx
1984aarch64_gen_storewb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2,
1985 HOST_WIDE_INT adjustment)
1986{
1987 switch (mode)
1988 {
1989 case DImode:
1990 return gen_storewb_pairdi_di (base, base, reg, reg2,
1991 GEN_INT (-adjustment),
1992 GEN_INT (UNITS_PER_WORD - adjustment));
1993 case DFmode:
1994 return gen_storewb_pairdf_di (base, base, reg, reg2,
1995 GEN_INT (-adjustment),
1996 GEN_INT (UNITS_PER_WORD - adjustment));
1997 default:
1998 gcc_unreachable ();
1999 }
2000}
2001
2002static void
2003aarch64_pushwb_pair_reg (enum machine_mode mode, unsigned regno1,
2004 unsigned regno2, HOST_WIDE_INT adjustment)
2005{
5d8a22a5 2006 rtx_insn *insn;
80c11907
JW
2007 rtx reg1 = gen_rtx_REG (mode, regno1);
2008 rtx reg2 = gen_rtx_REG (mode, regno2);
2009
2010 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
2011 reg2, adjustment));
2012 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
80c11907
JW
2013 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2014 RTX_FRAME_RELATED_P (insn) = 1;
2015}
2016
159313d9
JW
2017static rtx
2018aarch64_gen_loadwb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2,
2019 HOST_WIDE_INT adjustment)
2020{
2021 switch (mode)
2022 {
2023 case DImode:
2024 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 2025 GEN_INT (UNITS_PER_WORD));
159313d9
JW
2026 case DFmode:
2027 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
3e322b3f 2028 GEN_INT (UNITS_PER_WORD));
159313d9
JW
2029 default:
2030 gcc_unreachable ();
2031 }
2032}
2033
72df5c1f
JW
2034static rtx
2035aarch64_gen_store_pair (enum machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
2036 rtx reg2)
2037{
2038 switch (mode)
2039 {
2040 case DImode:
2041 return gen_store_pairdi (mem1, reg1, mem2, reg2);
2042
2043 case DFmode:
2044 return gen_store_pairdf (mem1, reg1, mem2, reg2);
2045
2046 default:
2047 gcc_unreachable ();
2048 }
2049}
2050
2051static rtx
2052aarch64_gen_load_pair (enum machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
2053 rtx mem2)
2054{
2055 switch (mode)
2056 {
2057 case DImode:
2058 return gen_load_pairdi (reg1, mem1, reg2, mem2);
2059
2060 case DFmode:
2061 return gen_load_pairdf (reg1, mem1, reg2, mem2);
2062
2063 default:
2064 gcc_unreachable ();
2065 }
2066}
2067
43e9d192 2068
43e9d192 2069static void
8ed2fc62 2070aarch64_save_callee_saves (enum machine_mode mode, HOST_WIDE_INT start_offset,
ae13fce3 2071 unsigned start, unsigned limit, bool skip_wb)
43e9d192 2072{
5d8a22a5 2073 rtx_insn *insn;
a007a21c
JW
2074 rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed
2075 ? gen_frame_mem : gen_rtx_MEM);
43e9d192
IB
2076 unsigned regno;
2077 unsigned regno2;
2078
0ec74a1e 2079 for (regno = aarch64_next_callee_save (start, limit);
64dedd72
JW
2080 regno <= limit;
2081 regno = aarch64_next_callee_save (regno + 1, limit))
43e9d192 2082 {
ae13fce3
JW
2083 rtx reg, mem;
2084 HOST_WIDE_INT offset;
64dedd72 2085
ae13fce3
JW
2086 if (skip_wb
2087 && (regno == cfun->machine->frame.wb_candidate1
2088 || regno == cfun->machine->frame.wb_candidate2))
2089 continue;
2090
2091 reg = gen_rtx_REG (mode, regno);
2092 offset = start_offset + cfun->machine->frame.reg_offset[regno];
0ec74a1e
JW
2093 mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2094 offset));
64dedd72
JW
2095
2096 regno2 = aarch64_next_callee_save (regno + 1, limit);
2097
2098 if (regno2 <= limit
2099 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2100 == cfun->machine->frame.reg_offset[regno2]))
2101
43e9d192 2102 {
0ec74a1e 2103 rtx reg2 = gen_rtx_REG (mode, regno2);
64dedd72
JW
2104 rtx mem2;
2105
2106 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
8ed2fc62
JW
2107 mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2108 offset));
2109 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
2110 reg2));
0b4a9743 2111
64dedd72
JW
2112 /* The first part of a frame-related parallel insn is
2113 always assumed to be relevant to the frame
 2114 calculations; subsequent parts are only
2115 frame-related if explicitly marked. */
2116 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2117 regno = regno2;
2118 }
2119 else
8ed2fc62
JW
2120 insn = emit_move_insn (mem, reg);
2121
2122 RTX_FRAME_RELATED_P (insn) = 1;
2123 }
2124}
2125
2126static void
2127aarch64_restore_callee_saves (enum machine_mode mode,
2128 HOST_WIDE_INT start_offset, unsigned start,
dd991abb 2129 unsigned limit, bool skip_wb, rtx *cfi_ops)
8ed2fc62 2130{
8ed2fc62
JW
2131 rtx base_rtx = stack_pointer_rtx;
2132 rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed
2133 ? gen_frame_mem : gen_rtx_MEM);
2134 unsigned regno;
2135 unsigned regno2;
2136 HOST_WIDE_INT offset;
2137
2138 for (regno = aarch64_next_callee_save (start, limit);
2139 regno <= limit;
2140 regno = aarch64_next_callee_save (regno + 1, limit))
2141 {
ae13fce3 2142 rtx reg, mem;
8ed2fc62 2143
ae13fce3
JW
2144 if (skip_wb
2145 && (regno == cfun->machine->frame.wb_candidate1
2146 || regno == cfun->machine->frame.wb_candidate2))
2147 continue;
2148
2149 reg = gen_rtx_REG (mode, regno);
8ed2fc62
JW
2150 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2151 mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2152
2153 regno2 = aarch64_next_callee_save (regno + 1, limit);
2154
2155 if (regno2 <= limit
2156 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2157 == cfun->machine->frame.reg_offset[regno2]))
64dedd72 2158 {
8ed2fc62
JW
2159 rtx reg2 = gen_rtx_REG (mode, regno2);
2160 rtx mem2;
2161
2162 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2163 mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
dd991abb 2164 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
8ed2fc62 2165
dd991abb 2166 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
8ed2fc62 2167 regno = regno2;
43e9d192 2168 }
8ed2fc62 2169 else
dd991abb
RH
2170 emit_move_insn (reg, mem);
2171 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
43e9d192 2172 }
43e9d192
IB
2173}
2174
2175/* AArch64 stack frames generated by this compiler look like:
2176
2177 +-------------------------------+
2178 | |
2179 | incoming stack arguments |
2180 | |
34834420
MS
2181 +-------------------------------+
2182 | | <-- incoming stack pointer (aligned)
43e9d192
IB
2183 | callee-allocated save area |
2184 | for register varargs |
2185 | |
34834420
MS
2186 +-------------------------------+
2187 | local variables | <-- frame_pointer_rtx
43e9d192
IB
2188 | |
2189 +-------------------------------+
454fdba9
RL
2190 | padding0 | \
2191 +-------------------------------+ |
454fdba9 2192 | callee-saved registers | | frame.saved_regs_size
454fdba9
RL
2193 +-------------------------------+ |
2194 | LR' | |
2195 +-------------------------------+ |
34834420
MS
2196 | FP' | / <- hard_frame_pointer_rtx (aligned)
2197 +-------------------------------+
43e9d192
IB
2198 | dynamic allocation |
2199 +-------------------------------+
34834420
MS
2200 | padding |
2201 +-------------------------------+
2202 | outgoing stack arguments | <-- arg_pointer
2203 | |
2204 +-------------------------------+
2205 | | <-- stack_pointer_rtx (aligned)
43e9d192 2206
34834420
MS
2207 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2208 but leave frame_pointer_rtx and hard_frame_pointer_rtx
2209 unchanged. */
43e9d192
IB
2210
2211/* Generate the prologue instructions for entry into a function.
2212 Establish the stack frame by decreasing the stack pointer with a
2213 properly calculated size and, if necessary, create a frame record
2214 filled with the values of LR and previous frame pointer. The
6991c977 2215 current FP is also set up if it is in use. */
43e9d192
IB
2216
2217void
2218aarch64_expand_prologue (void)
2219{
2220 /* sub sp, sp, #<frame_size>
2221 stp {fp, lr}, [sp, #<frame_size> - 16]
2222 add fp, sp, #<frame_size> - hardfp_offset
2223 stp {cs_reg}, [fp, #-16] etc.
2224
2225 sub sp, sp, <final_adjustment_if_any>
2226 */
43e9d192 2227 HOST_WIDE_INT frame_size, offset;
1c960e02 2228 HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */
dd991abb 2229 HOST_WIDE_INT hard_fp_offset;
5d8a22a5 2230 rtx_insn *insn;
43e9d192
IB
2231
2232 aarch64_layout_frame ();
43e9d192 2233
dd991abb
RH
2234 offset = frame_size = cfun->machine->frame.frame_size;
2235 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2236 fp_offset = frame_size - hard_fp_offset;
43e9d192 2237
dd991abb
RH
2238 if (flag_stack_usage_info)
2239 current_function_static_stack_size = frame_size;
43e9d192 2240
44c0e7b9 2241 /* Store pairs and load pairs have an offset range of only -512 to 504. */
43e9d192
IB
2242 if (offset >= 512)
2243 {
2244 /* When the frame has a large size, an initial decrease is done on
2245 the stack pointer to jump over the callee-allocated save area for
2246 register varargs, the local variable area and/or the callee-saved
2247 register area. This will allow the pre-index write-back
2248 store pair instructions to be used for setting up the stack frame
2249 efficiently. */
dd991abb 2250 offset = hard_fp_offset;
43e9d192
IB
2251 if (offset >= 512)
2252 offset = cfun->machine->frame.saved_regs_size;
2253
2254 frame_size -= (offset + crtl->outgoing_args_size);
2255 fp_offset = 0;
2256
2257 if (frame_size >= 0x1000000)
2258 {
2259 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2260 emit_move_insn (op0, GEN_INT (-frame_size));
dd991abb
RH
2261 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2262
2263 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2264 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
2265 plus_constant (Pmode, stack_pointer_rtx,
2266 -frame_size)));
2267 RTX_FRAME_RELATED_P (insn) = 1;
43e9d192
IB
2268 }
2269 else if (frame_size > 0)
2270 {
dd991abb
RH
2271 int hi_ofs = frame_size & 0xfff000;
2272 int lo_ofs = frame_size & 0x000fff;
2273
2274 if (hi_ofs)
43e9d192
IB
2275 {
2276 insn = emit_insn (gen_add2_insn
dd991abb 2277 (stack_pointer_rtx, GEN_INT (-hi_ofs)));
43e9d192
IB
2278 RTX_FRAME_RELATED_P (insn) = 1;
2279 }
dd991abb 2280 if (lo_ofs)
43e9d192
IB
2281 {
2282 insn = emit_insn (gen_add2_insn
dd991abb 2283 (stack_pointer_rtx, GEN_INT (-lo_ofs)));
43e9d192
IB
2284 RTX_FRAME_RELATED_P (insn) = 1;
2285 }
2286 }
2287 }
2288 else
2289 frame_size = -1;
2290
2291 if (offset > 0)
2292 {
ae13fce3
JW
2293 bool skip_wb = false;
2294
43e9d192
IB
2295 if (frame_pointer_needed)
2296 {
c5e1f66e
JW
2297 skip_wb = true;
2298
43e9d192
IB
2299 if (fp_offset)
2300 {
2301 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2302 GEN_INT (-offset)));
2303 RTX_FRAME_RELATED_P (insn) = 1;
80c11907
JW
2304
2305 aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
c5e1f66e 2306 R30_REGNUM, false);
43e9d192
IB
2307 }
2308 else
80c11907 2309 aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);
43e9d192
IB
2310
2311 /* Set up frame pointer to point to the location of the
2312 previous frame pointer on the stack. */
2313 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2314 stack_pointer_rtx,
2315 GEN_INT (fp_offset)));
43e9d192 2316 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb 2317 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
43e9d192
IB
2318 }
2319 else
2320 {
c5e1f66e
JW
2321 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2322 unsigned reg2 = cfun->machine->frame.wb_candidate2;
80c11907 2323
c5e1f66e
JW
2324 if (fp_offset
2325 || reg1 == FIRST_PSEUDO_REGISTER
2326 || (reg2 == FIRST_PSEUDO_REGISTER
2327 && offset >= 256))
2328 {
2329 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2330 GEN_INT (-offset)));
2331 RTX_FRAME_RELATED_P (insn) = 1;
2332 }
2333 else
2334 {
2335 enum machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
2336
2337 skip_wb = true;
2338
2339 if (reg2 == FIRST_PSEUDO_REGISTER)
2340 aarch64_pushwb_single_reg (mode1, reg1, offset);
2341 else
2342 aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
2343 }
43e9d192
IB
2344 }
2345
c5e1f66e
JW
2346 aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2347 skip_wb);
ae13fce3
JW
2348 aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2349 skip_wb);
43e9d192
IB
2350 }
2351
 2352 /* When offset >= 512,
2353 sub sp, sp, #<outgoing_args_size> */
2354 if (frame_size > -1)
2355 {
2356 if (crtl->outgoing_args_size > 0)
2357 {
2358 insn = emit_insn (gen_add2_insn
2359 (stack_pointer_rtx,
2360 GEN_INT (- crtl->outgoing_args_size)));
2361 RTX_FRAME_RELATED_P (insn) = 1;
2362 }
2363 }
2364}
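/* For the 80-byte frame sketched after aarch64_layout_frame, the prologue
   takes the offset < 512 path above and emits approximately:

	stp	x29, x30, [sp, #-80]!
	add	x29, sp, #0
	stp	x19, x20, [sp, #16]
	str	d8, [sp, #32]

   (register numbers and offsets are illustrative).  */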
2365
4f942779
RL
2366/* Return TRUE if we can use a simple_return insn.
2367
 2368 This function checks whether the callee-saved register area is empty,
 2369 which means that no restore actions are needed. The pro_and_epilogue
 2370 pass uses this to check whether the shrink-wrapping optimization is feasible. */
2371
2372bool
2373aarch64_use_return_insn_p (void)
2374{
2375 if (!reload_completed)
2376 return false;
2377
2378 if (crtl->profile)
2379 return false;
2380
2381 aarch64_layout_frame ();
2382
2383 return cfun->machine->frame.frame_size == 0;
2384}
2385
43e9d192
IB
2386/* Generate the epilogue instructions for returning from a function. */
2387void
2388aarch64_expand_epilogue (bool for_sibcall)
2389{
1c960e02 2390 HOST_WIDE_INT frame_size, offset;
43e9d192 2391 HOST_WIDE_INT fp_offset;
dd991abb 2392 HOST_WIDE_INT hard_fp_offset;
5d8a22a5 2393 rtx_insn *insn;
43e9d192
IB
2394
2395 aarch64_layout_frame ();
43e9d192 2396
1c960e02 2397 offset = frame_size = cfun->machine->frame.frame_size;
dd991abb
RH
2398 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2399 fp_offset = frame_size - hard_fp_offset;
44c0e7b9
YZ
2400
 2401 /* Store pairs and load pairs have an offset range of only -512 to 504. */
43e9d192
IB
2402 if (offset >= 512)
2403 {
dd991abb 2404 offset = hard_fp_offset;
43e9d192
IB
2405 if (offset >= 512)
2406 offset = cfun->machine->frame.saved_regs_size;
2407
2408 frame_size -= (offset + crtl->outgoing_args_size);
2409 fp_offset = 0;
2410 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2411 {
2412 insn = emit_insn (gen_add2_insn
2413 (stack_pointer_rtx,
2414 GEN_INT (crtl->outgoing_args_size)));
2415 RTX_FRAME_RELATED_P (insn) = 1;
2416 }
2417 }
2418 else
2419 frame_size = -1;
2420
2421 /* If there were outgoing arguments or we've done dynamic stack
2422 allocation, then restore the stack pointer from the frame
2423 pointer. This is at most one insn and more efficient than using
2424 GCC's internal mechanism. */
2425 if (frame_pointer_needed
2426 && (crtl->outgoing_args_size || cfun->calls_alloca))
2427 {
2428 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2429 hard_frame_pointer_rtx,
8f454e9f
JW
2430 GEN_INT (0)));
2431 offset = offset - fp_offset;
43e9d192
IB
2432 }
2433
43e9d192
IB
2434 if (offset > 0)
2435 {
4b92caa1
JW
2436 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2437 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2438 bool skip_wb = true;
dd991abb 2439 rtx cfi_ops = NULL;
4b92caa1 2440
43e9d192 2441 if (frame_pointer_needed)
4b92caa1
JW
2442 fp_offset = 0;
2443 else if (fp_offset
2444 || reg1 == FIRST_PSEUDO_REGISTER
2445 || (reg2 == FIRST_PSEUDO_REGISTER
2446 && offset >= 256))
2447 skip_wb = false;
2448
2449 aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
dd991abb 2450 skip_wb, &cfi_ops);
4b92caa1 2451 aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
dd991abb 2452 skip_wb, &cfi_ops);
4b92caa1
JW
2453
2454 if (skip_wb)
43e9d192 2455 {
4b92caa1 2456 enum machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
dd991abb 2457 rtx rreg1 = gen_rtx_REG (mode1, reg1);
4b92caa1 2458
dd991abb 2459 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
4b92caa1 2460 if (reg2 == FIRST_PSEUDO_REGISTER)
dd991abb
RH
2461 {
2462 rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
2463 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
2464 mem = gen_rtx_MEM (mode1, mem);
2465 insn = emit_move_insn (rreg1, mem);
2466 }
4b92caa1
JW
2467 else
2468 {
dd991abb 2469 rtx rreg2 = gen_rtx_REG (mode1, reg2);
4b92caa1 2470
dd991abb
RH
2471 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
2472 insn = emit_insn (aarch64_gen_loadwb_pair
2473 (mode1, stack_pointer_rtx, rreg1,
2474 rreg2, offset));
4b92caa1 2475 }
43e9d192 2476 }
43e9d192
IB
2477 else
2478 {
2479 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2480 GEN_INT (offset)));
43e9d192 2481 }
43e9d192 2482
dd991abb
RH
2483 /* Reset the CFA to be SP + FRAME_SIZE. */
2484 rtx new_cfa = stack_pointer_rtx;
2485 if (frame_size > 0)
2486 new_cfa = plus_constant (Pmode, new_cfa, frame_size);
2487 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
2488 REG_NOTES (insn) = cfi_ops;
43e9d192 2489 RTX_FRAME_RELATED_P (insn) = 1;
43e9d192
IB
2490 }
2491
dd991abb 2492 if (frame_size > 0)
43e9d192
IB
2493 {
2494 if (frame_size >= 0x1000000)
2495 {
2496 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2497 emit_move_insn (op0, GEN_INT (frame_size));
dd991abb 2498 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
43e9d192 2499 }
dd991abb 2500 else
43e9d192 2501 {
dd991abb
RH
2502 int hi_ofs = frame_size & 0xfff000;
2503 int lo_ofs = frame_size & 0x000fff;
2504
2505 if (hi_ofs && lo_ofs)
43e9d192
IB
2506 {
2507 insn = emit_insn (gen_add2_insn
dd991abb 2508 (stack_pointer_rtx, GEN_INT (hi_ofs)));
43e9d192 2509 RTX_FRAME_RELATED_P (insn) = 1;
dd991abb 2510 frame_size = lo_ofs;
43e9d192 2511 }
dd991abb
RH
2512 insn = emit_insn (gen_add2_insn
2513 (stack_pointer_rtx, GEN_INT (frame_size)));
43e9d192
IB
2514 }
2515
dd991abb
RH
2516 /* Reset the CFA to be SP + 0. */
2517 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
2518 RTX_FRAME_RELATED_P (insn) = 1;
2519 }
2520
2521 /* Stack adjustment for exception handler. */
2522 if (crtl->calls_eh_return)
2523 {
2524 /* We need to unwind the stack by the offset computed by
2525 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
2526 to be SP; letting the CFA move during this adjustment
2527 is just as correct as retaining the CFA from the body
2528 of the function. Therefore, do nothing special. */
2529 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
43e9d192
IB
2530 }
2531
2532 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2533 if (!for_sibcall)
2534 emit_jump_insn (ret_rtx);
2535}
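/* The matching epilogue for the example prologue above restores the
   callee saves and then lets the write-back load pair pop the whole
   frame, roughly:

	ldp	x19, x20, [sp, #16]
	ldr	d8, [sp, #32]
	ldp	x29, x30, [sp], #80
	ret
   */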
2536
2537/* Return the place to copy the exception unwinding return address to.
 2538 This will probably be a stack slot, but could (in theory) be the
 2539 return register. */
2540rtx
2541aarch64_final_eh_return_addr (void)
2542{
1c960e02
MS
2543 HOST_WIDE_INT fp_offset;
2544
43e9d192 2545 aarch64_layout_frame ();
1c960e02
MS
2546
2547 fp_offset = cfun->machine->frame.frame_size
2548 - cfun->machine->frame.hard_fp_offset;
43e9d192
IB
2549
2550 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2551 return gen_rtx_REG (DImode, LR_REGNUM);
2552
2553 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2554 result in a store to save LR introduced by builtin_eh_return () being
2555 incorrectly deleted because the alias is not detected.
2556 So in the calculation of the address to copy the exception unwinding
2557 return address to, we note 2 cases.
2558 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2559 we return a SP-relative location since all the addresses are SP-relative
2560 in this case. This prevents the store from being optimized away.
2561 If the fp_offset is not 0, then the addresses will be FP-relative and
2562 therefore we return a FP-relative location. */
2563
2564 if (frame_pointer_needed)
2565 {
2566 if (fp_offset)
2567 return gen_frame_mem (DImode,
2568 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2569 else
2570 return gen_frame_mem (DImode,
2571 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2572 }
2573
2574 /* If FP is not needed, we calculate the location of LR, which would be
2575 at the top of the saved registers block. */
2576
2577 return gen_frame_mem (DImode,
2578 plus_constant (Pmode,
2579 stack_pointer_rtx,
2580 fp_offset
2581 + cfun->machine->frame.saved_regs_size
2582 - 2 * UNITS_PER_WORD));
2583}
2584
9dfc162c
JG
2585/* Possibly output code to build up a constant in a register. For
2586 the benefit of the costs infrastructure, returns the number of
2587 instructions which would be emitted. GENERATE inhibits or
2588 enables code generation. */
2589
2590static int
2591aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
43e9d192 2592{
9dfc162c
JG
2593 int insns = 0;
2594
43e9d192 2595 if (aarch64_bitmask_imm (val, DImode))
9dfc162c
JG
2596 {
2597 if (generate)
2598 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2599 insns = 1;
2600 }
43e9d192
IB
2601 else
2602 {
2603 int i;
2604 int ncount = 0;
2605 int zcount = 0;
2606 HOST_WIDE_INT valp = val >> 16;
2607 HOST_WIDE_INT valm;
2608 HOST_WIDE_INT tval;
2609
2610 for (i = 16; i < 64; i += 16)
2611 {
2612 valm = (valp & 0xffff);
2613
2614 if (valm != 0)
2615 ++ zcount;
2616
2617 if (valm != 0xffff)
2618 ++ ncount;
2619
2620 valp >>= 16;
2621 }
2622
2623 /* zcount contains the number of additional MOVK instructions
2624 required if the constant is built up with an initial MOVZ instruction,
2625 while ncount is the number of MOVK instructions required if starting
 2626 with a MOVN instruction. Choose the sequence that yields the
 2627 fewest instructions, preferring MOVZ when both counts are
 2628 the same. */
2629 if (ncount < zcount)
2630 {
9dfc162c
JG
2631 if (generate)
2632 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2633 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
43e9d192 2634 tval = 0xffff;
9dfc162c 2635 insns++;
43e9d192
IB
2636 }
2637 else
2638 {
9dfc162c
JG
2639 if (generate)
2640 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2641 GEN_INT (val & 0xffff));
43e9d192 2642 tval = 0;
9dfc162c 2643 insns++;
43e9d192
IB
2644 }
2645
2646 val >>= 16;
2647
2648 for (i = 16; i < 64; i += 16)
2649 {
2650 if ((val & 0xffff) != tval)
9dfc162c
JG
2651 {
2652 if (generate)
2653 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2654 GEN_INT (i),
2655 GEN_INT (val & 0xffff)));
2656 insns++;
2657 }
43e9d192
IB
2658 val >>= 16;
2659 }
2660 }
9dfc162c 2661 return insns;
43e9d192
IB
2662}
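/* Worked example (illustrative register): val = 0xffffffff1234ffff is not
   a bitmask immediate, and only one of its three upper 16-bit chunks
   differs from 0xffff, so ncount (1) < zcount (3) and the MOVN-based
   sequence is chosen:

	mov	x0, #-1			/* val | ~0xffff, i.e. the MOVN form.  */
	movk	x0, #0x1234, lsl #16

   Starting from MOVZ instead would have needed four instructions.  */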
2663
2664static void
d9600ae5 2665aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
43e9d192
IB
2666{
2667 HOST_WIDE_INT mdelta = delta;
d9600ae5
SN
2668 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2669 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
43e9d192
IB
2670
2671 if (mdelta < 0)
2672 mdelta = -mdelta;
2673
2674 if (mdelta >= 4096 * 4096)
2675 {
9dfc162c 2676 (void) aarch64_build_constant (scratchreg, delta, true);
d9600ae5 2677 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
43e9d192
IB
2678 }
2679 else if (mdelta > 0)
2680 {
43e9d192 2681 if (mdelta >= 4096)
d9600ae5
SN
2682 {
2683 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2684 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2685 if (delta < 0)
2686 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2687 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2688 else
2689 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2690 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2691 }
43e9d192 2692 if (mdelta % 4096 != 0)
d9600ae5
SN
2693 {
2694 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2695 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2696 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2697 }
43e9d192
IB
2698 }
2699}
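/* Worked example (scratch register shown here as x17): delta = 0x12345
   gives mdelta >= 4096, so the quotient 0x12 goes into the scratch
   register and is added with a 12-bit shift, then the remainder is added
   directly:

	mov	x17, #18
	add	x0, x0, x17, lsl #12
	add	x0, x0, #0x345
   */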
2700
2701/* Output code to add DELTA to the first argument, and then jump
2702 to FUNCTION. Used for C++ multiple inheritance. */
2703static void
2704aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2705 HOST_WIDE_INT delta,
2706 HOST_WIDE_INT vcall_offset,
2707 tree function)
2708{
2709 /* The this pointer is always in x0. Note that this differs from
 2710 Arm, where the this pointer may be bumped to r1 if r0 is required
2711 to return a pointer to an aggregate. On AArch64 a result value
2712 pointer will be in x8. */
2713 int this_regno = R0_REGNUM;
5d8a22a5
DM
2714 rtx this_rtx, temp0, temp1, addr, funexp;
2715 rtx_insn *insn;
43e9d192 2716
75f1d6fc
SN
2717 reload_completed = 1;
2718 emit_note (NOTE_INSN_PROLOGUE_END);
43e9d192
IB
2719
2720 if (vcall_offset == 0)
d9600ae5 2721 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
2722 else
2723 {
28514dda 2724 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
43e9d192 2725
75f1d6fc
SN
2726 this_rtx = gen_rtx_REG (Pmode, this_regno);
2727 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2728 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
43e9d192 2729
75f1d6fc
SN
2730 addr = this_rtx;
2731 if (delta != 0)
2732 {
2733 if (delta >= -256 && delta < 256)
2734 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2735 plus_constant (Pmode, this_rtx, delta));
2736 else
d9600ae5 2737 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
43e9d192
IB
2738 }
2739
28514dda
YZ
2740 if (Pmode == ptr_mode)
2741 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2742 else
2743 aarch64_emit_move (temp0,
2744 gen_rtx_ZERO_EXTEND (Pmode,
2745 gen_rtx_MEM (ptr_mode, addr)));
75f1d6fc 2746
28514dda 2747 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
75f1d6fc 2748 addr = plus_constant (Pmode, temp0, vcall_offset);
43e9d192
IB
2749 else
2750 {
9dfc162c 2751 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
75f1d6fc 2752 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
43e9d192
IB
2753 }
2754
28514dda
YZ
2755 if (Pmode == ptr_mode)
2756 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2757 else
2758 aarch64_emit_move (temp1,
2759 gen_rtx_SIGN_EXTEND (Pmode,
2760 gen_rtx_MEM (ptr_mode, addr)));
2761
75f1d6fc 2762 emit_insn (gen_add2_insn (this_rtx, temp1));
43e9d192
IB
2763 }
2764
75f1d6fc
SN
2765 /* Generate a tail call to the target function. */
2766 if (!TREE_USED (function))
2767 {
2768 assemble_external (function);
2769 TREE_USED (function) = 1;
2770 }
2771 funexp = XEXP (DECL_RTL (function), 0);
2772 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2773 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2774 SIBLING_CALL_P (insn) = 1;
2775
2776 insn = get_insns ();
2777 shorten_branches (insn);
2778 final_start_function (insn, file, 1);
2779 final (insn, file, 1);
43e9d192 2780 final_end_function ();
75f1d6fc
SN
2781
2782 /* Stop pretending to be a post-reload pass. */
2783 reload_completed = 0;
43e9d192
IB
2784}
2785
43e9d192
IB
2786static int
2787aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2788{
2789 if (GET_CODE (*x) == SYMBOL_REF)
2790 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2791
2792 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2793 TLS offsets, not real symbol references. */
2794 if (GET_CODE (*x) == UNSPEC
2795 && XINT (*x, 1) == UNSPEC_TLS)
2796 return -1;
2797
2798 return 0;
2799}
2800
2801static bool
2802aarch64_tls_referenced_p (rtx x)
2803{
2804 if (!TARGET_HAVE_TLS)
2805 return false;
2806
2807 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2808}
2809
2810
2811static int
2812aarch64_bitmasks_cmp (const void *i1, const void *i2)
2813{
2814 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2815 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2816
2817 if (*imm1 < *imm2)
2818 return -1;
2819 if (*imm1 > *imm2)
2820 return +1;
2821 return 0;
2822}
2823
2824
2825static void
2826aarch64_build_bitmask_table (void)
2827{
2828 unsigned HOST_WIDE_INT mask, imm;
2829 unsigned int log_e, e, s, r;
2830 unsigned int nimms = 0;
2831
2832 for (log_e = 1; log_e <= 6; log_e++)
2833 {
2834 e = 1 << log_e;
2835 if (e == 64)
2836 mask = ~(HOST_WIDE_INT) 0;
2837 else
2838 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2839 for (s = 1; s < e; s++)
2840 {
2841 for (r = 0; r < e; r++)
2842 {
 2843 /* Set S consecutive bits to 1 (S < 64).  */
 2844 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
 2845 /* Rotate right by R.  */
 2846 if (r != 0)
 2847 imm = ((imm >> r) | (imm << (e - r))) & mask;
 2848 /* Replicate the constant to 64 bits, depending on the element size.  */
2849 switch (log_e) {
2850 case 1: imm |= (imm << 2);
2851 case 2: imm |= (imm << 4);
2852 case 3: imm |= (imm << 8);
2853 case 4: imm |= (imm << 16);
2854 case 5: imm |= (imm << 32);
2855 case 6:
2856 break;
2857 default:
2858 gcc_unreachable ();
2859 }
2860 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2861 aarch64_bitmasks[nimms++] = imm;
2862 }
2863 }
2864 }
2865
2866 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2867 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2868 aarch64_bitmasks_cmp);
2869}
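/* Two entries this table ends up containing, by way of example:
   0x5555555555555555 comes from element size 2 with one set bit and no
   rotation, replicated across the register, and 0xff00ff00ff00ff00 comes
   from element size 16 with eight set bits rotated right by 8.  A value
   such as 0x1234 has more than one run of set bits within a single
   element and is never generated, so aarch64_bitmask_imm rejects it.  */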
2870
2871
2872/* Return true if val can be encoded as a 12-bit unsigned immediate with
2873 a left shift of 0 or 12 bits. */
2874bool
2875aarch64_uimm12_shift (HOST_WIDE_INT val)
2876{
2877 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2878 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2879 );
2880}
2881
2882
2883/* Return true if val is an immediate that can be loaded into a
2884 register by a MOVZ instruction. */
2885static bool
2886aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2887{
2888 if (GET_MODE_SIZE (mode) > 4)
2889 {
2890 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2891 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2892 return 1;
2893 }
2894 else
2895 {
2896 /* Ignore sign extension. */
2897 val &= (HOST_WIDE_INT) 0xffffffff;
2898 }
2899 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2900 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2901}
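/* For example, 0x0000abcd00000000 satisfies the 64-bit test above (a
   single 16-bit chunk at bit 32) and can be loaded with one MOVZ, whereas
   0x0001000100000000 cannot, and typically has to go through the
   multi-instruction MOVK path in aarch64_build_constant.  */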
2902
2903
2904/* Return true if val is a valid bitmask immediate. */
2905bool
2906aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2907{
2908 if (GET_MODE_SIZE (mode) < 8)
2909 {
2910 /* Replicate bit pattern. */
2911 val &= (HOST_WIDE_INT) 0xffffffff;
2912 val |= val << 32;
2913 }
2914 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2915 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2916}
2917
2918
2919/* Return true if val is an immediate that can be loaded into a
2920 register in a single instruction. */
2921bool
2922aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2923{
2924 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2925 return 1;
2926 return aarch64_bitmask_imm (val, mode);
2927}
2928
2929static bool
2930aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2931{
2932 rtx base, offset;
7eda14e1 2933
43e9d192
IB
2934 if (GET_CODE (x) == HIGH)
2935 return true;
2936
2937 split_const (x, &base, &offset);
2938 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
28514dda
YZ
2939 {
2940 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2941 != SYMBOL_FORCE_TO_MEM)
2942 return true;
2943 else
2944 /* Avoid generating a 64-bit relocation in ILP32; leave
2945 to aarch64_expand_mov_immediate to handle it properly. */
2946 return mode != ptr_mode;
2947 }
43e9d192
IB
2948
2949 return aarch64_tls_referenced_p (x);
2950}
2951
2952/* Return true if register REGNO is a valid index register.
2953 STRICT_P is true if REG_OK_STRICT is in effect. */
2954
2955bool
2956aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2957{
2958 if (!HARD_REGISTER_NUM_P (regno))
2959 {
2960 if (!strict_p)
2961 return true;
2962
2963 if (!reg_renumber)
2964 return false;
2965
2966 regno = reg_renumber[regno];
2967 }
2968 return GP_REGNUM_P (regno);
2969}
2970
2971/* Return true if register REGNO is a valid base register for mode MODE.
2972 STRICT_P is true if REG_OK_STRICT is in effect. */
2973
2974bool
2975aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2976{
2977 if (!HARD_REGISTER_NUM_P (regno))
2978 {
2979 if (!strict_p)
2980 return true;
2981
2982 if (!reg_renumber)
2983 return false;
2984
2985 regno = reg_renumber[regno];
2986 }
2987
2988 /* The fake registers will be eliminated to either the stack or
2989 hard frame pointer, both of which are usually valid base registers.
2990 Reload deals with the cases where the eliminated form isn't valid. */
2991 return (GP_REGNUM_P (regno)
2992 || regno == SP_REGNUM
2993 || regno == FRAME_POINTER_REGNUM
2994 || regno == ARG_POINTER_REGNUM);
2995}
2996
2997/* Return true if X is a valid base register for mode MODE.
2998 STRICT_P is true if REG_OK_STRICT is in effect. */
2999
3000static bool
3001aarch64_base_register_rtx_p (rtx x, bool strict_p)
3002{
3003 if (!strict_p && GET_CODE (x) == SUBREG)
3004 x = SUBREG_REG (x);
3005
3006 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
3007}
3008
3009/* Return true if address offset is a valid index. If it is, fill in INFO
3010 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
3011
3012static bool
3013aarch64_classify_index (struct aarch64_address_info *info, rtx x,
3014 enum machine_mode mode, bool strict_p)
3015{
3016 enum aarch64_address_type type;
3017 rtx index;
3018 int shift;
3019
3020 /* (reg:P) */
3021 if ((REG_P (x) || GET_CODE (x) == SUBREG)
3022 && GET_MODE (x) == Pmode)
3023 {
3024 type = ADDRESS_REG_REG;
3025 index = x;
3026 shift = 0;
3027 }
3028 /* (sign_extend:DI (reg:SI)) */
3029 else if ((GET_CODE (x) == SIGN_EXTEND
3030 || GET_CODE (x) == ZERO_EXTEND)
3031 && GET_MODE (x) == DImode
3032 && GET_MODE (XEXP (x, 0)) == SImode)
3033 {
3034 type = (GET_CODE (x) == SIGN_EXTEND)
3035 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3036 index = XEXP (x, 0);
3037 shift = 0;
3038 }
3039 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3040 else if (GET_CODE (x) == MULT
3041 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3042 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3043 && GET_MODE (XEXP (x, 0)) == DImode
3044 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3045 && CONST_INT_P (XEXP (x, 1)))
3046 {
3047 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3048 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3049 index = XEXP (XEXP (x, 0), 0);
3050 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3051 }
3052 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3053 else if (GET_CODE (x) == ASHIFT
3054 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3055 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3056 && GET_MODE (XEXP (x, 0)) == DImode
3057 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3058 && CONST_INT_P (XEXP (x, 1)))
3059 {
3060 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3061 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3062 index = XEXP (XEXP (x, 0), 0);
3063 shift = INTVAL (XEXP (x, 1));
3064 }
3065 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3066 else if ((GET_CODE (x) == SIGN_EXTRACT
3067 || GET_CODE (x) == ZERO_EXTRACT)
3068 && GET_MODE (x) == DImode
3069 && GET_CODE (XEXP (x, 0)) == MULT
3070 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3071 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3072 {
3073 type = (GET_CODE (x) == SIGN_EXTRACT)
3074 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3075 index = XEXP (XEXP (x, 0), 0);
3076 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3077 if (INTVAL (XEXP (x, 1)) != 32 + shift
3078 || INTVAL (XEXP (x, 2)) != 0)
3079 shift = -1;
3080 }
3081 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3082 (const_int 0xffffffff<<shift)) */
3083 else if (GET_CODE (x) == AND
3084 && GET_MODE (x) == DImode
3085 && GET_CODE (XEXP (x, 0)) == MULT
3086 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3087 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3088 && CONST_INT_P (XEXP (x, 1)))
3089 {
3090 type = ADDRESS_REG_UXTW;
3091 index = XEXP (XEXP (x, 0), 0);
3092 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3093 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3094 shift = -1;
3095 }
3096 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3097 else if ((GET_CODE (x) == SIGN_EXTRACT
3098 || GET_CODE (x) == ZERO_EXTRACT)
3099 && GET_MODE (x) == DImode
3100 && GET_CODE (XEXP (x, 0)) == ASHIFT
3101 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3102 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3103 {
3104 type = (GET_CODE (x) == SIGN_EXTRACT)
3105 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3106 index = XEXP (XEXP (x, 0), 0);
3107 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3108 if (INTVAL (XEXP (x, 1)) != 32 + shift
3109 || INTVAL (XEXP (x, 2)) != 0)
3110 shift = -1;
3111 }
3112 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3113 (const_int 0xffffffff<<shift)) */
3114 else if (GET_CODE (x) == AND
3115 && GET_MODE (x) == DImode
3116 && GET_CODE (XEXP (x, 0)) == ASHIFT
3117 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3118 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3119 && CONST_INT_P (XEXP (x, 1)))
3120 {
3121 type = ADDRESS_REG_UXTW;
3122 index = XEXP (XEXP (x, 0), 0);
3123 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3124 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3125 shift = -1;
3126 }
3127 /* (mult:P (reg:P) (const_int scale)) */
3128 else if (GET_CODE (x) == MULT
3129 && GET_MODE (x) == Pmode
3130 && GET_MODE (XEXP (x, 0)) == Pmode
3131 && CONST_INT_P (XEXP (x, 1)))
3132 {
3133 type = ADDRESS_REG_REG;
3134 index = XEXP (x, 0);
3135 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3136 }
3137 /* (ashift:P (reg:P) (const_int shift)) */
3138 else if (GET_CODE (x) == ASHIFT
3139 && GET_MODE (x) == Pmode
3140 && GET_MODE (XEXP (x, 0)) == Pmode
3141 && CONST_INT_P (XEXP (x, 1)))
3142 {
3143 type = ADDRESS_REG_REG;
3144 index = XEXP (x, 0);
3145 shift = INTVAL (XEXP (x, 1));
3146 }
3147 else
3148 return false;
3149
3150 if (GET_CODE (index) == SUBREG)
3151 index = SUBREG_REG (index);
3152
3153 if ((shift == 0 ||
3154 (shift > 0 && shift <= 3
3155 && (1 << shift) == GET_MODE_SIZE (mode)))
3156 && REG_P (index)
3157 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3158 {
3159 info->type = type;
3160 info->offset = index;
3161 info->shift = shift;
3162 return true;
3163 }
3164
3165 return false;
3166}
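/* Example of a classified index: for an int access, the address
   (plus (reg:DI x0) (mult:DI (sign_extend:DI (reg:SI w1)) (const_int 4)))
   reaches the MULT/SIGN_EXTEND case above with shift = 2, giving
   ADDRESS_REG_SXTW and ultimately an instruction such as
   ldr w2, [x0, w1, sxtw #2].  */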
3167
44707478
JW
3168bool
3169aarch64_offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
43e9d192
IB
3170{
3171 return (offset >= -64 * GET_MODE_SIZE (mode)
3172 && offset < 64 * GET_MODE_SIZE (mode)
3173 && offset % GET_MODE_SIZE (mode) == 0);
3174}
3175
3176static inline bool
3177offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3178 HOST_WIDE_INT offset)
3179{
3180 return offset >= -256 && offset < 256;
3181}
3182
3183static inline bool
3184offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3185{
3186 return (offset >= 0
3187 && offset < 4096 * GET_MODE_SIZE (mode)
3188 && offset % GET_MODE_SIZE (mode) == 0);
3189}
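/* Concrete ranges implied by the three predicates above for DImode
   (8-byte) accesses: the 7-bit signed scaled form covers -512 to 504 in
   steps of 8, the 9-bit unscaled form covers -256 to 255, and the 12-bit
   unsigned scaled form covers 0 to 32760 in steps of 8.  */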
3190
3191/* Return true if X is a valid address for machine mode MODE. If it is,
3192 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3193 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3194
3195static bool
3196aarch64_classify_address (struct aarch64_address_info *info,
3197 rtx x, enum machine_mode mode,
3198 RTX_CODE outer_code, bool strict_p)
3199{
3200 enum rtx_code code = GET_CODE (x);
3201 rtx op0, op1;
3202 bool allow_reg_index_p =
348d4b0a
BC
3203 outer_code != PARALLEL && (GET_MODE_SIZE (mode) != 16
3204 || aarch64_vector_mode_supported_p (mode));
43e9d192
IB
3205 /* Don't support anything other than POST_INC or REG addressing for
3206 AdvSIMD. */
348d4b0a 3207 if (aarch64_vect_struct_mode_p (mode)
43e9d192
IB
3208 && (code != POST_INC && code != REG))
3209 return false;
3210
3211 switch (code)
3212 {
3213 case REG:
3214 case SUBREG:
3215 info->type = ADDRESS_REG_IMM;
3216 info->base = x;
3217 info->offset = const0_rtx;
3218 return aarch64_base_register_rtx_p (x, strict_p);
3219
3220 case PLUS:
3221 op0 = XEXP (x, 0);
3222 op1 = XEXP (x, 1);
15c0c5c9
JW
3223
3224 if (! strict_p
4aa81c2e 3225 && REG_P (op0)
15c0c5c9
JW
3226 && (op0 == virtual_stack_vars_rtx
3227 || op0 == frame_pointer_rtx
3228 || op0 == arg_pointer_rtx)
4aa81c2e 3229 && CONST_INT_P (op1))
15c0c5c9
JW
3230 {
3231 info->type = ADDRESS_REG_IMM;
3232 info->base = op0;
3233 info->offset = op1;
3234
3235 return true;
3236 }
3237
43e9d192
IB
3238 if (GET_MODE_SIZE (mode) != 0
3239 && CONST_INT_P (op1)
3240 && aarch64_base_register_rtx_p (op0, strict_p))
3241 {
3242 HOST_WIDE_INT offset = INTVAL (op1);
3243
3244 info->type = ADDRESS_REG_IMM;
3245 info->base = op0;
3246 info->offset = op1;
3247
3248 /* TImode and TFmode values are allowed in both pairs of X
3249 registers and individual Q registers. The available
3250 address modes are:
3251 X,X: 7-bit signed scaled offset
3252 Q: 9-bit signed offset
3253 We conservatively require an offset representable in either mode.
3254 */
3255 if (mode == TImode || mode == TFmode)
44707478 3256 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
3257 && offset_9bit_signed_unscaled_p (mode, offset));
3258
3259 if (outer_code == PARALLEL)
3260 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 3261 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
3262 else
3263 return (offset_9bit_signed_unscaled_p (mode, offset)
3264 || offset_12bit_unsigned_scaled_p (mode, offset));
3265 }
3266
3267 if (allow_reg_index_p)
3268 {
3269 /* Look for base + (scaled/extended) index register. */
3270 if (aarch64_base_register_rtx_p (op0, strict_p)
3271 && aarch64_classify_index (info, op1, mode, strict_p))
3272 {
3273 info->base = op0;
3274 return true;
3275 }
3276 if (aarch64_base_register_rtx_p (op1, strict_p)
3277 && aarch64_classify_index (info, op0, mode, strict_p))
3278 {
3279 info->base = op1;
3280 return true;
3281 }
3282 }
3283
3284 return false;
3285
3286 case POST_INC:
3287 case POST_DEC:
3288 case PRE_INC:
3289 case PRE_DEC:
3290 info->type = ADDRESS_REG_WB;
3291 info->base = XEXP (x, 0);
3292 info->offset = NULL_RTX;
3293 return aarch64_base_register_rtx_p (info->base, strict_p);
3294
3295 case POST_MODIFY:
3296 case PRE_MODIFY:
3297 info->type = ADDRESS_REG_WB;
3298 info->base = XEXP (x, 0);
3299 if (GET_CODE (XEXP (x, 1)) == PLUS
3300 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3301 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3302 && aarch64_base_register_rtx_p (info->base, strict_p))
3303 {
3304 HOST_WIDE_INT offset;
3305 info->offset = XEXP (XEXP (x, 1), 1);
3306 offset = INTVAL (info->offset);
3307
3308 /* TImode and TFmode values are allowed in both pairs of X
3309 registers and individual Q registers. The available
3310 address modes are:
3311 X,X: 7-bit signed scaled offset
3312 Q: 9-bit signed offset
3313 We conservatively require an offset representable in either mode.
3314 */
3315 if (mode == TImode || mode == TFmode)
44707478 3316 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
43e9d192
IB
3317 && offset_9bit_signed_unscaled_p (mode, offset));
3318
3319 if (outer_code == PARALLEL)
3320 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
44707478 3321 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
43e9d192
IB
3322 else
3323 return offset_9bit_signed_unscaled_p (mode, offset);
3324 }
3325 return false;
3326
3327 case CONST:
3328 case SYMBOL_REF:
3329 case LABEL_REF:
79517551
SN
3330 /* load literal: pc-relative constant pool entry. Only supported
3331 for SI mode or larger. */
43e9d192 3332 info->type = ADDRESS_SYMBOLIC;
79517551 3333 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
43e9d192
IB
3334 {
3335 rtx sym, addend;
3336
3337 split_const (x, &sym, &addend);
3338 return (GET_CODE (sym) == LABEL_REF
3339 || (GET_CODE (sym) == SYMBOL_REF
3340 && CONSTANT_POOL_ADDRESS_P (sym)));
3341 }
3342 return false;
3343
3344 case LO_SUM:
3345 info->type = ADDRESS_LO_SUM;
3346 info->base = XEXP (x, 0);
3347 info->offset = XEXP (x, 1);
3348 if (allow_reg_index_p
3349 && aarch64_base_register_rtx_p (info->base, strict_p))
3350 {
3351 rtx sym, offs;
3352 split_const (info->offset, &sym, &offs);
3353 if (GET_CODE (sym) == SYMBOL_REF
3354 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3355 == SYMBOL_SMALL_ABSOLUTE))
3356 {
3357 /* The symbol and offset must be aligned to the access size. */
3358 unsigned int align;
3359 unsigned int ref_size;
3360
3361 if (CONSTANT_POOL_ADDRESS_P (sym))
3362 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3363 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3364 {
3365 tree exp = SYMBOL_REF_DECL (sym);
3366 align = TYPE_ALIGN (TREE_TYPE (exp));
3367 align = CONSTANT_ALIGNMENT (exp, align);
3368 }
3369 else if (SYMBOL_REF_DECL (sym))
3370 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
6c031d8d
KV
3371 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3372 && SYMBOL_REF_BLOCK (sym) != NULL)
3373 align = SYMBOL_REF_BLOCK (sym)->alignment;
43e9d192
IB
3374 else
3375 align = BITS_PER_UNIT;
3376
3377 ref_size = GET_MODE_SIZE (mode);
3378 if (ref_size == 0)
3379 ref_size = GET_MODE_SIZE (DImode);
3380
3381 return ((INTVAL (offs) & (ref_size - 1)) == 0
3382 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3383 }
3384 }
3385 return false;
3386
3387 default:
3388 return false;
3389 }
3390}
3391
3392bool
3393aarch64_symbolic_address_p (rtx x)
3394{
3395 rtx offset;
3396
3397 split_const (x, &x, &offset);
3398 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3399}
3400
3401/* Classify the base of symbolic expression X, given that X appears in
3402 context CONTEXT. */
da4f13a4
MS
3403
3404enum aarch64_symbol_type
3405aarch64_classify_symbolic_expression (rtx x,
3406 enum aarch64_symbol_context context)
43e9d192
IB
3407{
3408 rtx offset;
da4f13a4 3409
43e9d192
IB
3410 split_const (x, &x, &offset);
3411 return aarch64_classify_symbol (x, context);
3412}
3413
3414
3415/* Return TRUE if X is a legitimate address for accessing memory in
3416 mode MODE. */
3417static bool
3418aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3419{
3420 struct aarch64_address_info addr;
3421
3422 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3423}
3424
3425/* Return TRUE if X is a legitimate address for accessing memory in
3426 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3427 pair operation. */
3428bool
3429aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
aef66c94 3430 RTX_CODE outer_code, bool strict_p)
43e9d192
IB
3431{
3432 struct aarch64_address_info addr;
3433
3434 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3435}
3436
3437/* Return TRUE if rtx X is immediate constant 0.0 */
3438bool
3520f7cc 3439aarch64_float_const_zero_rtx_p (rtx x)
43e9d192
IB
3440{
3441 REAL_VALUE_TYPE r;
3442
3443 if (GET_MODE (x) == VOIDmode)
3444 return false;
3445
3446 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3447 if (REAL_VALUE_MINUS_ZERO (r))
3448 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3449 return REAL_VALUES_EQUAL (r, dconst0);
3450}
3451
70f09188
AP
3452/* Return the fixed registers used for condition codes. */
3453
3454static bool
3455aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3456{
3457 *p1 = CC_REGNUM;
3458 *p2 = INVALID_REGNUM;
3459 return true;
3460}
3461
78607708
TV
3462/* Emit call insn with PAT and do aarch64-specific handling. */
3463
d07a3fed 3464void
78607708
TV
3465aarch64_emit_call_insn (rtx pat)
3466{
3467 rtx insn = emit_call_insn (pat);
3468
3469 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
3470 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
3471 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
3472}
3473
43e9d192
IB
3474enum machine_mode
3475aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3476{
3477 /* All floating point compares return CCFP if it is an equality
3478 comparison, and CCFPE otherwise. */
3479 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3480 {
3481 switch (code)
3482 {
3483 case EQ:
3484 case NE:
3485 case UNORDERED:
3486 case ORDERED:
3487 case UNLT:
3488 case UNLE:
3489 case UNGT:
3490 case UNGE:
3491 case UNEQ:
3492 case LTGT:
3493 return CCFPmode;
3494
3495 case LT:
3496 case LE:
3497 case GT:
3498 case GE:
3499 return CCFPEmode;
3500
3501 default:
3502 gcc_unreachable ();
3503 }
3504 }
3505
3506 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3507 && y == const0_rtx
3508 && (code == EQ || code == NE || code == LT || code == GE)
b056c910
N
3509 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3510 || GET_CODE (x) == NEG))
43e9d192
IB
3511 return CC_NZmode;
3512
1c992d1e 3513 /* A compare with a shifted operand. Because of canonicalization,
43e9d192
IB
3514 the comparison will have to be swapped when we emit the assembly
3515 code. */
3516 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 3517 && (REG_P (y) || GET_CODE (y) == SUBREG)
43e9d192
IB
3518 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3519 || GET_CODE (x) == LSHIFTRT
1c992d1e 3520 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
43e9d192
IB
3521 return CC_SWPmode;
3522
1c992d1e
RE
3523 /* Similarly for a negated operand, but we can only do this for
3524 equalities. */
3525 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
4aa81c2e 3526 && (REG_P (y) || GET_CODE (y) == SUBREG)
1c992d1e
RE
3527 && (code == EQ || code == NE)
3528 && GET_CODE (x) == NEG)
3529 return CC_Zmode;
3530
43e9d192
IB
3531 /* A compare of a mode narrower than SI mode against zero can be done
3532 by extending the value in the comparison. */
3533 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3534 && y == const0_rtx)
3535 /* Only use sign-extension if we really need it. */
3536 return ((code == GT || code == GE || code == LE || code == LT)
3537 ? CC_SESWPmode : CC_ZESWPmode);
3538
3539 /* For everything else, return CCmode. */
3540 return CCmode;
3541}
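/* As an example of the swapped mode: for
   (compare (ashift:SI (reg w0) (const_int 3)) (reg w1)) the shifted
   operand must end up as the second operand of the SUBS, so the
   comparison is emitted as "cmp w1, w0, lsl #3" and CC_SWPmode makes
   aarch64_get_condition_code map GE to LE, GT to LT and so on to
   compensate.  */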
3542
cd5660ab 3543int
43e9d192
IB
3544aarch64_get_condition_code (rtx x)
3545{
3546 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3547 enum rtx_code comp_code = GET_CODE (x);
3548
3549 if (GET_MODE_CLASS (mode) != MODE_CC)
3550 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3551
3552 switch (mode)
3553 {
3554 case CCFPmode:
3555 case CCFPEmode:
3556 switch (comp_code)
3557 {
3558 case GE: return AARCH64_GE;
3559 case GT: return AARCH64_GT;
3560 case LE: return AARCH64_LS;
3561 case LT: return AARCH64_MI;
3562 case NE: return AARCH64_NE;
3563 case EQ: return AARCH64_EQ;
3564 case ORDERED: return AARCH64_VC;
3565 case UNORDERED: return AARCH64_VS;
3566 case UNLT: return AARCH64_LT;
3567 case UNLE: return AARCH64_LE;
3568 case UNGT: return AARCH64_HI;
3569 case UNGE: return AARCH64_PL;
cd5660ab 3570 default: return -1;
43e9d192
IB
3571 }
3572 break;
3573
3574 case CCmode:
3575 switch (comp_code)
3576 {
3577 case NE: return AARCH64_NE;
3578 case EQ: return AARCH64_EQ;
3579 case GE: return AARCH64_GE;
3580 case GT: return AARCH64_GT;
3581 case LE: return AARCH64_LE;
3582 case LT: return AARCH64_LT;
3583 case GEU: return AARCH64_CS;
3584 case GTU: return AARCH64_HI;
3585 case LEU: return AARCH64_LS;
3586 case LTU: return AARCH64_CC;
cd5660ab 3587 default: return -1;
43e9d192
IB
3588 }
3589 break;
3590
3591 case CC_SWPmode:
3592 case CC_ZESWPmode:
3593 case CC_SESWPmode:
3594 switch (comp_code)
3595 {
3596 case NE: return AARCH64_NE;
3597 case EQ: return AARCH64_EQ;
3598 case GE: return AARCH64_LE;
3599 case GT: return AARCH64_LT;
3600 case LE: return AARCH64_GE;
3601 case LT: return AARCH64_GT;
3602 case GEU: return AARCH64_LS;
3603 case GTU: return AARCH64_CC;
3604 case LEU: return AARCH64_CS;
3605 case LTU: return AARCH64_HI;
cd5660ab 3606 default: return -1;
43e9d192
IB
3607 }
3608 break;
3609
3610 case CC_NZmode:
3611 switch (comp_code)
3612 {
3613 case NE: return AARCH64_NE;
3614 case EQ: return AARCH64_EQ;
3615 case GE: return AARCH64_PL;
3616 case LT: return AARCH64_MI;
cd5660ab 3617 default: return -1;
43e9d192
IB
3618 }
3619 break;
3620
1c992d1e
RE
3621 case CC_Zmode:
3622 switch (comp_code)
3623 {
3624 case NE: return AARCH64_NE;
3625 case EQ: return AARCH64_EQ;
cd5660ab 3626 default: return -1;
1c992d1e
RE
3627 }
3628 break;
3629
43e9d192 3630 default:
cd5660ab 3631 return -1;
43e9d192
IB
3632 break;
3633 }
3634}
3635
ddeabd3e
AL
3636bool
3637aarch64_const_vec_all_same_in_range_p (rtx x,
3638 HOST_WIDE_INT minval,
3639 HOST_WIDE_INT maxval)
3640{
3641 HOST_WIDE_INT firstval;
3642 int count, i;
3643
3644 if (GET_CODE (x) != CONST_VECTOR
3645 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
3646 return false;
3647
3648 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
3649 if (firstval < minval || firstval > maxval)
3650 return false;
3651
3652 count = CONST_VECTOR_NUNITS (x);
3653 for (i = 1; i < count; i++)
3654 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
3655 return false;
3656
3657 return true;
3658}
3659
3660bool
3661aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
3662{
3663 return aarch64_const_vec_all_same_in_range_p (x, val, val);
3664}
3665
43e9d192
IB
3666static unsigned
3667bit_count (unsigned HOST_WIDE_INT value)
3668{
3669 unsigned count = 0;
3670
3671 while (value)
3672 {
3673 count++;
3674 value &= value - 1;
3675 }
3676
3677 return count;
3678}
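/* Added note, not part of the original source: the loop above is the
   classic Kernighan popcount idiom.  Each "value &= value - 1" clears the
   lowest set bit, so the loop iterates once per set bit.  For example,
   0b101100 -> 0b101000 -> 0b100000 -> 0, giving a count of 3.  */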
3679
3680void
3681aarch64_print_operand (FILE *f, rtx x, char code)
3682{
3683 switch (code)
3684 {
f541a481
KT
3685 /* An integer or symbol address without a preceding # sign. */
3686 case 'c':
3687 switch (GET_CODE (x))
3688 {
3689 case CONST_INT:
3690 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3691 break;
3692
3693 case SYMBOL_REF:
3694 output_addr_const (f, x);
3695 break;
3696
3697 case CONST:
3698 if (GET_CODE (XEXP (x, 0)) == PLUS
3699 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3700 {
3701 output_addr_const (f, x);
3702 break;
3703 }
3704 /* Fall through. */
3705
3706 default:
3707 output_operand_lossage ("Unsupported operand for code '%c'", code);
3708 }
3709 break;
3710
43e9d192
IB
3711 case 'e':
3712 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3713 {
3714 int n;
3715
4aa81c2e 3716 if (!CONST_INT_P (x)
43e9d192
IB
3717 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3718 {
3719 output_operand_lossage ("invalid operand for '%%%c'", code);
3720 return;
3721 }
3722
3723 switch (n)
3724 {
3725 case 3:
3726 fputc ('b', f);
3727 break;
3728 case 4:
3729 fputc ('h', f);
3730 break;
3731 case 5:
3732 fputc ('w', f);
3733 break;
3734 default:
3735 output_operand_lossage ("invalid operand for '%%%c'", code);
3736 return;
3737 }
3738 }
3739 break;
3740
3741 case 'p':
3742 {
3743 int n;
3744
3745 /* Print N such that 2^N == X. */
4aa81c2e 3746 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
43e9d192
IB
3747 {
3748 output_operand_lossage ("invalid operand for '%%%c'", code);
3749 return;
3750 }
3751
3752 asm_fprintf (f, "%d", n);
3753 }
3754 break;
3755
3756 case 'P':
3757 /* Print the number of non-zero bits in X (a const_int). */
4aa81c2e 3758 if (!CONST_INT_P (x))
43e9d192
IB
3759 {
3760 output_operand_lossage ("invalid operand for '%%%c'", code);
3761 return;
3762 }
3763
3764 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3765 break;
3766
3767 case 'H':
3768 /* Print the higher numbered register of a pair (TImode) of regs. */
4aa81c2e 3769 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
43e9d192
IB
3770 {
3771 output_operand_lossage ("invalid operand for '%%%c'", code);
3772 return;
3773 }
3774
01a3a324 3775 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
43e9d192
IB
3776 break;
3777
43e9d192 3778 case 'm':
cd5660ab
KT
3779 {
3780 int cond_code;
3781 /* Print a condition (eq, ne, etc). */
43e9d192 3782
cd5660ab
KT
3783 /* CONST_TRUE_RTX means always -- that's the default. */
3784 if (x == const_true_rtx)
43e9d192 3785 return;
43e9d192 3786
cd5660ab
KT
3787 if (!COMPARISON_P (x))
3788 {
3789 output_operand_lossage ("invalid operand for '%%%c'", code);
3790 return;
3791 }
3792
3793 cond_code = aarch64_get_condition_code (x);
3794 gcc_assert (cond_code >= 0);
3795 fputs (aarch64_condition_codes[cond_code], f);
3796 }
43e9d192
IB
3797 break;
3798
3799 case 'M':
cd5660ab
KT
3800 {
3801 int cond_code;
3802 /* Print the inverse of a condition (eq <-> ne, etc). */
43e9d192 3803
cd5660ab
KT
3804 /* CONST_TRUE_RTX means never -- that's the default. */
3805 if (x == const_true_rtx)
3806 {
3807 fputs ("nv", f);
3808 return;
3809 }
43e9d192 3810
cd5660ab
KT
3811 if (!COMPARISON_P (x))
3812 {
3813 output_operand_lossage ("invalid operand for '%%%c'", code);
3814 return;
3815 }
3816 cond_code = aarch64_get_condition_code (x);
3817 gcc_assert (cond_code >= 0);
3818 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3819 (cond_code)], f);
3820 }
43e9d192
IB
3821 break;
3822
3823 case 'b':
3824 case 'h':
3825 case 's':
3826 case 'd':
3827 case 'q':
3828 /* Print a scalar FP/SIMD register name. */
3829 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3830 {
3831 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3832 return;
3833 }
50ce6f88 3834 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
43e9d192
IB
3835 break;
3836
3837 case 'S':
3838 case 'T':
3839 case 'U':
3840 case 'V':
3841 /* Print the first FP/SIMD register name in a list. */
3842 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3843 {
3844 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3845 return;
3846 }
50ce6f88 3847 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
43e9d192
IB
3848 break;
3849
a05c0ddf 3850 case 'X':
50d38551 3851 /* Print bottom 16 bits of integer constant in hex. */
4aa81c2e 3852 if (!CONST_INT_P (x))
a05c0ddf
IB
3853 {
3854 output_operand_lossage ("invalid operand for '%%%c'", code);
3855 return;
3856 }
50d38551 3857 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
a05c0ddf
IB
3858 break;
3859
43e9d192
IB
3860 case 'w':
3861 case 'x':
3862 /* Print a general register name or the zero register (32-bit or
3863 64-bit). */
3520f7cc
JG
3864 if (x == const0_rtx
3865 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
43e9d192 3866 {
50ce6f88 3867 asm_fprintf (f, "%czr", code);
43e9d192
IB
3868 break;
3869 }
3870
3871 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3872 {
50ce6f88 3873 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
43e9d192
IB
3874 break;
3875 }
3876
3877 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3878 {
50ce6f88 3879 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
43e9d192
IB
3880 break;
3881 }
3882
3883 /* Fall through */
3884
3885 case 0:
3886	 /* Print a normal operand; if it's a general register, then we
3887	 assume DImode. */
3888 if (x == NULL)
3889 {
3890 output_operand_lossage ("missing operand");
3891 return;
3892 }
3893
3894 switch (GET_CODE (x))
3895 {
3896 case REG:
01a3a324 3897 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
43e9d192
IB
3898 break;
3899
3900 case MEM:
3901 aarch64_memory_reference_mode = GET_MODE (x);
3902 output_address (XEXP (x, 0));
3903 break;
3904
3905 case LABEL_REF:
3906 case SYMBOL_REF:
3907 output_addr_const (asm_out_file, x);
3908 break;
3909
3910 case CONST_INT:
3911 asm_fprintf (f, "%wd", INTVAL (x));
3912 break;
3913
3914 case CONST_VECTOR:
3520f7cc
JG
3915 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3916 {
ddeabd3e
AL
3917 gcc_assert (
3918 aarch64_const_vec_all_same_in_range_p (x,
3919 HOST_WIDE_INT_MIN,
3920 HOST_WIDE_INT_MAX));
3520f7cc
JG
3921 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3922 }
3923 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3924 {
3925 fputc ('0', f);
3926 }
3927 else
3928 gcc_unreachable ();
43e9d192
IB
3929 break;
3930
3520f7cc
JG
3931 case CONST_DOUBLE:
3932 /* CONST_DOUBLE can represent a double-width integer.
3933 In this case, the mode of x is VOIDmode. */
3934 if (GET_MODE (x) == VOIDmode)
3935 ; /* Do Nothing. */
3936 else if (aarch64_float_const_zero_rtx_p (x))
3937 {
3938 fputc ('0', f);
3939 break;
3940 }
3941 else if (aarch64_float_const_representable_p (x))
3942 {
3943#define buf_size 20
3944 char float_buf[buf_size] = {'\0'};
3945 REAL_VALUE_TYPE r;
3946 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3947 real_to_decimal_for_mode (float_buf, &r,
3948 buf_size, buf_size,
3949 1, GET_MODE (x));
3950 asm_fprintf (asm_out_file, "%s", float_buf);
3951 break;
3952#undef buf_size
3953 }
3954 output_operand_lossage ("invalid constant");
3955 return;
43e9d192
IB
3956 default:
3957 output_operand_lossage ("invalid operand");
3958 return;
3959 }
3960 break;
3961
3962 case 'A':
3963 if (GET_CODE (x) == HIGH)
3964 x = XEXP (x, 0);
3965
3966 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3967 {
3968 case SYMBOL_SMALL_GOT:
3969 asm_fprintf (asm_out_file, ":got:");
3970 break;
3971
3972 case SYMBOL_SMALL_TLSGD:
3973 asm_fprintf (asm_out_file, ":tlsgd:");
3974 break;
3975
3976 case SYMBOL_SMALL_TLSDESC:
3977 asm_fprintf (asm_out_file, ":tlsdesc:");
3978 break;
3979
3980 case SYMBOL_SMALL_GOTTPREL:
3981 asm_fprintf (asm_out_file, ":gottprel:");
3982 break;
3983
3984 case SYMBOL_SMALL_TPREL:
3985 asm_fprintf (asm_out_file, ":tprel:");
3986 break;
3987
87dd8ab0
MS
3988 case SYMBOL_TINY_GOT:
3989 gcc_unreachable ();
3990 break;
3991
43e9d192
IB
3992 default:
3993 break;
3994 }
3995 output_addr_const (asm_out_file, x);
3996 break;
3997
3998 case 'L':
3999 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4000 {
4001 case SYMBOL_SMALL_GOT:
4002 asm_fprintf (asm_out_file, ":lo12:");
4003 break;
4004
4005 case SYMBOL_SMALL_TLSGD:
4006 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
4007 break;
4008
4009 case SYMBOL_SMALL_TLSDESC:
4010 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
4011 break;
4012
4013 case SYMBOL_SMALL_GOTTPREL:
4014 asm_fprintf (asm_out_file, ":gottprel_lo12:");
4015 break;
4016
4017 case SYMBOL_SMALL_TPREL:
4018 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
4019 break;
4020
87dd8ab0
MS
4021 case SYMBOL_TINY_GOT:
4022 asm_fprintf (asm_out_file, ":got:");
4023 break;
4024
43e9d192
IB
4025 default:
4026 break;
4027 }
4028 output_addr_const (asm_out_file, x);
4029 break;
4030
4031 case 'G':
4032
4033 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4034 {
4035 case SYMBOL_SMALL_TPREL:
4036 asm_fprintf (asm_out_file, ":tprel_hi12:");
4037 break;
4038 default:
4039 break;
4040 }
4041 output_addr_const (asm_out_file, x);
4042 break;
4043
4044 default:
4045 output_operand_lossage ("invalid operand prefix '%%%c'", code);
4046 return;
4047 }
4048}
4049
4050void
4051aarch64_print_operand_address (FILE *f, rtx x)
4052{
4053 struct aarch64_address_info addr;
4054
4055 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
4056 MEM, true))
4057 switch (addr.type)
4058 {
4059 case ADDRESS_REG_IMM:
4060 if (addr.offset == const0_rtx)
01a3a324 4061 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
43e9d192 4062 else
16a3246f 4063 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
43e9d192
IB
4064 INTVAL (addr.offset));
4065 return;
4066
4067 case ADDRESS_REG_REG:
4068 if (addr.shift == 0)
16a3246f 4069 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
01a3a324 4070 reg_names [REGNO (addr.offset)]);
43e9d192 4071 else
16a3246f 4072 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
01a3a324 4073 reg_names [REGNO (addr.offset)], addr.shift);
43e9d192
IB
4074 return;
4075
4076 case ADDRESS_REG_UXTW:
4077 if (addr.shift == 0)
16a3246f 4078 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
4079 REGNO (addr.offset) - R0_REGNUM);
4080 else
16a3246f 4081 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
4082 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4083 return;
4084
4085 case ADDRESS_REG_SXTW:
4086 if (addr.shift == 0)
16a3246f 4087 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
43e9d192
IB
4088 REGNO (addr.offset) - R0_REGNUM);
4089 else
16a3246f 4090 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
43e9d192
IB
4091 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4092 return;
4093
4094 case ADDRESS_REG_WB:
4095 switch (GET_CODE (x))
4096 {
4097 case PRE_INC:
16a3246f 4098 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
4099 GET_MODE_SIZE (aarch64_memory_reference_mode));
4100 return;
4101 case POST_INC:
16a3246f 4102 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
43e9d192
IB
4103 GET_MODE_SIZE (aarch64_memory_reference_mode));
4104 return;
4105 case PRE_DEC:
16a3246f 4106 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
43e9d192
IB
4107 GET_MODE_SIZE (aarch64_memory_reference_mode));
4108 return;
4109 case POST_DEC:
16a3246f 4110 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
43e9d192
IB
4111 GET_MODE_SIZE (aarch64_memory_reference_mode));
4112 return;
4113 case PRE_MODIFY:
16a3246f 4114 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
43e9d192
IB
4115 INTVAL (addr.offset));
4116 return;
4117 case POST_MODIFY:
16a3246f 4118 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
43e9d192
IB
4119 INTVAL (addr.offset));
4120 return;
4121 default:
4122 break;
4123 }
4124 break;
4125
4126 case ADDRESS_LO_SUM:
16a3246f 4127 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
43e9d192
IB
4128 output_addr_const (f, addr.offset);
4129 asm_fprintf (f, "]");
4130 return;
4131
4132 case ADDRESS_SYMBOLIC:
4133 break;
4134 }
4135
4136 output_addr_const (f, x);
4137}
4138
43e9d192
IB
4139bool
4140aarch64_label_mentioned_p (rtx x)
4141{
4142 const char *fmt;
4143 int i;
4144
4145 if (GET_CODE (x) == LABEL_REF)
4146 return true;
4147
4148 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4149 referencing instruction, but they are constant offsets, not
4150 symbols. */
4151 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4152 return false;
4153
4154 fmt = GET_RTX_FORMAT (GET_CODE (x));
4155 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4156 {
4157 if (fmt[i] == 'E')
4158 {
4159 int j;
4160
4161 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4162 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4163 return 1;
4164 }
4165 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4166 return 1;
4167 }
4168
4169 return 0;
4170}
4171
4172/* Implement REGNO_REG_CLASS. */
4173
4174enum reg_class
4175aarch64_regno_regclass (unsigned regno)
4176{
4177 if (GP_REGNUM_P (regno))
a4a182c6 4178 return GENERAL_REGS;
43e9d192
IB
4179
4180 if (regno == SP_REGNUM)
4181 return STACK_REG;
4182
4183 if (regno == FRAME_POINTER_REGNUM
4184 || regno == ARG_POINTER_REGNUM)
f24bb080 4185 return POINTER_REGS;
43e9d192
IB
4186
4187 if (FP_REGNUM_P (regno))
4188 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4189
4190 return NO_REGS;
4191}
4192
0c4ec427
RE
4193static rtx
4194aarch64_legitimize_address (rtx x, rtx /* orig_x */, enum machine_mode mode)
4195{
4196 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
4197 where mask is selected by alignment and size of the offset.
4198 We try to pick as large a range for the offset as possible to
4199 maximize the chance of a CSE. However, for aligned addresses
4200 we limit the range to 4k so that structures with different sized
4201 elements are likely to use the same base. */
4202
4203 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
4204 {
4205 HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
4206 HOST_WIDE_INT base_offset;
4207
4208 /* Does it look like we'll need a load/store-pair operation? */
4209 if (GET_MODE_SIZE (mode) > 16
4210 || mode == TImode)
4211 base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
4212 & ~((128 * GET_MODE_SIZE (mode)) - 1));
4213 /* For offsets that aren't a multiple of the access size, the limit is
4214 -256...255. */
4215 else if (offset & (GET_MODE_SIZE (mode) - 1))
4216 base_offset = (offset + 0x100) & ~0x1ff;
4217 else
4218 base_offset = offset & ~0xfff;
4219
4220 if (base_offset == 0)
4221 return x;
4222
4223 offset -= base_offset;
4224 rtx base_reg = gen_reg_rtx (Pmode);
4225 rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0), base_offset),
4226 NULL_RTX);
4227 emit_move_insn (base_reg, val);
4228 x = plus_constant (Pmode, base_reg, offset);
4229 }
4230
4231 return x;
4232}
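/* Worked example of the split above (added illustration, not part of the
   original source), assuming a DImode access: for
     (plus (reg X) (const_int 0x3008))
   the offset is a multiple of the access size, so base_offset is
   0x3008 & ~0xfff = 0x3000.  A new pseudo Y is loaded with X + 0x3000 and
   the address becomes (plus (reg Y) (const_int 8)), which fits the scaled
   unsigned 12-bit offset form, and nearby accesses off X can reuse (CSE)
   the same base Y.  */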
4233
43e9d192
IB
4234/* Try a machine-dependent way of reloading an illegitimate address
4235 operand. If we find one, push the reload and return the new rtx. */
4236
4237rtx
4238aarch64_legitimize_reload_address (rtx *x_p,
4239 enum machine_mode mode,
4240 int opnum, int type,
4241 int ind_levels ATTRIBUTE_UNUSED)
4242{
4243 rtx x = *x_p;
4244
348d4b0a
BC
4245 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
4246 if (aarch64_vect_struct_mode_p (mode)
43e9d192
IB
4247 && GET_CODE (x) == PLUS
4248 && REG_P (XEXP (x, 0))
4249 && CONST_INT_P (XEXP (x, 1)))
4250 {
4251 rtx orig_rtx = x;
4252 x = copy_rtx (x);
4253 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4254 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4255 opnum, (enum reload_type) type);
4256 return x;
4257 }
4258
4259 /* We must recognize output that we have already generated ourselves. */
4260 if (GET_CODE (x) == PLUS
4261 && GET_CODE (XEXP (x, 0)) == PLUS
4262 && REG_P (XEXP (XEXP (x, 0), 0))
4263 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4264 && CONST_INT_P (XEXP (x, 1)))
4265 {
4266 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4267 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4268 opnum, (enum reload_type) type);
4269 return x;
4270 }
4271
4272 /* We wish to handle large displacements off a base register by splitting
4273 the addend across an add and the mem insn. This can cut the number of
4274 extra insns needed from 3 to 1. It is only useful for load/store of a
4275 single register with a 12-bit offset field. */
4276 if (GET_CODE (x) == PLUS
4277 && REG_P (XEXP (x, 0))
4278 && CONST_INT_P (XEXP (x, 1))
4279 && HARD_REGISTER_P (XEXP (x, 0))
4280 && mode != TImode
4281 && mode != TFmode
4282 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4283 {
4284 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4285 HOST_WIDE_INT low = val & 0xfff;
4286 HOST_WIDE_INT high = val - low;
4287 HOST_WIDE_INT offs;
4288 rtx cst;
28514dda
YZ
4289 enum machine_mode xmode = GET_MODE (x);
4290
4291 /* In ILP32, xmode can be either DImode or SImode. */
4292 gcc_assert (xmode == DImode || xmode == SImode);
43e9d192
IB
4293
4294 /* Punt on BLKmode (size-zero) offsets: we cannot ascertain BLKmode
4295 alignment, so leave such addresses to the generic reload code. */
4296 if (GET_MODE_SIZE (mode) == 0)
4297 return NULL_RTX;
4298
4299 offs = low % GET_MODE_SIZE (mode);
4300
4301 /* Align misaligned offset by adjusting high part to compensate. */
4302 if (offs != 0)
4303 {
4304 if (aarch64_uimm12_shift (high + offs))
4305 {
4306 /* Align down. */
4307 low = low - offs;
4308 high = high + offs;
4309 }
4310 else
4311 {
4312 /* Align up. */
4313 offs = GET_MODE_SIZE (mode) - offs;
4314 low = low + offs;
4315 high = high + (low & 0x1000) - offs;
4316 low &= 0xfff;
4317 }
4318 }
4319
4320 /* Check for overflow. */
4321 if (high + low != val)
4322 return NULL_RTX;
4323
4324 cst = GEN_INT (high);
4325 if (!aarch64_uimm12_shift (high))
28514dda 4326 cst = force_const_mem (xmode, cst);
43e9d192
IB
4327
4328 /* Reload high part into base reg, leaving the low part
acf67b8e
KT
4329 in the mem instruction.
4330 Note that replacing this gen_rtx_PLUS with plus_constant is
4331 wrong in this case because we rely on the
4332 (plus (plus reg c1) c2) structure being preserved so that
4333 XEXP (*p, 0) in push_reload below uses the correct term. */
4d0e5b54
JG
4334 x = gen_rtx_PLUS (xmode,
4335 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4336 GEN_INT (low));
43e9d192
IB
4337
4338 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
28514dda 4339 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
43e9d192
IB
4340 opnum, (enum reload_type) type);
4341 return x;
4342 }
4343
4344 return NULL_RTX;
4345}
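/* Worked example of the displacement split above (added illustration, not
   part of the original source), assuming an SImode access at
     (plus (reg X) (const_int 0x1234)):
   low = 0x234 and high = 0x1000; low is already a multiple of the access
   size and high is a valid shifted 12-bit immediate, so the address is
   rewritten as
     (plus (plus (reg X) (const_int 0x1000)) (const_int 0x234))
   with the inner PLUS reloaded into the base register and the 0x234
   offset left in the memory reference.  */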
4346
4347
4348static reg_class_t
4349aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4350 reg_class_t rclass,
4351 enum machine_mode mode,
4352 secondary_reload_info *sri)
4353{
43e9d192
IB
4354 /* Without the TARGET_SIMD instructions we cannot move a Q register
4355 to a Q register directly. We need a scratch. */
4356 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4357 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4358 && reg_class_subset_p (rclass, FP_REGS))
4359 {
4360 if (mode == TFmode)
4361 sri->icode = CODE_FOR_aarch64_reload_movtf;
4362 else if (mode == TImode)
4363 sri->icode = CODE_FOR_aarch64_reload_movti;
4364 return NO_REGS;
4365 }
4366
4367 /* A TFmode or TImode memory access should be handled via an FP_REG
4368 because AArch64 has richer addressing modes for LDR/STR instructions
4369 than LDP/STP instructions. */
a4a182c6 4370 if (!TARGET_GENERAL_REGS_ONLY && rclass == GENERAL_REGS
43e9d192
IB
4371 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4372 return FP_REGS;
4373
4374 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
a4a182c6 4375 return GENERAL_REGS;
43e9d192
IB
4376
4377 return NO_REGS;
4378}
4379
4380static bool
4381aarch64_can_eliminate (const int from, const int to)
4382{
4383 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4384 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4385
4386 if (frame_pointer_needed)
4387 {
4388 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4389 return true;
4390 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4391 return false;
4392 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4393 && !cfun->calls_alloca)
4394 return true;
4395 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4396 return true;
0b7f8166
MS
4397
4398 return false;
43e9d192 4399 }
777e6976 4400
43e9d192
IB
4401 return true;
4402}
4403
4404HOST_WIDE_INT
4405aarch64_initial_elimination_offset (unsigned from, unsigned to)
4406{
43e9d192 4407 aarch64_layout_frame ();
78c29983
MS
4408
4409 if (to == HARD_FRAME_POINTER_REGNUM)
4410 {
4411 if (from == ARG_POINTER_REGNUM)
1c960e02 4412 return cfun->machine->frame.frame_size - crtl->outgoing_args_size;
78c29983
MS
4413
4414 if (from == FRAME_POINTER_REGNUM)
1c960e02
MS
4415 return (cfun->machine->frame.hard_fp_offset
4416 - cfun->machine->frame.saved_varargs_size);
78c29983
MS
4417 }
4418
4419 if (to == STACK_POINTER_REGNUM)
4420 {
4421 if (from == FRAME_POINTER_REGNUM)
1c960e02
MS
4422 return (cfun->machine->frame.frame_size
4423 - cfun->machine->frame.saved_varargs_size);
78c29983
MS
4424 }
4425
1c960e02 4426 return cfun->machine->frame.frame_size;
43e9d192
IB
4427}
4428
43e9d192
IB
4429/* Implement RETURN_ADDR_RTX. We do not support moving back to a
4430 previous frame. */
4431
4432rtx
4433aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4434{
4435 if (count != 0)
4436 return const0_rtx;
4437 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4438}
4439
4440
4441static void
4442aarch64_asm_trampoline_template (FILE *f)
4443{
28514dda
YZ
4444 if (TARGET_ILP32)
4445 {
4446 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4447 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4448 }
4449 else
4450 {
4451 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4452 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4453 }
01a3a324 4454 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
43e9d192 4455 assemble_aligned_integer (4, const0_rtx);
28514dda
YZ
4456 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4457 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
43e9d192
IB
4458}
4459
4460static void
4461aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4462{
4463 rtx fnaddr, mem, a_tramp;
28514dda 4464 const int tramp_code_sz = 16;
43e9d192
IB
4465
4466 /* Don't need to copy the trailing D-words, we fill those in below. */
4467 emit_block_move (m_tramp, assemble_trampoline_template (),
28514dda
YZ
4468 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4469 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
43e9d192 4470 fnaddr = XEXP (DECL_RTL (fndecl), 0);
28514dda
YZ
4471 if (GET_MODE (fnaddr) != ptr_mode)
4472 fnaddr = convert_memory_address (ptr_mode, fnaddr);
43e9d192
IB
4473 emit_move_insn (mem, fnaddr);
4474
28514dda 4475 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
43e9d192
IB
4476 emit_move_insn (mem, chain_value);
4477
4478 /* XXX We should really define a "clear_cache" pattern and use
4479 gen_clear_cache(). */
4480 a_tramp = XEXP (m_tramp, 0);
4481 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
28514dda
YZ
4482 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4483 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4484 ptr_mode);
43e9d192
IB
4485}
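/* Added illustration, not part of the original source: for the LP64 case
   the resulting trampoline is laid out as
     bytes  0..15  the code emitted by the template above
                   (two PC-relative loads, a BR, and 4 bytes of padding),
     bytes 16..23  the address of the nested function (fnaddr),
     bytes 24..31  the static chain value,
   and the template's literal loads fetch the two data words into IP1 and
   the static chain register before branching.  */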
4486
4487static unsigned char
4488aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4489{
4490 switch (regclass)
4491 {
fee9ba42 4492 case CALLER_SAVE_REGS:
43e9d192
IB
4493 case POINTER_REGS:
4494 case GENERAL_REGS:
4495 case ALL_REGS:
4496 case FP_REGS:
4497 case FP_LO_REGS:
4498 return
4499 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
aef66c94 4500 (GET_MODE_SIZE (mode) + 7) / 8;
43e9d192
IB
4501 case STACK_REG:
4502 return 1;
4503
4504 case NO_REGS:
4505 return 0;
4506
4507 default:
4508 break;
4509 }
4510 gcc_unreachable ();
4511}
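/* Added illustration of the arithmetic above, not part of the original
   source: a 128-bit value in the general registers (e.g. TImode) is
   reported as (16 + 7) / 8 = 2 registers, DImode as (8 + 7) / 8 = 1, and a
   128-bit vector mode such as V4SImode as (16 + 15) / 16 = 1 quad
   register.  */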
4512
4513static reg_class_t
78d8b9f0 4514aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
43e9d192 4515{
51bb310d 4516 if (regclass == POINTER_REGS)
78d8b9f0
IB
4517 return GENERAL_REGS;
4518
51bb310d
MS
4519 if (regclass == STACK_REG)
4520 {
4521 if (REG_P(x)
4522 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4523 return regclass;
4524
4525 return NO_REGS;
4526 }
4527
78d8b9f0
IB
4528 /* If it's an integer immediate that MOVI can't handle, then
4529 FP_REGS is not an option, so we return NO_REGS instead. */
4530 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4531 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4532 return NO_REGS;
4533
27bd251b
IB
4534 /* Register elimination can result in a request for
4535 SP+constant->FP_REGS. We cannot support such operations, which
4536 use SP as source and an FP_REG as destination, so reject them
4537 outright. */
4538 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4539 {
4540 rtx lhs = XEXP (x, 0);
4541
4542 /* Look through a possible SUBREG introduced by ILP32. */
4543 if (GET_CODE (lhs) == SUBREG)
4544 lhs = SUBREG_REG (lhs);
4545
4546 gcc_assert (REG_P (lhs));
4547 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4548 POINTER_REGS));
4549 return NO_REGS;
4550 }
4551
78d8b9f0 4552 return regclass;
43e9d192
IB
4553}
4554
4555void
4556aarch64_asm_output_labelref (FILE* f, const char *name)
4557{
4558 asm_fprintf (f, "%U%s", name);
4559}
4560
4561static void
4562aarch64_elf_asm_constructor (rtx symbol, int priority)
4563{
4564 if (priority == DEFAULT_INIT_PRIORITY)
4565 default_ctor_section_asm_out_constructor (symbol, priority);
4566 else
4567 {
4568 section *s;
4569 char buf[18];
4570 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4571 s = get_section (buf, SECTION_WRITE, NULL);
4572 switch_to_section (s);
4573 assemble_align (POINTER_SIZE);
28514dda 4574 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
4575 }
4576}
4577
4578static void
4579aarch64_elf_asm_destructor (rtx symbol, int priority)
4580{
4581 if (priority == DEFAULT_INIT_PRIORITY)
4582 default_dtor_section_asm_out_destructor (symbol, priority);
4583 else
4584 {
4585 section *s;
4586 char buf[18];
4587 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4588 s = get_section (buf, SECTION_WRITE, NULL);
4589 switch_to_section (s);
4590 assemble_align (POINTER_SIZE);
28514dda 4591 assemble_aligned_integer (POINTER_BYTES, symbol);
43e9d192
IB
4592 }
4593}
4594
4595const char*
4596aarch64_output_casesi (rtx *operands)
4597{
4598 char buf[100];
4599 char label[100];
b32d5189 4600 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
43e9d192
IB
4601 int index;
4602 static const char *const patterns[4][2] =
4603 {
4604 {
4605 "ldrb\t%w3, [%0,%w1,uxtw]",
4606 "add\t%3, %4, %w3, sxtb #2"
4607 },
4608 {
4609 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4610 "add\t%3, %4, %w3, sxth #2"
4611 },
4612 {
4613 "ldr\t%w3, [%0,%w1,uxtw #2]",
4614 "add\t%3, %4, %w3, sxtw #2"
4615 },
4616 /* We assume that DImode is only generated when not optimizing and
4617 that we don't really need 64-bit address offsets. That would
4618 imply an object file with 8GB of code in a single function! */
4619 {
4620 "ldr\t%w3, [%0,%w1,uxtw #2]",
4621 "add\t%3, %4, %w3, sxtw #2"
4622 }
4623 };
4624
4625 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4626
4627 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4628
4629 gcc_assert (index >= 0 && index <= 3);
4630
4631 /* Need to implement table size reduction, by changing the code below. */
4632 output_asm_insn (patterns[index][0], operands);
4633 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4634 snprintf (buf, sizeof (buf),
4635 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4636 output_asm_insn (buf, operands);
4637 output_asm_insn (patterns[index][1], operands);
4638 output_asm_insn ("br\t%3", operands);
4639 assemble_label (asm_out_file, label);
4640 return "";
4641}
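/* Added illustration, not part of the original source: for a HImode
   dispatch table, and assuming a hypothetical register assignment with the
   table base in x0, the index in w1 and the scratch operands in x3/x4, the
   sequence emitted above is roughly
     ldrh  w3, [x0, w1, uxtw #1]
     adr   x4, .Lrtx<N>
     add   x3, x4, w3, sxth #2
     br    x3
   .Lrtx<N>:
   i.e. load the scaled table entry, form the target address relative to
   the label, and branch indirectly.  */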
4642
4643
4644/* Return size in bits of an arithmetic operand which is shifted/scaled and
4645 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4646 operator. */
4647
4648int
4649aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4650{
4651 if (shift >= 0 && shift <= 3)
4652 {
4653 int size;
4654 for (size = 8; size <= 32; size *= 2)
4655 {
4656 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4657 if (mask == bits << shift)
4658 return size;
4659 }
4660 }
4661 return 0;
4662}
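/* Worked example, not part of the original source: for shift = 1 and
   mask = 0x1fe (that is, 0xff << 1) the function returns 8, so the operand
   can use a UXTB extend; shift = 2 with mask = 0x3fffc (0xffff << 2)
   returns 16 (UXTH); any mask that is not a contiguous 8/16/32-bit field
   shifted left by 0..3 returns 0.  */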
4663
4664static bool
4665aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4666 const_rtx x ATTRIBUTE_UNUSED)
4667{
4668 /* We can't use blocks for constants when we're using a per-function
4669 constant pool. */
4670 return false;
4671}
4672
4673static section *
4674aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4675 rtx x ATTRIBUTE_UNUSED,
4676 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4677{
4678 /* Force all constant pool entries into the current function section. */
4679 return function_section (current_function_decl);
4680}
4681
4682
4683/* Costs. */
4684
4685/* Helper function for rtx cost calculation. Strip a shift expression
4686 from X. Returns the inner operand if successful, or the original
4687 expression on failure. */
4688static rtx
4689aarch64_strip_shift (rtx x)
4690{
4691 rtx op = x;
4692
57b77d46
RE
4693 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
4694 we can convert both to ROR during final output. */
43e9d192
IB
4695 if ((GET_CODE (op) == ASHIFT
4696 || GET_CODE (op) == ASHIFTRT
57b77d46
RE
4697 || GET_CODE (op) == LSHIFTRT
4698 || GET_CODE (op) == ROTATERT
4699 || GET_CODE (op) == ROTATE)
43e9d192
IB
4700 && CONST_INT_P (XEXP (op, 1)))
4701 return XEXP (op, 0);
4702
4703 if (GET_CODE (op) == MULT
4704 && CONST_INT_P (XEXP (op, 1))
4705 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4706 return XEXP (op, 0);
4707
4708 return x;
4709}
4710
4745e701 4711/* Helper function for rtx cost calculation. Strip an extend
43e9d192
IB
4712 expression from X. Returns the inner operand if successful, or the
4713 original expression on failure. We deal with a number of possible
4714 canonicalization variations here. */
4715static rtx
4745e701 4716aarch64_strip_extend (rtx x)
43e9d192
IB
4717{
4718 rtx op = x;
4719
4720 /* Zero and sign extraction of a widened value. */
4721 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4722 && XEXP (op, 2) == const0_rtx
4745e701 4723 && GET_CODE (XEXP (op, 0)) == MULT
43e9d192
IB
4724 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4725 XEXP (op, 1)))
4726 return XEXP (XEXP (op, 0), 0);
4727
4728 /* It can also be represented (for zero-extend) as an AND with an
4729 immediate. */
4730 if (GET_CODE (op) == AND
4731 && GET_CODE (XEXP (op, 0)) == MULT
4732 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4733 && CONST_INT_P (XEXP (op, 1))
4734 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4735 INTVAL (XEXP (op, 1))) != 0)
4736 return XEXP (XEXP (op, 0), 0);
4737
4738 /* Now handle extended register, as this may also have an optional
4739 left shift by 1..4. */
4740 if (GET_CODE (op) == ASHIFT
4741 && CONST_INT_P (XEXP (op, 1))
4742 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4743 op = XEXP (op, 0);
4744
4745 if (GET_CODE (op) == ZERO_EXTEND
4746 || GET_CODE (op) == SIGN_EXTEND)
4747 op = XEXP (op, 0);
4748
4749 if (op != x)
4750 return op;
4751
4745e701
JG
4752 return x;
4753}
4754
4755/* Helper function for rtx cost calculation. Calculate the cost of
4756 a MULT, which may be part of a multiply-accumulate rtx. Return
4757 the calculated cost of the expression, recursing manually in to
4758 operands where needed. */
4759
4760static int
4761aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
4762{
4763 rtx op0, op1;
4764 const struct cpu_cost_table *extra_cost
4765 = aarch64_tune_params->insn_extra_cost;
4766 int cost = 0;
4767 bool maybe_fma = (outer == PLUS || outer == MINUS);
4768 enum machine_mode mode = GET_MODE (x);
4769
4770 gcc_checking_assert (code == MULT);
4771
4772 op0 = XEXP (x, 0);
4773 op1 = XEXP (x, 1);
4774
4775 if (VECTOR_MODE_P (mode))
4776 mode = GET_MODE_INNER (mode);
4777
4778 /* Integer multiply/fma. */
4779 if (GET_MODE_CLASS (mode) == MODE_INT)
4780 {
4781 /* The multiply will be canonicalized as a shift, cost it as such. */
4782 if (CONST_INT_P (op1)
4783 && exact_log2 (INTVAL (op1)) > 0)
4784 {
4785 if (speed)
4786 {
4787 if (maybe_fma)
4788 /* ADD (shifted register). */
4789 cost += extra_cost->alu.arith_shift;
4790 else
4791 /* LSL (immediate). */
4792 cost += extra_cost->alu.shift;
4793 }
4794
4795 cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
4796
4797 return cost;
4798 }
4799
4800 /* Integer multiplies or FMAs have zero/sign extending variants. */
4801 if ((GET_CODE (op0) == ZERO_EXTEND
4802 && GET_CODE (op1) == ZERO_EXTEND)
4803 || (GET_CODE (op0) == SIGN_EXTEND
4804 && GET_CODE (op1) == SIGN_EXTEND))
4805 {
4806 cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4807 + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
4808
4809 if (speed)
4810 {
4811 if (maybe_fma)
4812 /* MADD/SMADDL/UMADDL. */
4813 cost += extra_cost->mult[0].extend_add;
4814 else
4815 /* MUL/SMULL/UMULL. */
4816 cost += extra_cost->mult[0].extend;
4817 }
4818
4819 return cost;
4820 }
4821
4822 /* This is either an integer multiply or an FMA. In both cases
4823 we want to recurse and cost the operands. */
4824 cost += rtx_cost (op0, MULT, 0, speed)
4825 + rtx_cost (op1, MULT, 1, speed);
4826
4827 if (speed)
4828 {
4829 if (maybe_fma)
4830 /* MADD. */
4831 cost += extra_cost->mult[mode == DImode].add;
4832 else
4833 /* MUL. */
4834 cost += extra_cost->mult[mode == DImode].simple;
4835 }
4836
4837 return cost;
4838 }
4839 else
4840 {
4841 if (speed)
4842 {
3d840f7d 4843 /* Floating-point FMA/FMUL can also support negations of the
4745e701
JG
4844 operands. */
4845 if (GET_CODE (op0) == NEG)
3d840f7d 4846 op0 = XEXP (op0, 0);
4745e701 4847 if (GET_CODE (op1) == NEG)
3d840f7d 4848 op1 = XEXP (op1, 0);
4745e701
JG
4849
4850 if (maybe_fma)
4851 /* FMADD/FNMADD/FNMSUB/FMSUB. */
4852 cost += extra_cost->fp[mode == DFmode].fma;
4853 else
3d840f7d 4854 /* FMUL/FNMUL. */
4745e701
JG
4855 cost += extra_cost->fp[mode == DFmode].mult;
4856 }
4857
4858 cost += rtx_cost (op0, MULT, 0, speed)
4859 + rtx_cost (op1, MULT, 1, speed);
4860 return cost;
4861 }
43e9d192
IB
4862}
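/* Added illustration, not part of the original source: a multiply by a
   power of two feeding an addition, e.g.
     (plus (mult (reg r1) (const_int 4)) (reg r2)),
   is costed above as an ADD (shifted register) -- the form the backend
   emits as something like "add x0, x2, x1, lsl #2" -- rather than as a
   real multiply.  */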
4863
67747367
JG
4864static int
4865aarch64_address_cost (rtx x,
4866 enum machine_mode mode,
4867 addr_space_t as ATTRIBUTE_UNUSED,
4868 bool speed)
4869{
4870 enum rtx_code c = GET_CODE (x);
4871 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4872 struct aarch64_address_info info;
4873 int cost = 0;
4874 info.shift = 0;
4875
4876 if (!aarch64_classify_address (&info, x, mode, c, false))
4877 {
4878 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
4879 {
4880 /* This is a CONST or SYMBOL ref which will be split
4881 in a different way depending on the code model in use.
4882 Cost it through the generic infrastructure. */
4883 int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
4884 /* Divide through by the cost of one instruction to
4885 bring it to the same units as the address costs. */
4886 cost_symbol_ref /= COSTS_N_INSNS (1);
4887 /* The cost is then the cost of preparing the address,
4888 followed by an immediate (possibly 0) offset. */
4889 return cost_symbol_ref + addr_cost->imm_offset;
4890 }
4891 else
4892 {
4893 /* This is most likely a jump table from a case
4894 statement. */
4895 return addr_cost->register_offset;
4896 }
4897 }
4898
4899 switch (info.type)
4900 {
4901 case ADDRESS_LO_SUM:
4902 case ADDRESS_SYMBOLIC:
4903 case ADDRESS_REG_IMM:
4904 cost += addr_cost->imm_offset;
4905 break;
4906
4907 case ADDRESS_REG_WB:
4908 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4909 cost += addr_cost->pre_modify;
4910 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4911 cost += addr_cost->post_modify;
4912 else
4913 gcc_unreachable ();
4914
4915 break;
4916
4917 case ADDRESS_REG_REG:
4918 cost += addr_cost->register_offset;
4919 break;
4920
4921 case ADDRESS_REG_UXTW:
4922 case ADDRESS_REG_SXTW:
4923 cost += addr_cost->register_extend;
4924 break;
4925
4926 default:
4927 gcc_unreachable ();
4928 }
4929
4930
4931 if (info.shift > 0)
4932 {
4933 /* For the sake of calculating the cost of the shifted register
4934 component, we can treat same sized modes in the same way. */
4935 switch (GET_MODE_BITSIZE (mode))
4936 {
4937 case 16:
4938 cost += addr_cost->addr_scale_costs.hi;
4939 break;
4940
4941 case 32:
4942 cost += addr_cost->addr_scale_costs.si;
4943 break;
4944
4945 case 64:
4946 cost += addr_cost->addr_scale_costs.di;
4947 break;
4948
4949 /* We can't tell, or this is a 128-bit vector. */
4950 default:
4951 cost += addr_cost->addr_scale_costs.ti;
4952 break;
4953 }
4954 }
4955
4956 return cost;
4957}
4958
7cc2145f
JG
4959/* Return true if the RTX X in mode MODE is a zero or sign extract
4960 usable in an ADD or SUB (extended register) instruction. */
4961static bool
4962aarch64_rtx_arith_op_extract_p (rtx x, enum machine_mode mode)
4963{
4964 /* Catch add with a sign extract.
4965 This is add_<optab><mode>_multp2. */
4966 if (GET_CODE (x) == SIGN_EXTRACT
4967 || GET_CODE (x) == ZERO_EXTRACT)
4968 {
4969 rtx op0 = XEXP (x, 0);
4970 rtx op1 = XEXP (x, 1);
4971 rtx op2 = XEXP (x, 2);
4972
4973 if (GET_CODE (op0) == MULT
4974 && CONST_INT_P (op1)
4975 && op2 == const0_rtx
4976 && CONST_INT_P (XEXP (op0, 1))
4977 && aarch64_is_extend_from_extract (mode,
4978 XEXP (op0, 1),
4979 op1))
4980 {
4981 return true;
4982 }
4983 }
4984
4985 return false;
4986}
4987
61263118
KT
4988static bool
4989aarch64_frint_unspec_p (unsigned int u)
4990{
4991 switch (u)
4992 {
4993 case UNSPEC_FRINTZ:
4994 case UNSPEC_FRINTP:
4995 case UNSPEC_FRINTM:
4996 case UNSPEC_FRINTA:
4997 case UNSPEC_FRINTN:
4998 case UNSPEC_FRINTX:
4999 case UNSPEC_FRINTI:
5000 return true;
5001
5002 default:
5003 return false;
5004 }
5005}
5006
2d5ffe46
AP
5007/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
5008 storing it in *COST. Result is true if the total cost of the operation
5009 has now been calculated. */
5010static bool
5011aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
5012{
b9e3afe9
AP
5013 rtx inner;
5014 rtx comparator;
5015 enum rtx_code cmpcode;
5016
5017 if (COMPARISON_P (op0))
5018 {
5019 inner = XEXP (op0, 0);
5020 comparator = XEXP (op0, 1);
5021 cmpcode = GET_CODE (op0);
5022 }
5023 else
5024 {
5025 inner = op0;
5026 comparator = const0_rtx;
5027 cmpcode = NE;
5028 }
5029
2d5ffe46
AP
5030 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
5031 {
5032 /* Conditional branch. */
b9e3afe9 5033 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
5034 return true;
5035 else
5036 {
b9e3afe9 5037 if (cmpcode == NE || cmpcode == EQ)
2d5ffe46 5038 {
2d5ffe46
AP
5039 if (comparator == const0_rtx)
5040 {
5041 /* TBZ/TBNZ/CBZ/CBNZ. */
5042 if (GET_CODE (inner) == ZERO_EXTRACT)
5043 /* TBZ/TBNZ. */
5044 *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
5045 0, speed);
5046 else
5047 /* CBZ/CBNZ. */
b9e3afe9 5048 *cost += rtx_cost (inner, cmpcode, 0, speed);
2d5ffe46
AP
5049
5050 return true;
5051 }
5052 }
b9e3afe9 5053 else if (cmpcode == LT || cmpcode == GE)
2d5ffe46 5054 {
2d5ffe46
AP
5055 /* TBZ/TBNZ. */
5056 if (comparator == const0_rtx)
5057 return true;
5058 }
5059 }
5060 }
b9e3afe9 5061 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
2d5ffe46
AP
5062 {
5063 /* It's a conditional operation based on the status flags,
5064 so it must be some flavor of CSEL. */
5065
5066 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
5067 if (GET_CODE (op1) == NEG
5068 || GET_CODE (op1) == NOT
5069 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
5070 op1 = XEXP (op1, 0);
5071
5072 *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
5073 *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
5074 return true;
5075 }
5076
5077 /* We don't know what this is, cost all operands. */
5078 return false;
5079}
5080
43e9d192
IB
5081/* Calculate the cost of calculating X, storing it in *COST. Result
5082 is true if the total cost of the operation has now been calculated. */
5083static bool
5084aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
5085 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
5086{
a8eecd00 5087 rtx op0, op1, op2;
73250c4c 5088 const struct cpu_cost_table *extra_cost
43e9d192 5089 = aarch64_tune_params->insn_extra_cost;
9dfc162c 5090 enum machine_mode mode = GET_MODE (x);
43e9d192 5091
7fc5ef02
JG
5092 /* By default, assume that everything has equivalent cost to the
5093 cheapest instruction. Any additional costs are applied as a delta
5094 above this default. */
5095 *cost = COSTS_N_INSNS (1);
5096
5097 /* TODO: The cost infrastructure currently does not handle
5098 vector operations. Assume that all vector operations
5099 are equally expensive. */
5100 if (VECTOR_MODE_P (mode))
5101 {
5102 if (speed)
5103 *cost += extra_cost->vect.alu;
5104 return true;
5105 }
5106
43e9d192
IB
5107 switch (code)
5108 {
5109 case SET:
ba123b0d
JG
5110 /* The cost depends entirely on the operands to SET. */
5111 *cost = 0;
43e9d192
IB
5112 op0 = SET_DEST (x);
5113 op1 = SET_SRC (x);
5114
5115 switch (GET_CODE (op0))
5116 {
5117 case MEM:
5118 if (speed)
2961177e
JG
5119 {
5120 rtx address = XEXP (op0, 0);
5121 if (GET_MODE_CLASS (mode) == MODE_INT)
5122 *cost += extra_cost->ldst.store;
5123 else if (mode == SFmode)
5124 *cost += extra_cost->ldst.storef;
5125 else if (mode == DFmode)
5126 *cost += extra_cost->ldst.stored;
5127
5128 *cost +=
5129 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5130 0, speed));
5131 }
43e9d192 5132
ba123b0d 5133 *cost += rtx_cost (op1, SET, 1, speed);
43e9d192
IB
5134 return true;
5135
5136 case SUBREG:
5137 if (! REG_P (SUBREG_REG (op0)))
5138 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
ba123b0d 5139
43e9d192
IB
5140 /* Fall through. */
5141 case REG:
ba123b0d
JG
5142 /* const0_rtx is in general free, but we will use an
5143 instruction to set a register to 0. */
5144 if (REG_P (op1) || op1 == const0_rtx)
5145 {
5146 /* The cost is 1 per register copied. */
5147 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
5148 / UNITS_PER_WORD;
5149 *cost = COSTS_N_INSNS (n_minus_1 + 1);
5150 }
5151 else
5152 /* Cost is just the cost of the RHS of the set. */
5153 *cost += rtx_cost (op1, SET, 1, speed);
43e9d192
IB
5154 return true;
5155
ba123b0d 5156 case ZERO_EXTRACT:
43e9d192 5157 case SIGN_EXTRACT:
ba123b0d
JG
5158 /* Bit-field insertion. Strip any redundant widening of
5159 the RHS to meet the width of the target. */
43e9d192
IB
5160 if (GET_CODE (op1) == SUBREG)
5161 op1 = SUBREG_REG (op1);
5162 if ((GET_CODE (op1) == ZERO_EXTEND
5163 || GET_CODE (op1) == SIGN_EXTEND)
4aa81c2e 5164 && CONST_INT_P (XEXP (op0, 1))
43e9d192
IB
5165 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
5166 >= INTVAL (XEXP (op0, 1))))
5167 op1 = XEXP (op1, 0);
ba123b0d
JG
5168
5169 if (CONST_INT_P (op1))
5170 {
5171 /* MOV immediate is assumed to always be cheap. */
5172 *cost = COSTS_N_INSNS (1);
5173 }
5174 else
5175 {
5176 /* BFM. */
5177 if (speed)
5178 *cost += extra_cost->alu.bfi;
5179 *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
5180 }
5181
43e9d192
IB
5182 return true;
5183
5184 default:
ba123b0d
JG
5185 /* We can't make sense of this, assume default cost. */
5186 *cost = COSTS_N_INSNS (1);
61263118 5187 return false;
43e9d192
IB
5188 }
5189 return false;
5190
9dfc162c
JG
5191 case CONST_INT:
5192 /* If an instruction can incorporate a constant within the
5193 instruction, the instruction's expression avoids calling
5194 rtx_cost() on the constant. If rtx_cost() is called on a
5195 constant, then it is usually because the constant must be
5196 moved into a register by one or more instructions.
5197
5198 The exception is constant 0, which can be expressed
5199 as XZR/WZR and is therefore free. The exception to this is
5200 if we have (set (reg) (const0_rtx)) in which case we must cost
5201 the move. However, we can catch that when we cost the SET, so
5202 we don't need to consider that here. */
5203 if (x == const0_rtx)
5204 *cost = 0;
5205 else
5206 {
5207 /* To an approximation, building any other constant is
5208 proportionally expensive to the number of instructions
5209 required to build that constant. This is true whether we
5210 are compiling for SPEED or otherwise. */
5211 *cost = COSTS_N_INSNS (aarch64_build_constant (0,
5212 INTVAL (x),
5213 false));
5214 }
5215 return true;
5216
5217 case CONST_DOUBLE:
5218 if (speed)
5219 {
5220 /* mov[df,sf]_aarch64. */
5221 if (aarch64_float_const_representable_p (x))
5222 /* FMOV (scalar immediate). */
5223 *cost += extra_cost->fp[mode == DFmode].fpconst;
5224 else if (!aarch64_float_const_zero_rtx_p (x))
5225 {
5226 /* This will be a load from memory. */
5227 if (mode == DFmode)
5228 *cost += extra_cost->ldst.loadd;
5229 else
5230 *cost += extra_cost->ldst.loadf;
5231 }
5232 else
5233 /* Otherwise this is +0.0. We get this using MOVI d0, #0
5234 or MOV v0.s[0], wzr - neither of which are modeled by the
5235 cost tables. Just use the default cost. */
5236 {
5237 }
5238 }
5239
5240 return true;
5241
43e9d192
IB
5242 case MEM:
5243 if (speed)
2961177e
JG
5244 {
5245 /* For loads we want the base cost of a load, plus an
5246 approximation for the additional cost of the addressing
5247 mode. */
5248 rtx address = XEXP (x, 0);
5249 if (GET_MODE_CLASS (mode) == MODE_INT)
5250 *cost += extra_cost->ldst.load;
5251 else if (mode == SFmode)
5252 *cost += extra_cost->ldst.loadf;
5253 else if (mode == DFmode)
5254 *cost += extra_cost->ldst.loadd;
5255
5256 *cost +=
5257 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5258 0, speed));
5259 }
43e9d192
IB
5260
5261 return true;
5262
5263 case NEG:
4745e701
JG
5264 op0 = XEXP (x, 0);
5265
5266 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5267 {
5268 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5269 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5270 {
5271 /* CSETM. */
5272 *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
5273 return true;
5274 }
5275
5276 /* Cost this as SUB wzr, X. */
5277 op0 = CONST0_RTX (GET_MODE (x));
5278 op1 = XEXP (x, 0);
5279 goto cost_minus;
5280 }
5281
5282 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
5283 {
5284 /* Support (neg(fma...)) as a single instruction only if
5285 sign of zeros is unimportant. This matches the decision
5286 making in aarch64.md. */
5287 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5288 {
5289 /* FNMADD. */
5290 *cost = rtx_cost (op0, NEG, 0, speed);
5291 return true;
5292 }
5293 if (speed)
5294 /* FNEG. */
5295 *cost += extra_cost->fp[mode == DFmode].neg;
5296 return false;
5297 }
5298
5299 return false;
43e9d192 5300
781aeb73
KT
5301 case CLRSB:
5302 case CLZ:
5303 if (speed)
5304 *cost += extra_cost->alu.clz;
5305
5306 return false;
5307
43e9d192
IB
5308 case COMPARE:
5309 op0 = XEXP (x, 0);
5310 op1 = XEXP (x, 1);
5311
5312 if (op1 == const0_rtx
5313 && GET_CODE (op0) == AND)
5314 {
5315 x = op0;
5316 goto cost_logic;
5317 }
5318
a8eecd00
JG
5319 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
5320 {
5321 /* TODO: A write to the CC flags possibly costs extra, this
5322 needs encoding in the cost tables. */
5323
5324 /* CC_ZESWPmode supports zero extend for free. */
5325 if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
5326 op0 = XEXP (op0, 0);
5327
5328 /* ANDS. */
5329 if (GET_CODE (op0) == AND)
5330 {
5331 x = op0;
5332 goto cost_logic;
5333 }
5334
5335 if (GET_CODE (op0) == PLUS)
5336 {
5337 /* ADDS (and CMN alias). */
5338 x = op0;
5339 goto cost_plus;
5340 }
5341
5342 if (GET_CODE (op0) == MINUS)
5343 {
5344 /* SUBS. */
5345 x = op0;
5346 goto cost_minus;
5347 }
5348
5349 if (GET_CODE (op1) == NEG)
5350 {
5351 /* CMN. */
5352 if (speed)
5353 *cost += extra_cost->alu.arith;
5354
5355 *cost += rtx_cost (op0, COMPARE, 0, speed);
5356 *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
5357 return true;
5358 }
5359
5360 /* CMP.
5361
5362 Compare can freely swap the order of operands, and
5363 canonicalization puts the more complex operation first.
5364 But the integer MINUS logic expects the shift/extend
5365 operation in op1. */
5366 if (! (REG_P (op0)
5367 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5368 {
5369 op0 = XEXP (x, 1);
5370 op1 = XEXP (x, 0);
5371 }
5372 goto cost_minus;
5373 }
5374
5375 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5376 {
5377 /* FCMP. */
5378 if (speed)
5379 *cost += extra_cost->fp[mode == DFmode].compare;
5380
5381 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
5382 {
5383 /* FCMP supports constant 0.0 for no extra cost. */
5384 return true;
5385 }
5386 return false;
5387 }
5388
5389 return false;
43e9d192
IB
5390
5391 case MINUS:
4745e701
JG
5392 {
5393 op0 = XEXP (x, 0);
5394 op1 = XEXP (x, 1);
5395
5396cost_minus:
5397 /* Detect valid immediates. */
5398 if ((GET_MODE_CLASS (mode) == MODE_INT
5399 || (GET_MODE_CLASS (mode) == MODE_CC
5400 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5401 && CONST_INT_P (op1)
5402 && aarch64_uimm12_shift (INTVAL (op1)))
5403 {
5404 *cost += rtx_cost (op0, MINUS, 0, speed);
43e9d192 5405
4745e701
JG
5406 if (speed)
5407 /* SUB(S) (immediate). */
5408 *cost += extra_cost->alu.arith;
5409 return true;
5410
5411 }
5412
7cc2145f
JG
5413 /* Look for SUB (extended register). */
5414 if (aarch64_rtx_arith_op_extract_p (op1, mode))
5415 {
5416 if (speed)
5417 *cost += extra_cost->alu.arith_shift;
5418
5419 *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
5420 (enum rtx_code) GET_CODE (op1),
5421 0, speed);
5422 return true;
5423 }
5424
4745e701
JG
5425 rtx new_op1 = aarch64_strip_extend (op1);
5426
5427 /* Cost this as an FMA-alike operation. */
5428 if ((GET_CODE (new_op1) == MULT
5429 || GET_CODE (new_op1) == ASHIFT)
5430 && code != COMPARE)
5431 {
5432 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5433 (enum rtx_code) code,
5434 speed);
43e9d192 5435 *cost += rtx_cost (op0, MINUS, 0, speed);
4745e701
JG
5436 return true;
5437 }
43e9d192 5438
4745e701 5439 *cost += rtx_cost (new_op1, MINUS, 1, speed);
43e9d192 5440
4745e701
JG
5441 if (speed)
5442 {
5443 if (GET_MODE_CLASS (mode) == MODE_INT)
5444 /* SUB(S). */
5445 *cost += extra_cost->alu.arith;
5446 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5447 /* FSUB. */
5448 *cost += extra_cost->fp[mode == DFmode].addsub;
5449 }
5450 return true;
5451 }
43e9d192
IB
5452
5453 case PLUS:
4745e701
JG
5454 {
5455 rtx new_op0;
43e9d192 5456
4745e701
JG
5457 op0 = XEXP (x, 0);
5458 op1 = XEXP (x, 1);
43e9d192 5459
a8eecd00 5460cost_plus:
4745e701
JG
5461 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5462 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5463 {
5464 /* CSINC. */
5465 *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5466 *cost += rtx_cost (op1, PLUS, 1, speed);
5467 return true;
5468 }
43e9d192 5469
4745e701
JG
5470 if (GET_MODE_CLASS (mode) == MODE_INT
5471 && CONST_INT_P (op1)
5472 && aarch64_uimm12_shift (INTVAL (op1)))
5473 {
5474 *cost += rtx_cost (op0, PLUS, 0, speed);
43e9d192 5475
4745e701
JG
5476 if (speed)
5477 /* ADD (immediate). */
5478 *cost += extra_cost->alu.arith;
5479 return true;
5480 }
5481
7cc2145f
JG
5482 /* Look for ADD (extended register). */
5483 if (aarch64_rtx_arith_op_extract_p (op0, mode))
5484 {
5485 if (speed)
5486 *cost += extra_cost->alu.arith_shift;
5487
5488 *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
5489 (enum rtx_code) GET_CODE (op0),
5490 0, speed);
5491 return true;
5492 }
5493
4745e701
JG
5494 /* Strip any extend, leave shifts behind as we will
5495 cost them through mult_cost. */
5496 new_op0 = aarch64_strip_extend (op0);
5497
5498 if (GET_CODE (new_op0) == MULT
5499 || GET_CODE (new_op0) == ASHIFT)
5500 {
5501 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5502 speed);
5503 *cost += rtx_cost (op1, PLUS, 1, speed);
5504 return true;
5505 }
5506
5507 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5508 + rtx_cost (op1, PLUS, 1, speed));
5509
5510 if (speed)
5511 {
5512 if (GET_MODE_CLASS (mode) == MODE_INT)
5513 /* ADD. */
5514 *cost += extra_cost->alu.arith;
5515 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5516 /* FADD. */
5517 *cost += extra_cost->fp[mode == DFmode].addsub;
5518 }
5519 return true;
5520 }
43e9d192 5521
18b42b2a
KT
5522 case BSWAP:
5523 *cost = COSTS_N_INSNS (1);
5524
5525 if (speed)
5526 *cost += extra_cost->alu.rev;
5527
5528 return false;
5529
43e9d192 5530 case IOR:
f7d5cf8d
KT
5531 if (aarch_rev16_p (x))
5532 {
5533 *cost = COSTS_N_INSNS (1);
5534
5535 if (speed)
5536 *cost += extra_cost->alu.rev;
5537
5538 return true;
5539 }
5540 /* Fall through. */
43e9d192
IB
5541 case XOR:
5542 case AND:
5543 cost_logic:
5544 op0 = XEXP (x, 0);
5545 op1 = XEXP (x, 1);
5546
268c3b47
JG
5547 if (code == AND
5548 && GET_CODE (op0) == MULT
5549 && CONST_INT_P (XEXP (op0, 1))
5550 && CONST_INT_P (op1)
5551 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5552 INTVAL (op1)) != 0)
5553 {
5554 /* This is a UBFM/SBFM. */
5555 *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5556 if (speed)
5557 *cost += extra_cost->alu.bfx;
5558 return true;
5559 }
5560
43e9d192
IB
5561 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5562 {
268c3b47
JG
5563 /* We possibly get the immediate for free, this is not
5564 modelled. */
43e9d192
IB
5565 if (CONST_INT_P (op1)
5566 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5567 {
268c3b47
JG
5568 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5569
5570 if (speed)
5571 *cost += extra_cost->alu.logical;
5572
5573 return true;
43e9d192
IB
5574 }
5575 else
5576 {
268c3b47
JG
5577 rtx new_op0 = op0;
5578
5579 /* Handle ORN, EON, or BIC. */
43e9d192
IB
5580 if (GET_CODE (op0) == NOT)
5581 op0 = XEXP (op0, 0);
268c3b47
JG
5582
5583 new_op0 = aarch64_strip_shift (op0);
5584
5585 /* If we had a shift on op0 then this is a logical-shift-
5586 by-register/immediate operation. Otherwise, this is just
5587 a logical operation. */
5588 if (speed)
5589 {
5590 if (new_op0 != op0)
5591 {
5592 /* Shift by immediate. */
5593 if (CONST_INT_P (XEXP (op0, 1)))
5594 *cost += extra_cost->alu.log_shift;
5595 else
5596 *cost += extra_cost->alu.log_shift_reg;
5597 }
5598 else
5599 *cost += extra_cost->alu.logical;
5600 }
5601
5602 /* In both cases we want to cost both operands. */
5603 *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
5604 + rtx_cost (op1, (enum rtx_code) code, 1, speed);
5605
5606 return true;
43e9d192 5607 }
43e9d192
IB
5608 }
5609 return false;
5610
268c3b47
JG
5611 case NOT:
5612 /* MVN. */
5613 if (speed)
5614 *cost += extra_cost->alu.logical;
5615
5616 /* The logical instruction could have the shifted register form,
5617 but the cost is the same if the shift is processed as a separate
5618 instruction, so we don't bother with it here. */
5619 return false;
5620
43e9d192 5621 case ZERO_EXTEND:
b1685e62
JG
5622
5623 op0 = XEXP (x, 0);
5624 /* If a value is written in SI mode, then zero extended to DI
5625 mode, the operation will in general be free as a write to
5626 a 'w' register implicitly zeroes the upper bits of an 'x'
5627 register. However, if this is
5628
5629 (set (reg) (zero_extend (reg)))
5630
5631 we must cost the explicit register move. */
5632 if (mode == DImode
5633 && GET_MODE (op0) == SImode
5634 && outer == SET)
5635 {
5636 int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5637
5638 if (!op_cost && speed)
5639 /* MOV. */
5640 *cost += extra_cost->alu.extend;
5641 else
5642 /* Free, the cost is that of the SI mode operation. */
5643 *cost = op_cost;
5644
5645 return true;
5646 }
5647 else if (MEM_P (XEXP (x, 0)))
43e9d192 5648 {
b1685e62
JG
5649 /* All loads can zero extend to any size for free. */
5650 *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
43e9d192
IB
5651 return true;
5652 }
b1685e62
JG
5653
5654 /* UXTB/UXTH. */
5655 if (speed)
5656 *cost += extra_cost->alu.extend;
5657
43e9d192
IB
5658 return false;
5659
5660 case SIGN_EXTEND:
b1685e62 5661 if (MEM_P (XEXP (x, 0)))
43e9d192 5662 {
b1685e62
JG
5663 /* LDRSH. */
5664 if (speed)
5665 {
5666 rtx address = XEXP (XEXP (x, 0), 0);
5667 *cost += extra_cost->ldst.load_sign_extend;
5668
5669 *cost +=
5670 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5671 0, speed));
5672 }
43e9d192
IB
5673 return true;
5674 }
b1685e62
JG
5675
5676 if (speed)
5677 *cost += extra_cost->alu.extend;
43e9d192
IB
5678 return false;
5679
ba0cfa17
JG
5680 case ASHIFT:
5681 op0 = XEXP (x, 0);
5682 op1 = XEXP (x, 1);
5683
5684 if (CONST_INT_P (op1))
5685 {
5686	      /* LSL (immediate), UBFM, UBFIZ and friends.  These are all
5687 aliases. */
5688 if (speed)
5689 *cost += extra_cost->alu.shift;
5690
5691 /* We can incorporate zero/sign extend for free. */
5692 if (GET_CODE (op0) == ZERO_EXTEND
5693 || GET_CODE (op0) == SIGN_EXTEND)
5694 op0 = XEXP (op0, 0);
5695
5696 *cost += rtx_cost (op0, ASHIFT, 0, speed);
5697 return true;
5698 }
5699 else
5700 {
5701 /* LSLV. */
5702 if (speed)
5703 *cost += extra_cost->alu.shift_reg;
5704
5705 return false; /* All arguments need to be in registers. */
5706 }
5707
43e9d192 5708 case ROTATE:
43e9d192
IB
5709 case ROTATERT:
5710 case LSHIFTRT:
43e9d192 5711 case ASHIFTRT:
ba0cfa17
JG
5712 op0 = XEXP (x, 0);
5713 op1 = XEXP (x, 1);
43e9d192 5714
ba0cfa17
JG
5715 if (CONST_INT_P (op1))
5716 {
5717 /* ASR (immediate) and friends. */
5718 if (speed)
5719 *cost += extra_cost->alu.shift;
43e9d192 5720
ba0cfa17
JG
5721 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5722 return true;
5723 }
5724 else
5725 {
5726
5727 /* ASR (register) and friends. */
5728 if (speed)
5729 *cost += extra_cost->alu.shift_reg;
5730
5731 return false; /* All arguments need to be in registers. */
5732 }
43e9d192 5733
909734be
JG
5734 case SYMBOL_REF:
5735
5736 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5737 {
5738 /* LDR. */
5739 if (speed)
5740 *cost += extra_cost->ldst.load;
5741 }
5742 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
5743 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
5744 {
5745 /* ADRP, followed by ADD. */
5746 *cost += COSTS_N_INSNS (1);
5747 if (speed)
5748 *cost += 2 * extra_cost->alu.arith;
5749 }
5750 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
5751 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
5752 {
5753 /* ADR. */
5754 if (speed)
5755 *cost += extra_cost->alu.arith;
5756 }
5757
5758 if (flag_pic)
5759 {
5760 /* One extra load instruction, after accessing the GOT. */
5761 *cost += COSTS_N_INSNS (1);
5762 if (speed)
5763 *cost += extra_cost->ldst.load;
5764 }
43e9d192
IB
5765 return true;
5766
909734be 5767 case HIGH:
43e9d192 5768 case LO_SUM:
909734be
JG
5769 /* ADRP/ADD (immediate). */
5770 if (speed)
5771 *cost += extra_cost->alu.arith;
43e9d192
IB
5772 return true;
5773
5774 case ZERO_EXTRACT:
5775 case SIGN_EXTRACT:
7cc2145f
JG
5776 /* UBFX/SBFX. */
5777 if (speed)
5778 *cost += extra_cost->alu.bfx;
5779
5780 /* We can trust that the immediates used will be correct (there
5781 are no by-register forms), so we need only cost op0. */
5782 *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
43e9d192
IB
5783 return true;
5784
5785 case MULT:
4745e701
JG
5786 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
5787 /* aarch64_rtx_mult_cost always handles recursion to its
5788 operands. */
5789 return true;
43e9d192
IB
5790
5791 case MOD:
5792 case UMOD:
43e9d192
IB
5793 if (speed)
5794 {
5795 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
73250c4c
KT
5796 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
5797 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
43e9d192 5798 else if (GET_MODE (x) == DFmode)
73250c4c
KT
5799 *cost += (extra_cost->fp[1].mult
5800 + extra_cost->fp[1].div);
43e9d192 5801 else if (GET_MODE (x) == SFmode)
73250c4c
KT
5802 *cost += (extra_cost->fp[0].mult
5803 + extra_cost->fp[0].div);
43e9d192
IB
5804 }
5805 return false; /* All arguments need to be in registers. */
5806
5807 case DIV:
5808 case UDIV:
4105fe38 5809 case SQRT:
43e9d192
IB
5810 if (speed)
5811 {
4105fe38
JG
5812 if (GET_MODE_CLASS (mode) == MODE_INT)
5813 /* There is no integer SQRT, so only DIV and UDIV can get
5814 here. */
5815 *cost += extra_cost->mult[mode == DImode].idiv;
5816 else
5817 *cost += extra_cost->fp[mode == DFmode].div;
43e9d192
IB
5818 }
5819 return false; /* All arguments need to be in registers. */
5820
a8eecd00 5821 case IF_THEN_ELSE:
2d5ffe46
AP
5822 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
5823 XEXP (x, 2), cost, speed);
a8eecd00
JG
5824
5825 case EQ:
5826 case NE:
5827 case GT:
5828 case GTU:
5829 case LT:
5830 case LTU:
5831 case GE:
5832 case GEU:
5833 case LE:
5834 case LEU:
5835
5836 return false; /* All arguments must be in registers. */
5837
b292109f
JG
5838 case FMA:
5839 op0 = XEXP (x, 0);
5840 op1 = XEXP (x, 1);
5841 op2 = XEXP (x, 2);
5842
5843 if (speed)
5844 *cost += extra_cost->fp[mode == DFmode].fma;
5845
5846      /* FMSUB, FNMADD, and FNMSUB absorb the negations for free.  */
5847 if (GET_CODE (op0) == NEG)
5848 op0 = XEXP (op0, 0);
5849
5850 if (GET_CODE (op2) == NEG)
5851 op2 = XEXP (op2, 0);
5852
5853 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
5854 and the by-element operand as operand 0. */
5855 if (GET_CODE (op1) == NEG)
5856 op1 = XEXP (op1, 0);
5857
5858 /* Catch vector-by-element operations. The by-element operand can
5859 either be (vec_duplicate (vec_select (x))) or just
5860 (vec_select (x)), depending on whether we are multiplying by
5861 a vector or a scalar.
5862
5863	 Canonicalization is not very good in these cases: FMA4 will put the
5864	 by-element operand as operand 0, FNMA4 will have it as operand 1.  */
5865 if (GET_CODE (op0) == VEC_DUPLICATE)
5866 op0 = XEXP (op0, 0);
5867 else if (GET_CODE (op1) == VEC_DUPLICATE)
5868 op1 = XEXP (op1, 0);
5869
5870 if (GET_CODE (op0) == VEC_SELECT)
5871 op0 = XEXP (op0, 0);
5872 else if (GET_CODE (op1) == VEC_SELECT)
5873 op1 = XEXP (op1, 0);
5874
5875 /* If the remaining parameters are not registers,
5876 get the cost to put them into registers. */
5877 *cost += rtx_cost (op0, FMA, 0, speed);
5878 *cost += rtx_cost (op1, FMA, 1, speed);
5879 *cost += rtx_cost (op2, FMA, 2, speed);
5880 return true;
5881
5882 case FLOAT_EXTEND:
5883 if (speed)
5884 *cost += extra_cost->fp[mode == DFmode].widen;
5885 return false;
5886
5887 case FLOAT_TRUNCATE:
5888 if (speed)
5889 *cost += extra_cost->fp[mode == DFmode].narrow;
5890 return false;
5891
61263118
KT
5892 case FIX:
5893 case UNSIGNED_FIX:
5894 x = XEXP (x, 0);
5895 /* Strip the rounding part. They will all be implemented
5896 by the fcvt* family of instructions anyway. */
5897 if (GET_CODE (x) == UNSPEC)
5898 {
5899 unsigned int uns_code = XINT (x, 1);
5900
5901 if (uns_code == UNSPEC_FRINTA
5902 || uns_code == UNSPEC_FRINTM
5903 || uns_code == UNSPEC_FRINTN
5904 || uns_code == UNSPEC_FRINTP
5905 || uns_code == UNSPEC_FRINTZ)
5906 x = XVECEXP (x, 0, 0);
5907 }
5908
5909 if (speed)
5910 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
5911
5912 *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
5913 return true;
5914
b292109f
JG
5915 case ABS:
5916 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5917 {
5918 /* FABS and FNEG are analogous. */
5919 if (speed)
5920 *cost += extra_cost->fp[mode == DFmode].neg;
5921 }
5922 else
5923 {
5924	  /* Integer ABS will either be split into
5925	     two arithmetic instructions, or will be an ABS
5926	     (scalar), which we don't model.  */
5927 *cost = COSTS_N_INSNS (2);
5928 if (speed)
5929 *cost += 2 * extra_cost->alu.arith;
5930 }
5931 return false;
5932
5933 case SMAX:
5934 case SMIN:
5935 if (speed)
5936 {
5937 /* FMAXNM/FMINNM/FMAX/FMIN.
5938 TODO: This may not be accurate for all implementations, but
5939 we do not model this in the cost tables. */
5940 *cost += extra_cost->fp[mode == DFmode].addsub;
5941 }
5942 return false;
5943
61263118
KT
5944 case UNSPEC:
5945 /* The floating point round to integer frint* instructions. */
5946 if (aarch64_frint_unspec_p (XINT (x, 1)))
5947 {
5948 if (speed)
5949 *cost += extra_cost->fp[mode == DFmode].roundint;
5950
5951 return false;
5952 }
781aeb73
KT
5953
5954 if (XINT (x, 1) == UNSPEC_RBIT)
5955 {
5956 if (speed)
5957 *cost += extra_cost->alu.rev;
5958
5959 return false;
5960 }
61263118
KT
5961 break;
5962
fb620c4a
JG
5963 case TRUNCATE:
5964
5965 /* Decompose <su>muldi3_highpart. */
5966 if (/* (truncate:DI */
5967 mode == DImode
5968 /* (lshiftrt:TI */
5969 && GET_MODE (XEXP (x, 0)) == TImode
5970 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
5971 /* (mult:TI */
5972 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5973 /* (ANY_EXTEND:TI (reg:DI))
5974 (ANY_EXTEND:TI (reg:DI))) */
5975 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
5976 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
5977 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
5978 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
5979 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
5980 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
5981 /* (const_int 64) */
5982 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
5983 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
5984 {
5985 /* UMULH/SMULH. */
5986 if (speed)
5987 *cost += extra_cost->mult[mode == DImode].extend;
5988 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
5989 MULT, 0, speed);
5990 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
5991 MULT, 1, speed);
5992 return true;
5993 }
5994
5995 /* Fall through. */
43e9d192 5996 default:
61263118 5997 break;
43e9d192 5998 }
61263118
KT
5999
6000 if (dump_file && (dump_flags & TDF_DETAILS))
6001 fprintf (dump_file,
6002 "\nFailed to cost RTX. Assuming default cost.\n");
6003
6004 return true;
43e9d192
IB
6005}
6006
0ee859b5
JG
6007/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
6008 calculated for X. This cost is stored in *COST. Returns true
6009 if the total cost of X was calculated. */
6010static bool
6011aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
6012 int param, int *cost, bool speed)
6013{
6014 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
6015
6016 if (dump_file && (dump_flags & TDF_DETAILS))
6017 {
6018 print_rtl_single (dump_file, x);
6019 fprintf (dump_file, "\n%s cost: %d (%s)\n",
6020 speed ? "Hot" : "Cold",
6021 *cost, result ? "final" : "partial");
6022 }
6023
6024 return result;
6025}
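/* As an illustration (the dump text below is reconstructed from the format
   string above, not copied from an actual dump): when detailed dumping
   (TDF_DETAILS) is enabled, the wrapper prints the RTX followed by a line
   such as

     Hot cost: 8 (final)

   where "Hot"/"Cold" reflects SPEED and "final"/"partial" reflects the
   return value of aarch64_rtx_costs.  */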
6026
43e9d192 6027static int
8a3a7e67
RH
6028aarch64_register_move_cost (enum machine_mode mode,
6029 reg_class_t from_i, reg_class_t to_i)
43e9d192 6030{
8a3a7e67
RH
6031 enum reg_class from = (enum reg_class) from_i;
6032 enum reg_class to = (enum reg_class) to_i;
43e9d192
IB
6033 const struct cpu_regmove_cost *regmove_cost
6034 = aarch64_tune_params->regmove_cost;
6035
3be07662
WD
6036 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
6037 if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
6038 to = GENERAL_REGS;
6039
6040 if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
6041 from = GENERAL_REGS;
6042
6ee70f81
AP
6043  /* Moving between a GPR and the stack register costs the same as GP2GP.  */
6044 if ((from == GENERAL_REGS && to == STACK_REG)
6045 || (to == GENERAL_REGS && from == STACK_REG))
6046 return regmove_cost->GP2GP;
6047
6048 /* To/From the stack register, we move via the gprs. */
6049 if (to == STACK_REG || from == STACK_REG)
6050 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
6051 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
6052
8919453c
WD
6053 if (GET_MODE_SIZE (mode) == 16)
6054 {
6055 /* 128-bit operations on general registers require 2 instructions. */
6056 if (from == GENERAL_REGS && to == GENERAL_REGS)
6057 return regmove_cost->GP2GP * 2;
6058 else if (from == GENERAL_REGS)
6059 return regmove_cost->GP2FP * 2;
6060 else if (to == GENERAL_REGS)
6061 return regmove_cost->FP2GP * 2;
6062
6063 /* When AdvSIMD instructions are disabled it is not possible to move
6064 a 128-bit value directly between Q registers. This is handled in
6065 secondary reload. A general register is used as a scratch to move
6066 the upper DI value and the lower DI value is moved directly,
6067 hence the cost is the sum of three moves. */
6068 if (! TARGET_SIMD)
6069 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
6070
6071 return regmove_cost->FP2FP;
6072 }
6073
43e9d192
IB
6074 if (from == GENERAL_REGS && to == GENERAL_REGS)
6075 return regmove_cost->GP2GP;
6076 else if (from == GENERAL_REGS)
6077 return regmove_cost->GP2FP;
6078 else if (to == GENERAL_REGS)
6079 return regmove_cost->FP2GP;
6080
43e9d192
IB
6081 return regmove_cost->FP2FP;
6082}
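/* A worked example with hypothetical tuning values (not taken from any real
   core's cpu_regmove_cost table): if GP2GP = 1, GP2FP = 5, FP2GP = 5 and
   FP2FP = 2, then a 128-bit GP<->GP move costs 1 * 2 = 2, a 128-bit FP<->FP
   move costs 2, and the same FP<->FP move with !TARGET_SIMD costs
   5 + 5 + 2 = 12, because it must be bounced through a general register.  */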
6083
6084static int
6085aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
6086 reg_class_t rclass ATTRIBUTE_UNUSED,
6087 bool in ATTRIBUTE_UNUSED)
6088{
6089 return aarch64_tune_params->memmov_cost;
6090}
6091
d126a4ae
AP
6092/* Return the number of instructions that can be issued per cycle. */
6093static int
6094aarch64_sched_issue_rate (void)
6095{
6096 return aarch64_tune_params->issue_rate;
6097}
6098
8990e73a
TB
6099/* Vectorizer cost model target hooks. */
6100
6101/* Implement targetm.vectorize.builtin_vectorization_cost. */
6102static int
6103aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6104 tree vectype,
6105 int misalign ATTRIBUTE_UNUSED)
6106{
6107 unsigned elements;
6108
6109 switch (type_of_cost)
6110 {
6111 case scalar_stmt:
6112 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
6113
6114 case scalar_load:
6115 return aarch64_tune_params->vec_costs->scalar_load_cost;
6116
6117 case scalar_store:
6118 return aarch64_tune_params->vec_costs->scalar_store_cost;
6119
6120 case vector_stmt:
6121 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6122
6123 case vector_load:
6124 return aarch64_tune_params->vec_costs->vec_align_load_cost;
6125
6126 case vector_store:
6127 return aarch64_tune_params->vec_costs->vec_store_cost;
6128
6129 case vec_to_scalar:
6130 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
6131
6132 case scalar_to_vec:
6133 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
6134
6135 case unaligned_load:
6136 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
6137
6138 case unaligned_store:
6139 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
6140
6141 case cond_branch_taken:
6142 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
6143
6144 case cond_branch_not_taken:
6145 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
6146
6147 case vec_perm:
6148 case vec_promote_demote:
6149 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6150
6151 case vec_construct:
6152 elements = TYPE_VECTOR_SUBPARTS (vectype);
6153 return elements / 2 + 1;
6154
6155 default:
6156 gcc_unreachable ();
6157 }
6158}
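/* For example, a vec_construct of a four-element vector
   (TYPE_VECTOR_SUBPARTS == 4) is costed at 4 / 2 + 1 = 3, independently of
   the tuning tables.  */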
6159
6160/* Implement targetm.vectorize.add_stmt_cost. */
6161static unsigned
6162aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6163 struct _stmt_vec_info *stmt_info, int misalign,
6164 enum vect_cost_model_location where)
6165{
6166 unsigned *cost = (unsigned *) data;
6167 unsigned retval = 0;
6168
6169 if (flag_vect_cost_model)
6170 {
6171 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6172 int stmt_cost =
6173 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
6174
6175 /* Statements in an inner loop relative to the loop being
6176 vectorized are weighted more heavily. The value here is
6177 a function (linear for now) of the loop nest level. */
6178 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6179 {
6180 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6181 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
6182 unsigned nest_level = loop_depth (loop);
6183
6184 count *= nest_level;
6185 }
6186
6187 retval = (unsigned) (count * stmt_cost);
6188 cost[where] += retval;
6189 }
6190
6191 return retval;
6192}
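/* Illustration of the weighting above: a vector statement in the body of a
   loop at nest level 2 inside the loop being vectorized has its COUNT
   multiplied by 2, so it contributes 2 * vec_stmt_cost to the vect_body
   bucket instead of vec_stmt_cost.  */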
6193
43e9d192
IB
6194static void initialize_aarch64_code_model (void);
6195
6196/* Parse the architecture extension string. */
6197
6198static void
6199aarch64_parse_extension (char *str)
6200{
6201 /* The extension string is parsed left to right. */
6202 const struct aarch64_option_extension *opt = NULL;
6203
6204 /* Flag to say whether we are adding or removing an extension. */
6205 int adding_ext = -1;
6206
6207 while (str != NULL && *str != 0)
6208 {
6209 char *ext;
6210 size_t len;
6211
6212 str++;
6213 ext = strchr (str, '+');
6214
6215 if (ext != NULL)
6216 len = ext - str;
6217 else
6218 len = strlen (str);
6219
6220 if (len >= 2 && strncmp (str, "no", 2) == 0)
6221 {
6222 adding_ext = 0;
6223 len -= 2;
6224 str += 2;
6225 }
6226 else if (len > 0)
6227 adding_ext = 1;
6228
6229 if (len == 0)
6230 {
6231 error ("missing feature modifier after %qs", "+no");
6232 return;
6233 }
6234
6235 /* Scan over the extensions table trying to find an exact match. */
6236 for (opt = all_extensions; opt->name != NULL; opt++)
6237 {
6238 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
6239 {
6240 /* Add or remove the extension. */
6241 if (adding_ext)
6242 aarch64_isa_flags |= opt->flags_on;
6243 else
6244 aarch64_isa_flags &= ~(opt->flags_off);
6245 break;
6246 }
6247 }
6248
6249 if (opt->name == NULL)
6250 {
6251 /* Extension not found in list. */
6252 error ("unknown feature modifier %qs", str);
6253 return;
6254 }
6255
6256 str = ext;
6257 };
6258
6259 return;
6260}
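/* Illustration: given an extension string such as "+crypto+nofp" (both
   names are assumed to be entries in all_extensions, as they are for the
   standard extension set), the first iteration matches "crypto" and ORs in
   its flags_on bits; the second sees the "no" prefix, matches "fp" and
   clears its flags_off bits.  An empty modifier such as a trailing "+no"
   is rejected with the "missing feature modifier" error above.  */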
6261
6262/* Parse the ARCH string. */
6263
6264static void
6265aarch64_parse_arch (void)
6266{
6267 char *ext;
6268 const struct processor *arch;
6269 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
6270 size_t len;
6271
6272 strcpy (str, aarch64_arch_string);
6273
6274 ext = strchr (str, '+');
6275
6276 if (ext != NULL)
6277 len = ext - str;
6278 else
6279 len = strlen (str);
6280
6281 if (len == 0)
6282 {
6283 error ("missing arch name in -march=%qs", str);
6284 return;
6285 }
6286
6287 /* Loop through the list of supported ARCHs to find a match. */
6288 for (arch = all_architectures; arch->name != NULL; arch++)
6289 {
6290 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
6291 {
6292 selected_arch = arch;
6293 aarch64_isa_flags = selected_arch->flags;
ffee7aa9
JG
6294
6295 if (!selected_cpu)
6296 selected_cpu = &all_cores[selected_arch->core];
43e9d192
IB
6297
6298 if (ext != NULL)
6299 {
6300 /* ARCH string contains at least one extension. */
6301 aarch64_parse_extension (ext);
6302 }
6303
ffee7aa9
JG
6304 if (strcmp (selected_arch->arch, selected_cpu->arch))
6305 {
6306 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6307 selected_cpu->name, selected_arch->name);
6308 }
6309
43e9d192
IB
6310 return;
6311 }
6312 }
6313
6314 /* ARCH name not found in list. */
6315 error ("unknown value %qs for -march", str);
6316 return;
6317}
6318
6319/* Parse the CPU string. */
6320
6321static void
6322aarch64_parse_cpu (void)
6323{
6324 char *ext;
6325 const struct processor *cpu;
6326 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
6327 size_t len;
6328
6329 strcpy (str, aarch64_cpu_string);
6330
6331 ext = strchr (str, '+');
6332
6333 if (ext != NULL)
6334 len = ext - str;
6335 else
6336 len = strlen (str);
6337
6338 if (len == 0)
6339 {
6340 error ("missing cpu name in -mcpu=%qs", str);
6341 return;
6342 }
6343
6344 /* Loop through the list of supported CPUs to find a match. */
6345 for (cpu = all_cores; cpu->name != NULL; cpu++)
6346 {
6347 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
6348 {
6349 selected_cpu = cpu;
192ed1dd 6350 selected_tune = cpu;
43e9d192
IB
6351 aarch64_isa_flags = selected_cpu->flags;
6352
6353 if (ext != NULL)
6354 {
6355 /* CPU string contains at least one extension. */
6356 aarch64_parse_extension (ext);
6357 }
6358
6359 return;
6360 }
6361 }
6362
6363 /* CPU name not found in list. */
6364 error ("unknown value %qs for -mcpu", str);
6365 return;
6366}
6367
6368/* Parse the TUNE string. */
6369
6370static void
6371aarch64_parse_tune (void)
6372{
6373 const struct processor *cpu;
6374 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
6375 strcpy (str, aarch64_tune_string);
6376
6377 /* Loop through the list of supported CPUs to find a match. */
6378 for (cpu = all_cores; cpu->name != NULL; cpu++)
6379 {
6380 if (strcmp (cpu->name, str) == 0)
6381 {
6382 selected_tune = cpu;
6383 return;
6384 }
6385 }
6386
6387 /* CPU name not found in list. */
6388 error ("unknown value %qs for -mtune", str);
6389 return;
6390}
6391
6392
6393/* Implement TARGET_OPTION_OVERRIDE. */
6394
6395static void
6396aarch64_override_options (void)
6397{
ffee7aa9
JG
6398 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
6399 If either of -march or -mtune is given, they override their
6400 respective component of -mcpu.
43e9d192 6401
ffee7aa9
JG
6402	   So, first parse AARCH64_CPU_STRING, then the others; be careful
6403	   with -march because, if -mcpu is not present on the command line,
6404	   -march must set a sensible default CPU.  */
6405 if (aarch64_cpu_string)
43e9d192 6406 {
ffee7aa9 6407 aarch64_parse_cpu ();
43e9d192
IB
6408 }
6409
ffee7aa9 6410 if (aarch64_arch_string)
43e9d192 6411 {
ffee7aa9 6412 aarch64_parse_arch ();
43e9d192
IB
6413 }
6414
6415 if (aarch64_tune_string)
6416 {
6417 aarch64_parse_tune ();
6418 }
6419
63892fa2
KV
6420#ifndef HAVE_AS_MABI_OPTION
6421 /* The compiler may have been configured with binutils 2.23.*, which does
6422    not support ILP32.  */
6423 if (TARGET_ILP32)
6424 error ("Assembler does not support -mabi=ilp32");
6425#endif
6426
43e9d192
IB
6427 initialize_aarch64_code_model ();
6428
6429 aarch64_build_bitmask_table ();
6430
6431 /* This target defaults to strict volatile bitfields. */
6432 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
6433 flag_strict_volatile_bitfields = 1;
6434
6435 /* If the user did not specify a processor, choose the default
6436 one for them. This will be the CPU set during configuration using
a3cd0246 6437 --with-cpu, otherwise it is "generic". */
43e9d192
IB
6438 if (!selected_cpu)
6439 {
6440 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
6441 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
6442 }
6443
6444 gcc_assert (selected_cpu);
6445
6446 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
6447 if (!selected_tune)
6448 selected_tune = &all_cores[selected_cpu->core];
6449
6450 aarch64_tune_flags = selected_tune->flags;
6451 aarch64_tune = selected_tune->core;
6452 aarch64_tune_params = selected_tune->tune;
6453
5e396da6
KT
6454 if (aarch64_fix_a53_err835769 == 2)
6455 {
6456#ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
6457 aarch64_fix_a53_err835769 = 1;
6458#else
6459 aarch64_fix_a53_err835769 = 0;
6460#endif
6461 }
6462
43e9d192
IB
6463 aarch64_override_options_after_change ();
6464}
6465
6466/* Implement targetm.override_options_after_change. */
6467
6468static void
6469aarch64_override_options_after_change (void)
6470{
0b7f8166
MS
6471 if (flag_omit_frame_pointer)
6472 flag_omit_leaf_frame_pointer = false;
6473 else if (flag_omit_leaf_frame_pointer)
6474 flag_omit_frame_pointer = true;
43e9d192
IB
6475}
6476
6477static struct machine_function *
6478aarch64_init_machine_status (void)
6479{
6480 struct machine_function *machine;
766090c2 6481 machine = ggc_cleared_alloc<machine_function> ();
43e9d192
IB
6482 return machine;
6483}
6484
6485void
6486aarch64_init_expanders (void)
6487{
6488 init_machine_status = aarch64_init_machine_status;
6489}
6490
6491/* A checking mechanism for the implementation of the various code models. */
6492static void
6493initialize_aarch64_code_model (void)
6494{
6495 if (flag_pic)
6496 {
6497 switch (aarch64_cmodel_var)
6498 {
6499 case AARCH64_CMODEL_TINY:
6500 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
6501 break;
6502 case AARCH64_CMODEL_SMALL:
6503 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
6504 break;
6505 case AARCH64_CMODEL_LARGE:
6506 sorry ("code model %qs with -f%s", "large",
6507 flag_pic > 1 ? "PIC" : "pic");
6508 default:
6509 gcc_unreachable ();
6510 }
6511 }
6512 else
6513 aarch64_cmodel = aarch64_cmodel_var;
6514}
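/* For example, compiling with -fpic and the default small code model
   selects AARCH64_CMODEL_SMALL_PIC here, while combining -mcmodel=large
   with -fpic or -fPIC is rejected with the sorry () message above.  */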
6515
6516/* Return true if SYMBOL_REF X binds locally. */
6517
6518static bool
6519aarch64_symbol_binds_local_p (const_rtx x)
6520{
6521 return (SYMBOL_REF_DECL (x)
6522 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
6523 : SYMBOL_REF_LOCAL_P (x));
6524}
6525
6526/* Return true if SYMBOL_REF X is thread local.  */
6527static bool
6528aarch64_tls_symbol_p (rtx x)
6529{
6530 if (! TARGET_HAVE_TLS)
6531 return false;
6532
6533 if (GET_CODE (x) != SYMBOL_REF)
6534 return false;
6535
6536 return SYMBOL_REF_TLS_MODEL (x) != 0;
6537}
6538
6539/* Classify a TLS symbol into one of the TLS kinds. */
6540enum aarch64_symbol_type
6541aarch64_classify_tls_symbol (rtx x)
6542{
6543 enum tls_model tls_kind = tls_symbolic_operand_type (x);
6544
6545 switch (tls_kind)
6546 {
6547 case TLS_MODEL_GLOBAL_DYNAMIC:
6548 case TLS_MODEL_LOCAL_DYNAMIC:
6549 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
6550
6551 case TLS_MODEL_INITIAL_EXEC:
6552 return SYMBOL_SMALL_GOTTPREL;
6553
6554 case TLS_MODEL_LOCAL_EXEC:
6555 return SYMBOL_SMALL_TPREL;
6556
6557 case TLS_MODEL_EMULATED:
6558 case TLS_MODEL_NONE:
6559 return SYMBOL_FORCE_TO_MEM;
6560
6561 default:
6562 gcc_unreachable ();
6563 }
6564}
6565
6566/* Return the method that should be used to access SYMBOL_REF or
6567 LABEL_REF X in context CONTEXT. */
17f4d4bf 6568
43e9d192
IB
6569enum aarch64_symbol_type
6570aarch64_classify_symbol (rtx x,
6571 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
6572{
6573 if (GET_CODE (x) == LABEL_REF)
6574 {
6575 switch (aarch64_cmodel)
6576 {
6577 case AARCH64_CMODEL_LARGE:
6578 return SYMBOL_FORCE_TO_MEM;
6579
6580 case AARCH64_CMODEL_TINY_PIC:
6581 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
6582 return SYMBOL_TINY_ABSOLUTE;
6583
43e9d192
IB
6584 case AARCH64_CMODEL_SMALL_PIC:
6585 case AARCH64_CMODEL_SMALL:
6586 return SYMBOL_SMALL_ABSOLUTE;
6587
6588 default:
6589 gcc_unreachable ();
6590 }
6591 }
6592
17f4d4bf 6593 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 6594 {
4a985a37
MS
6595 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
6596 return SYMBOL_FORCE_TO_MEM;
43e9d192
IB
6597
6598 if (aarch64_tls_symbol_p (x))
6599 return aarch64_classify_tls_symbol (x);
6600
17f4d4bf
CSS
6601 switch (aarch64_cmodel)
6602 {
6603 case AARCH64_CMODEL_TINY:
a5350ddc
CSS
6604 if (SYMBOL_REF_WEAK (x))
6605 return SYMBOL_FORCE_TO_MEM;
6606 return SYMBOL_TINY_ABSOLUTE;
6607
17f4d4bf
CSS
6608 case AARCH64_CMODEL_SMALL:
6609 if (SYMBOL_REF_WEAK (x))
6610 return SYMBOL_FORCE_TO_MEM;
6611 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 6612
17f4d4bf 6613 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 6614 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 6615 return SYMBOL_TINY_GOT;
38e6c9a6
MS
6616 return SYMBOL_TINY_ABSOLUTE;
6617
17f4d4bf
CSS
6618 case AARCH64_CMODEL_SMALL_PIC:
6619 if (!aarch64_symbol_binds_local_p (x))
6620 return SYMBOL_SMALL_GOT;
6621 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 6622
17f4d4bf
CSS
6623 default:
6624 gcc_unreachable ();
6625 }
43e9d192 6626 }
17f4d4bf 6627
43e9d192
IB
6628 /* By default push everything into the constant pool. */
6629 return SYMBOL_FORCE_TO_MEM;
6630}
6631
43e9d192
IB
6632bool
6633aarch64_constant_address_p (rtx x)
6634{
6635 return (CONSTANT_P (x) && memory_address_p (DImode, x));
6636}
6637
6638bool
6639aarch64_legitimate_pic_operand_p (rtx x)
6640{
6641 if (GET_CODE (x) == SYMBOL_REF
6642 || (GET_CODE (x) == CONST
6643 && GET_CODE (XEXP (x, 0)) == PLUS
6644 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6645 return false;
6646
6647 return true;
6648}
6649
3520f7cc
JG
6650/* Return true if X holds either a valid quarter-precision immediate
6651   or a floating-point +0.0 constant.  */
6652static bool
6653aarch64_valid_floating_const (enum machine_mode mode, rtx x)
6654{
6655 if (!CONST_DOUBLE_P (x))
6656 return false;
6657
6658  /* TODO: We could handle moving 0.0 to a TFmode register,
6659     but first we would like to refactor the movtf_aarch64
6660     pattern to be more amenable to splitting moves properly and
6661     correctly gating on TARGET_SIMD.  For now, reject all
6662     constants that are not destined for SFmode or DFmode registers.  */
6663 if (!(mode == SFmode || mode == DFmode))
6664 return false;
6665
6666 if (aarch64_float_const_zero_rtx_p (x))
6667 return true;
6668 return aarch64_float_const_representable_p (x);
6669}
6670
43e9d192
IB
6671static bool
6672aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
6673{
6674 /* Do not allow vector struct mode constants. We could support
6675 0 and -1 easily, but they need support in aarch64-simd.md. */
6676 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
6677 return false;
6678
6679 /* This could probably go away because
6680 we now decompose CONST_INTs according to expand_mov_immediate. */
6681 if ((GET_CODE (x) == CONST_VECTOR
48063b9d 6682 && aarch64_simd_valid_immediate (x, mode, false, NULL))
3520f7cc
JG
6683 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
6684 return !targetm.cannot_force_const_mem (mode, x);
43e9d192
IB
6685
6686 if (GET_CODE (x) == HIGH
6687 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6688 return true;
6689
6690 return aarch64_constant_address_p (x);
6691}
6692
a5bc806c 6693rtx
43e9d192
IB
6694aarch64_load_tp (rtx target)
6695{
6696 if (!target
6697 || GET_MODE (target) != Pmode
6698 || !register_operand (target, Pmode))
6699 target = gen_reg_rtx (Pmode);
6700
6701 /* Can return in any reg. */
6702 emit_insn (gen_aarch64_load_tp_hard (target));
6703 return target;
6704}
6705
43e9d192
IB
6706/* On AAPCS systems, this is the "struct __va_list". */
6707static GTY(()) tree va_list_type;
6708
6709/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
6710 Return the type to use as __builtin_va_list.
6711
6712 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
6713
6714 struct __va_list
6715 {
6716 void *__stack;
6717 void *__gr_top;
6718 void *__vr_top;
6719 int __gr_offs;
6720 int __vr_offs;
6721 }; */
6722
6723static tree
6724aarch64_build_builtin_va_list (void)
6725{
6726 tree va_list_name;
6727 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6728
6729 /* Create the type. */
6730 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
6731 /* Give it the required name. */
6732 va_list_name = build_decl (BUILTINS_LOCATION,
6733 TYPE_DECL,
6734 get_identifier ("__va_list"),
6735 va_list_type);
6736 DECL_ARTIFICIAL (va_list_name) = 1;
6737 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 6738 TYPE_STUB_DECL (va_list_type) = va_list_name;
43e9d192
IB
6739
6740 /* Create the fields. */
6741 f_stack = build_decl (BUILTINS_LOCATION,
6742 FIELD_DECL, get_identifier ("__stack"),
6743 ptr_type_node);
6744 f_grtop = build_decl (BUILTINS_LOCATION,
6745 FIELD_DECL, get_identifier ("__gr_top"),
6746 ptr_type_node);
6747 f_vrtop = build_decl (BUILTINS_LOCATION,
6748 FIELD_DECL, get_identifier ("__vr_top"),
6749 ptr_type_node);
6750 f_groff = build_decl (BUILTINS_LOCATION,
6751 FIELD_DECL, get_identifier ("__gr_offs"),
6752 integer_type_node);
6753 f_vroff = build_decl (BUILTINS_LOCATION,
6754 FIELD_DECL, get_identifier ("__vr_offs"),
6755 integer_type_node);
6756
6757 DECL_ARTIFICIAL (f_stack) = 1;
6758 DECL_ARTIFICIAL (f_grtop) = 1;
6759 DECL_ARTIFICIAL (f_vrtop) = 1;
6760 DECL_ARTIFICIAL (f_groff) = 1;
6761 DECL_ARTIFICIAL (f_vroff) = 1;
6762
6763 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
6764 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
6765 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
6766 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
6767 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
6768
6769 TYPE_FIELDS (va_list_type) = f_stack;
6770 DECL_CHAIN (f_stack) = f_grtop;
6771 DECL_CHAIN (f_grtop) = f_vrtop;
6772 DECL_CHAIN (f_vrtop) = f_groff;
6773 DECL_CHAIN (f_groff) = f_vroff;
6774
6775 /* Compute its layout. */
6776 layout_type (va_list_type);
6777
6778 return va_list_type;
6779}
6780
6781/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
6782static void
6783aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6784{
6785 const CUMULATIVE_ARGS *cum;
6786 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6787 tree stack, grtop, vrtop, groff, vroff;
6788 tree t;
6789 int gr_save_area_size;
6790 int vr_save_area_size;
6791 int vr_offset;
6792
6793 cum = &crtl->args.info;
6794 gr_save_area_size
6795 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
6796 vr_save_area_size
6797 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
6798
6799 if (TARGET_GENERAL_REGS_ONLY)
6800 {
6801 if (cum->aapcs_nvrn > 0)
6802 sorry ("%qs and floating point or vector arguments",
6803 "-mgeneral-regs-only");
6804 vr_save_area_size = 0;
6805 }
6806
6807 f_stack = TYPE_FIELDS (va_list_type_node);
6808 f_grtop = DECL_CHAIN (f_stack);
6809 f_vrtop = DECL_CHAIN (f_grtop);
6810 f_groff = DECL_CHAIN (f_vrtop);
6811 f_vroff = DECL_CHAIN (f_groff);
6812
6813 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
6814 NULL_TREE);
6815 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
6816 NULL_TREE);
6817 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
6818 NULL_TREE);
6819 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
6820 NULL_TREE);
6821 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
6822 NULL_TREE);
6823
6824 /* Emit code to initialize STACK, which points to the next varargs stack
6825 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
6826 by named arguments. STACK is 8-byte aligned. */
6827 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
6828 if (cum->aapcs_stack_size > 0)
6829 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
6830 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
6831 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6832
6833 /* Emit code to initialize GRTOP, the top of the GR save area.
6834 virtual_incoming_args_rtx should have been 16 byte aligned. */
6835 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
6836 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
6837 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6838
6839 /* Emit code to initialize VRTOP, the top of the VR save area.
6840 This address is gr_save_area_bytes below GRTOP, rounded
6841 down to the next 16-byte boundary. */
6842 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
6843 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
6844 STACK_BOUNDARY / BITS_PER_UNIT);
6845
6846 if (vr_offset)
6847 t = fold_build_pointer_plus_hwi (t, -vr_offset);
6848 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
6849 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6850
6851 /* Emit code to initialize GROFF, the offset from GRTOP of the
6852 next GPR argument. */
6853 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
6854 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
6855 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6856
6857 /* Likewise emit code to initialize VROFF, the offset from FTOP
6858 of the next VR argument. */
6859 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
6860 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
6861 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6862}
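/* A worked example (the prototype below is purely illustrative): for

     void f (int n, ...);

   one general register is consumed by the named argument, so
   gr_save_area_size = (8 - 1) * 8 = 56 and __gr_offs is initialized to -56,
   while no named FP/SIMD arguments are used, so
   vr_save_area_size = 8 * 16 = 128 and __vr_offs is initialized to -128
   (assuming NUM_ARG_REGS == NUM_FP_ARG_REGS == 8, UNITS_PER_WORD == 8 and
   UNITS_PER_VREG == 16).  */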
6863
6864/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
6865
6866static tree
6867aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6868 gimple_seq *post_p ATTRIBUTE_UNUSED)
6869{
6870 tree addr;
6871 bool indirect_p;
6872 bool is_ha; /* is HFA or HVA. */
6873 bool dw_align; /* double-word align. */
6874 enum machine_mode ag_mode = VOIDmode;
6875 int nregs;
6876 enum machine_mode mode;
6877
6878 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6879 tree stack, f_top, f_off, off, arg, roundup, on_stack;
6880 HOST_WIDE_INT size, rsize, adjust, align;
6881 tree t, u, cond1, cond2;
6882
6883 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6884 if (indirect_p)
6885 type = build_pointer_type (type);
6886
6887 mode = TYPE_MODE (type);
6888
6889 f_stack = TYPE_FIELDS (va_list_type_node);
6890 f_grtop = DECL_CHAIN (f_stack);
6891 f_vrtop = DECL_CHAIN (f_grtop);
6892 f_groff = DECL_CHAIN (f_vrtop);
6893 f_vroff = DECL_CHAIN (f_groff);
6894
6895 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
6896 f_stack, NULL_TREE);
6897 size = int_size_in_bytes (type);
6898 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
6899
6900 dw_align = false;
6901 adjust = 0;
6902 if (aarch64_vfp_is_call_or_return_candidate (mode,
6903 type,
6904 &ag_mode,
6905 &nregs,
6906 &is_ha))
6907 {
6908 /* TYPE passed in fp/simd registers. */
6909 if (TARGET_GENERAL_REGS_ONLY)
6910 sorry ("%qs and floating point or vector arguments",
6911 "-mgeneral-regs-only");
6912
6913 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
6914 unshare_expr (valist), f_vrtop, NULL_TREE);
6915 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
6916 unshare_expr (valist), f_vroff, NULL_TREE);
6917
6918 rsize = nregs * UNITS_PER_VREG;
6919
6920 if (is_ha)
6921 {
6922 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
6923 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
6924 }
6925 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
6926 && size < UNITS_PER_VREG)
6927 {
6928 adjust = UNITS_PER_VREG - size;
6929 }
6930 }
6931 else
6932 {
6933 /* TYPE passed in general registers. */
6934 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
6935 unshare_expr (valist), f_grtop, NULL_TREE);
6936 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
6937 unshare_expr (valist), f_groff, NULL_TREE);
6938 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6939 nregs = rsize / UNITS_PER_WORD;
6940
6941 if (align > 8)
6942 dw_align = true;
6943
6944 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6945 && size < UNITS_PER_WORD)
6946 {
6947 adjust = UNITS_PER_WORD - size;
6948 }
6949 }
6950
6951 /* Get a local temporary for the field value. */
6952 off = get_initialized_tmp_var (f_off, pre_p, NULL);
6953
6954 /* Emit code to branch if off >= 0. */
6955 t = build2 (GE_EXPR, boolean_type_node, off,
6956 build_int_cst (TREE_TYPE (off), 0));
6957 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
6958
6959 if (dw_align)
6960 {
6961 /* Emit: offs = (offs + 15) & -16. */
6962 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6963 build_int_cst (TREE_TYPE (off), 15));
6964 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
6965 build_int_cst (TREE_TYPE (off), -16));
6966 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
6967 }
6968 else
6969 roundup = NULL;
6970
6971 /* Update ap.__[g|v]r_offs */
6972 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6973 build_int_cst (TREE_TYPE (off), rsize));
6974 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
6975
6976 /* String up. */
6977 if (roundup)
6978 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6979
6980 /* [cond2] if (ap.__[g|v]r_offs > 0) */
6981 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
6982 build_int_cst (TREE_TYPE (f_off), 0));
6983 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
6984
6985 /* String up: make sure the assignment happens before the use. */
6986 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
6987 COND_EXPR_ELSE (cond1) = t;
6988
6989 /* Prepare the trees handling the argument that is passed on the stack;
6990     the top-level node will be stored in ON_STACK.  */
6991 arg = get_initialized_tmp_var (stack, pre_p, NULL);
6992 if (align > 8)
6993 {
6994 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
6995 t = fold_convert (intDI_type_node, arg);
6996 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6997 build_int_cst (TREE_TYPE (t), 15));
6998 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6999 build_int_cst (TREE_TYPE (t), -16));
7000 t = fold_convert (TREE_TYPE (arg), t);
7001 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
7002 }
7003 else
7004 roundup = NULL;
7005 /* Advance ap.__stack */
7006 t = fold_convert (intDI_type_node, arg);
7007 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
7008 build_int_cst (TREE_TYPE (t), size + 7));
7009 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7010 build_int_cst (TREE_TYPE (t), -8));
7011 t = fold_convert (TREE_TYPE (arg), t);
7012 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
7013 /* String up roundup and advance. */
7014 if (roundup)
7015 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
7016 /* String up with arg */
7017 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
7018 /* Big-endianness related address adjustment. */
7019 if (BLOCK_REG_PADDING (mode, type, 1) == downward
7020 && size < UNITS_PER_WORD)
7021 {
7022 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
7023 size_int (UNITS_PER_WORD - size));
7024 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
7025 }
7026
7027 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
7028 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
7029
7030 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
7031 t = off;
7032 if (adjust)
7033 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
7034 build_int_cst (TREE_TYPE (off), adjust));
7035
7036 t = fold_convert (sizetype, t);
7037 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
7038
7039 if (is_ha)
7040 {
7041 /* type ha; // treat as "struct {ftype field[n];}"
7042 ... [computing offs]
7043	 for (i = 0; i < nregs; ++i, offs += 16)
7044 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
7045 return ha; */
7046 int i;
7047 tree tmp_ha, field_t, field_ptr_t;
7048
7049 /* Declare a local variable. */
7050 tmp_ha = create_tmp_var_raw (type, "ha");
7051 gimple_add_tmp_var (tmp_ha);
7052
7053 /* Establish the base type. */
7054 switch (ag_mode)
7055 {
7056 case SFmode:
7057 field_t = float_type_node;
7058 field_ptr_t = float_ptr_type_node;
7059 break;
7060 case DFmode:
7061 field_t = double_type_node;
7062 field_ptr_t = double_ptr_type_node;
7063 break;
7064 case TFmode:
7065 field_t = long_double_type_node;
7066 field_ptr_t = long_double_ptr_type_node;
7067 break;
7068/* Half precision and quad precision are not fully supported yet.  Enable
7069   the following code once that support is complete.  We still need to find
7070   the correct type node for __fp16 *.  */
7071#if 0
7072 case HFmode:
7073 field_t = float_type_node;
7074 field_ptr_t = float_ptr_type_node;
7075 break;
7076#endif
7077 case V2SImode:
7078 case V4SImode:
7079 {
7080 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
7081 field_t = build_vector_type_for_mode (innertype, ag_mode);
7082 field_ptr_t = build_pointer_type (field_t);
7083 }
7084 break;
7085 default:
7086 gcc_assert (0);
7087 }
7088
7089	  /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area)  */
7090 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
7091 addr = t;
7092 t = fold_convert (field_ptr_t, addr);
7093 t = build2 (MODIFY_EXPR, field_t,
7094 build1 (INDIRECT_REF, field_t, tmp_ha),
7095 build1 (INDIRECT_REF, field_t, t));
7096
7097 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
7098 for (i = 1; i < nregs; ++i)
7099 {
7100 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
7101 u = fold_convert (field_ptr_t, addr);
7102 u = build2 (MODIFY_EXPR, field_t,
7103 build2 (MEM_REF, field_t, tmp_ha,
7104 build_int_cst (field_ptr_t,
7105 (i *
7106 int_size_in_bytes (field_t)))),
7107 build1 (INDIRECT_REF, field_t, u));
7108 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
7109 }
7110
7111 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
7112 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
7113 }
7114
7115 COND_EXPR_ELSE (cond2) = t;
7116 addr = fold_convert (build_pointer_type (type), cond1);
7117 addr = build_va_arg_indirect_ref (addr);
7118
7119 if (indirect_p)
7120 addr = build_va_arg_indirect_ref (addr);
7121
7122 return addr;
7123}
7124
7125/* Implement TARGET_SETUP_INCOMING_VARARGS. */
7126
7127static void
7128aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
7129 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7130 int no_rtl)
7131{
7132 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7133 CUMULATIVE_ARGS local_cum;
7134 int gr_saved, vr_saved;
7135
7136 /* The caller has advanced CUM up to, but not beyond, the last named
7137 argument. Advance a local copy of CUM past the last "real" named
7138 argument, to find out how many registers are left over. */
7139 local_cum = *cum;
7140 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
7141
7142	  /* Find out how many registers we need to save.  */
7143 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
7144 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
7145
7146 if (TARGET_GENERAL_REGS_ONLY)
7147 {
7148 if (local_cum.aapcs_nvrn > 0)
7149 sorry ("%qs and floating point or vector arguments",
7150 "-mgeneral-regs-only");
7151 vr_saved = 0;
7152 }
7153
7154 if (!no_rtl)
7155 {
7156 if (gr_saved > 0)
7157 {
7158 rtx ptr, mem;
7159
7160 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
7161 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
7162 - gr_saved * UNITS_PER_WORD);
7163 mem = gen_frame_mem (BLKmode, ptr);
7164 set_mem_alias_set (mem, get_varargs_alias_set ());
7165
7166 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
7167 mem, gr_saved);
7168 }
7169 if (vr_saved > 0)
7170 {
7171 /* We can't use move_block_from_reg, because it will use
7172 the wrong mode, storing D regs only. */
7173 enum machine_mode mode = TImode;
7174 int off, i;
7175
7176 /* Set OFF to the offset from virtual_incoming_args_rtx of
7177 the first vector register. The VR save area lies below
7178 the GR one, and is aligned to 16 bytes. */
7179 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7180 STACK_BOUNDARY / BITS_PER_UNIT);
7181 off -= vr_saved * UNITS_PER_VREG;
7182
7183 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
7184 {
7185 rtx ptr, mem;
7186
7187 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
7188 mem = gen_frame_mem (mode, ptr);
7189 set_mem_alias_set (mem, get_varargs_alias_set ());
7190 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
7191 off += UNITS_PER_VREG;
7192 }
7193 }
7194 }
7195
7196 /* We don't save the size into *PRETEND_SIZE because we want to avoid
7197 any complication of having crtl->args.pretend_args_size changed. */
8799637a 7198 cfun->machine->frame.saved_varargs_size
43e9d192
IB
7199 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7200 STACK_BOUNDARY / BITS_PER_UNIT)
7201 + vr_saved * UNITS_PER_VREG);
7202}
7203
7204static void
7205aarch64_conditional_register_usage (void)
7206{
7207 int i;
7208 if (!TARGET_FLOAT)
7209 {
7210 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
7211 {
7212 fixed_regs[i] = 1;
7213 call_used_regs[i] = 1;
7214 }
7215 }
7216}
7217
7218/* Walk down the type tree of TYPE counting consecutive base elements.
7219 If *MODEP is VOIDmode, then set it to the first valid floating point
7220 type. If a non-floating point type is found, or if a floating point
7221 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
7222 otherwise return the count in the sub-tree. */
7223static int
7224aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
7225{
7226 enum machine_mode mode;
7227 HOST_WIDE_INT size;
7228
7229 switch (TREE_CODE (type))
7230 {
7231 case REAL_TYPE:
7232 mode = TYPE_MODE (type);
7233 if (mode != DFmode && mode != SFmode && mode != TFmode)
7234 return -1;
7235
7236 if (*modep == VOIDmode)
7237 *modep = mode;
7238
7239 if (*modep == mode)
7240 return 1;
7241
7242 break;
7243
7244 case COMPLEX_TYPE:
7245 mode = TYPE_MODE (TREE_TYPE (type));
7246 if (mode != DFmode && mode != SFmode && mode != TFmode)
7247 return -1;
7248
7249 if (*modep == VOIDmode)
7250 *modep = mode;
7251
7252 if (*modep == mode)
7253 return 2;
7254
7255 break;
7256
7257 case VECTOR_TYPE:
7258 /* Use V2SImode and V4SImode as representatives of all 64-bit
7259 and 128-bit vector types. */
7260 size = int_size_in_bytes (type);
7261 switch (size)
7262 {
7263 case 8:
7264 mode = V2SImode;
7265 break;
7266 case 16:
7267 mode = V4SImode;
7268 break;
7269 default:
7270 return -1;
7271 }
7272
7273 if (*modep == VOIDmode)
7274 *modep = mode;
7275
7276 /* Vector modes are considered to be opaque: two vectors are
7277 equivalent for the purposes of being homogeneous aggregates
7278 if they are the same size. */
7279 if (*modep == mode)
7280 return 1;
7281
7282 break;
7283
7284 case ARRAY_TYPE:
7285 {
7286 int count;
7287 tree index = TYPE_DOMAIN (type);
7288
807e902e
KZ
7289 /* Can't handle incomplete types nor sizes that are not
7290 fixed. */
7291 if (!COMPLETE_TYPE_P (type)
7292 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
7293 return -1;
7294
7295 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
7296 if (count == -1
7297 || !index
7298 || !TYPE_MAX_VALUE (index)
cc269bb6 7299 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 7300 || !TYPE_MIN_VALUE (index)
cc269bb6 7301 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
43e9d192
IB
7302 || count < 0)
7303 return -1;
7304
ae7e9ddd
RS
7305 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
7306 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
43e9d192
IB
7307
7308 /* There must be no padding. */
807e902e 7309 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
7310 return -1;
7311
7312 return count;
7313 }
7314
7315 case RECORD_TYPE:
7316 {
7317 int count = 0;
7318 int sub_count;
7319 tree field;
7320
807e902e
KZ
7321 /* Can't handle incomplete types nor sizes that are not
7322 fixed. */
7323 if (!COMPLETE_TYPE_P (type)
7324 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
7325 return -1;
7326
7327 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7328 {
7329 if (TREE_CODE (field) != FIELD_DECL)
7330 continue;
7331
7332 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7333 if (sub_count < 0)
7334 return -1;
7335 count += sub_count;
7336 }
7337
7338 /* There must be no padding. */
807e902e 7339 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
7340 return -1;
7341
7342 return count;
7343 }
7344
7345 case UNION_TYPE:
7346 case QUAL_UNION_TYPE:
7347 {
7348 /* These aren't very interesting except in a degenerate case. */
7349 int count = 0;
7350 int sub_count;
7351 tree field;
7352
807e902e
KZ
7353 /* Can't handle incomplete types nor sizes that are not
7354 fixed. */
7355 if (!COMPLETE_TYPE_P (type)
7356 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
43e9d192
IB
7357 return -1;
7358
7359 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7360 {
7361 if (TREE_CODE (field) != FIELD_DECL)
7362 continue;
7363
7364 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7365 if (sub_count < 0)
7366 return -1;
7367 count = count > sub_count ? count : sub_count;
7368 }
7369
7370 /* There must be no padding. */
807e902e 7371 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
43e9d192
IB
7372 return -1;
7373
7374 return count;
7375 }
7376
7377 default:
7378 break;
7379 }
7380
7381 return -1;
7382}
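/* An illustrative example (the type name is hypothetical): for

     struct example_hfa { double x, y, z; };

   the walk returns 3 with *MODEP set to DFmode, so the structure is a
   homogeneous floating-point aggregate of three elements.  Adding a float
   member would make an element mode disagree with *MODEP and the walk
   would return -1.  */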
7383
38e8f663
YR
7384/* Return true if we use LRA instead of the reload pass.  */
7385static bool
7386aarch64_lra_p (void)
7387{
7388 return aarch64_lra_flag;
7389}
7390
43e9d192
IB
7391/* Return TRUE if the type, as described by TYPE and MODE, is a composite
7392 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
7393 array types. The C99 floating-point complex types are also considered
7394 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
7395 types, which are GCC extensions and out of the scope of AAPCS64, are
7396 treated as composite types here as well.
7397
7398 Note that MODE itself is not sufficient in determining whether a type
7399 is such a composite type or not. This is because
7400 stor-layout.c:compute_record_mode may have already changed the MODE
7401 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
7402 structure with only one field may have its MODE set to the mode of the
7403 field. Also an integer mode whose size matches the size of the
7404 RECORD_TYPE type may be used to substitute the original mode
7405 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
7406 solely relied on. */
7407
7408static bool
7409aarch64_composite_type_p (const_tree type,
7410 enum machine_mode mode)
7411{
7412 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
7413 return true;
7414
7415 if (mode == BLKmode
7416 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7417 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
7418 return true;
7419
7420 return false;
7421}
7422
7423/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
7424 type as described in AAPCS64 \S 4.1.2.
7425
7426 See the comment above aarch64_composite_type_p for the notes on MODE. */
7427
7428static bool
7429aarch64_short_vector_p (const_tree type,
7430 enum machine_mode mode)
7431{
7432 HOST_WIDE_INT size = -1;
7433
7434 if (type && TREE_CODE (type) == VECTOR_TYPE)
7435 size = int_size_in_bytes (type);
7436 else if (!aarch64_composite_type_p (type, mode)
7437 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7438 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
7439 size = GET_MODE_SIZE (mode);
7440
7441 return (size == 8 || size == 16) ? true : false;
7442}
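/* Illustration (the typedef names are hypothetical): both of the GNU vector
   types below are "short vectors" in the AAPCS64 sense and make this
   predicate return true, since their sizes are 8 and 16 bytes respectively:

     typedef int example_int32x2 __attribute__ ((vector_size (8)));
     typedef int example_int32x4 __attribute__ ((vector_size (16)));  */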
7443
7444/* Return TRUE if an argument, whose type is described by TYPE and MODE,
7445 shall be passed or returned in simd/fp register(s) (providing these
7446 parameter passing registers are available).
7447
7448 Upon successful return, *COUNT returns the number of needed registers,
7449   *BASE_MODE returns the mode of the individual register and when IS_HA
7450 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
7451 floating-point aggregate or a homogeneous short-vector aggregate. */
7452
7453static bool
7454aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
7455 const_tree type,
7456 enum machine_mode *base_mode,
7457 int *count,
7458 bool *is_ha)
7459{
7460 enum machine_mode new_mode = VOIDmode;
7461 bool composite_p = aarch64_composite_type_p (type, mode);
7462
7463 if (is_ha != NULL) *is_ha = false;
7464
7465 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
7466 || aarch64_short_vector_p (type, mode))
7467 {
7468 *count = 1;
7469 new_mode = mode;
7470 }
7471 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7472 {
7473 if (is_ha != NULL) *is_ha = true;
7474 *count = 2;
7475 new_mode = GET_MODE_INNER (mode);
7476 }
7477 else if (type && composite_p)
7478 {
7479 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
7480
7481 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
7482 {
7483 if (is_ha != NULL) *is_ha = true;
7484 *count = ag_count;
7485 }
7486 else
7487 return false;
7488 }
7489 else
7490 return false;
7491
7492 *base_mode = new_mode;
7493 return true;
7494}
7495
7496/* Implement TARGET_STRUCT_VALUE_RTX. */
7497
7498static rtx
7499aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
7500 int incoming ATTRIBUTE_UNUSED)
7501{
7502 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
7503}
7504
7505/* Implements target hook vector_mode_supported_p. */
7506static bool
7507aarch64_vector_mode_supported_p (enum machine_mode mode)
7508{
7509 if (TARGET_SIMD
7510 && (mode == V4SImode || mode == V8HImode
7511 || mode == V16QImode || mode == V2DImode
7512 || mode == V2SImode || mode == V4HImode
7513 || mode == V8QImode || mode == V2SFmode
ad7d90cc
AL
7514 || mode == V4SFmode || mode == V2DFmode
7515 || mode == V1DFmode))
43e9d192
IB
7516 return true;
7517
7518 return false;
7519}
7520
b7342d25
IB
7521/* Return appropriate SIMD container
7522 for MODE within a vector of WIDTH bits. */
43e9d192 7523static enum machine_mode
b7342d25 7524aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
43e9d192 7525{
b7342d25 7526 gcc_assert (width == 64 || width == 128);
43e9d192 7527 if (TARGET_SIMD)
b7342d25
IB
7528 {
7529 if (width == 128)
7530 switch (mode)
7531 {
7532 case DFmode:
7533 return V2DFmode;
7534 case SFmode:
7535 return V4SFmode;
7536 case SImode:
7537 return V4SImode;
7538 case HImode:
7539 return V8HImode;
7540 case QImode:
7541 return V16QImode;
7542 case DImode:
7543 return V2DImode;
7544 default:
7545 break;
7546 }
7547 else
7548 switch (mode)
7549 {
7550 case SFmode:
7551 return V2SFmode;
7552 case SImode:
7553 return V2SImode;
7554 case HImode:
7555 return V4HImode;
7556 case QImode:
7557 return V8QImode;
7558 default:
7559 break;
7560 }
7561 }
43e9d192
IB
7562 return word_mode;
7563}
7564
b7342d25
IB
7565/* Return 128-bit container as the preferred SIMD mode for MODE. */
7566static enum machine_mode
7567aarch64_preferred_simd_mode (enum machine_mode mode)
7568{
7569 return aarch64_simd_container_mode (mode, 128);
7570}
7571
3b357264
JG
7572/* Return the bitmask of possible vector sizes for the vectorizer
7573 to iterate over. */
7574static unsigned int
7575aarch64_autovectorize_vector_sizes (void)
7576{
7577 return (16 | 8);
7578}
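/* Illustrative note (not part of the original source): with TARGET_SIMD,
   SImode maps to V4SImode for a 128-bit container and to V2SImode for a
   64-bit one, so the (16 | 8) bitmask above lets the vectorizer consider
   both 16-byte and 8-byte vectors, with the 128-bit container preferred.  */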
7579
c6fc9e43
YZ
7580/* A table to help perform AArch64-specific name mangling for AdvSIMD
7581 vector types in order to conform to the AAPCS64 (see "Procedure
7582 Call Standard for the ARM 64-bit Architecture", Appendix A). To
7583 qualify for emission with the mangled names defined in that document,
7584 a vector type must not only be of the correct mode but also be
7585 composed of AdvSIMD vector element types (e.g.
7586 __builtin_aarch64_simd_qi); these types are registered by
7587 aarch64_init_simd_builtins (). In other words, vector types defined
7588 in other ways e.g. via vector_size attribute will get default
7589 mangled names. */
7590typedef struct
7591{
7592 enum machine_mode mode;
7593 const char *element_type_name;
7594 const char *mangled_name;
7595} aarch64_simd_mangle_map_entry;
7596
7597static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
7598 /* 64-bit containerized types. */
7599 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
7600 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
7601 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
7602 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
7603 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
7604 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
7605 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
096c59be
AL
7606 { DImode, "__builtin_aarch64_simd_di", "11__Int64x1_t" },
7607 { DImode, "__builtin_aarch64_simd_udi", "12__Uint64x1_t" },
c6a29a09 7608 { V1DFmode, "__builtin_aarch64_simd_df", "13__Float64x1_t" },
c6fc9e43
YZ
7609 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
7610 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
7611 /* 128-bit containerized types. */
7612 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
7613 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
7614 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
7615 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
7616 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
7617 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
7618 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
7619 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
7620 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
7621 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
7622 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
7623 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
7baa225d 7624 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
c6fc9e43
YZ
7625 { VOIDmode, NULL, NULL }
7626};
7627
ac2b960f
YZ
7628/* Implement TARGET_MANGLE_TYPE. */
7629
6f549691 7630static const char *
ac2b960f
YZ
7631aarch64_mangle_type (const_tree type)
7632{
7633 /* The AArch64 ABI documents say that "__va_list" has to be
7634 mangled as if it is in the "std" namespace. */
7635 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
7636 return "St9__va_list";
7637
c6fc9e43
YZ
7638 /* Check the mode of the vector type, and the name of the vector
7639 element type, against the table. */
7640 if (TREE_CODE (type) == VECTOR_TYPE)
7641 {
7642 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
7643
7644 while (pos->mode != VOIDmode)
7645 {
7646 tree elt_type = TREE_TYPE (type);
7647
7648 if (pos->mode == TYPE_MODE (type)
7649 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
7650 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
7651 pos->element_type_name))
7652 return pos->mangled_name;
7653
7654 pos++;
7655 }
7656 }
7657
ac2b960f
YZ
7658 /* Use the default mangling. */
7659 return NULL;
7660}
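/* Illustrative example (assumed, not part of the original source): using the
   table above, a C++ declaration such as

     void f (int32x4_t);

   should mangle roughly as _Z1f11__Int32x4_t, whereas the same declaration
   written with a plain vector_size attribute type falls through to the
   default Itanium mangling.  */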
7661
8baff86e
KT
7662
7663/* Return true if the rtx_insn contains a MEM RTX somewhere
7664 in it. */
75cf1494
KT
7665
7666static bool
8baff86e 7667has_memory_op (rtx_insn *mem_insn)
75cf1494 7668{
8baff86e
KT
7669 subrtx_iterator::array_type array;
7670 FOR_EACH_SUBRTX (iter, array, PATTERN (mem_insn), ALL)
7671 if (MEM_P (*iter))
7672 return true;
7673
7674 return false;
75cf1494
KT
7675}
7676
7677/* Find the first rtx_insn before insn that will generate an assembly
7678 instruction. */
7679
7680static rtx_insn *
7681aarch64_prev_real_insn (rtx_insn *insn)
7682{
7683 if (!insn)
7684 return NULL;
7685
7686 do
7687 {
7688 insn = prev_real_insn (insn);
7689 }
7690 while (insn && recog_memoized (insn) < 0);
7691
7692 return insn;
7693}
7694
7695static bool
7696is_madd_op (enum attr_type t1)
7697{
7698 unsigned int i;
7699 /* A number of these may be AArch32 only. */
7700 enum attr_type mlatypes[] = {
7701 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
7702 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
7703 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
7704 };
7705
7706 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
7707 {
7708 if (t1 == mlatypes[i])
7709 return true;
7710 }
7711
7712 return false;
7713}
7714
7715/* Check if there is a register dependency between a load and the insn
7716 for which we hold recog_data. */
7717
7718static bool
7719dep_between_memop_and_curr (rtx memop)
7720{
7721 rtx load_reg;
7722 int opno;
7723
8baff86e 7724 gcc_assert (GET_CODE (memop) == SET);
75cf1494
KT
7725
7726 if (!REG_P (SET_DEST (memop)))
7727 return false;
7728
7729 load_reg = SET_DEST (memop);
8baff86e 7730 for (opno = 1; opno < recog_data.n_operands; opno++)
75cf1494
KT
7731 {
7732 rtx operand = recog_data.operand[opno];
7733 if (REG_P (operand)
7734 && reg_overlap_mentioned_p (load_reg, operand))
7735 return true;
7736
7737 }
7738 return false;
7739}
7740
8baff86e
KT
7741
7742/* When working around the Cortex-A53 erratum 835769,
7743 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
7744 instruction and has a preceding memory instruction such that a NOP
7745 should be inserted between them. */
7746
75cf1494
KT
7747bool
7748aarch64_madd_needs_nop (rtx_insn* insn)
7749{
7750 enum attr_type attr_type;
7751 rtx_insn *prev;
7752 rtx body;
7753
7754 if (!aarch64_fix_a53_err835769)
7755 return false;
7756
7757 if (recog_memoized (insn) < 0)
7758 return false;
7759
7760 attr_type = get_attr_type (insn);
7761 if (!is_madd_op (attr_type))
7762 return false;
7763
7764 prev = aarch64_prev_real_insn (insn);
8baff86e 7765 if (!prev || !has_memory_op (prev))
75cf1494
KT
7766 return false;
7767
7768 body = single_set (prev);
7769
7770 /* If the previous insn is a memory op and there is no dependency between
8baff86e
KT
7771 it and the DImode madd, emit a NOP between them. If body is NULL then we
7772 have a complex memory operation, probably a load/store pair.
7773 Be conservative for now and emit a NOP. */
7774 if (GET_MODE (recog_data.operand[0]) == DImode
7775 && (!body || !dep_between_memop_and_curr (body)))
75cf1494
KT
7776 return true;
7777
7778 return false;
7779
7780}
7781
8baff86e
KT
7782
7783/* Implement FINAL_PRESCAN_INSN. */
7784
75cf1494
KT
7785void
7786aarch64_final_prescan_insn (rtx_insn *insn)
7787{
7788 if (aarch64_madd_needs_nop (insn))
7789 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
7790}
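/* Illustrative sketch (assumed instruction sequence, not from the original
   source): with -mfix-cortex-a53-835769, a sequence such as

     ldr   x1, [x2]
     madd  x0, x3, x4, x5

   is emitted with the workaround as

     ldr   x1, [x2]
     nop   // between mem op and mult-accumulate
     madd  x0, x3, x4, x5

   because the 64-bit multiply-accumulate immediately follows a memory
   operation it does not depend on.  */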
7791
7792
43e9d192 7793/* Return the equivalent letter for size. */
81c2dfb9 7794static char
43e9d192
IB
7795sizetochar (int size)
7796{
7797 switch (size)
7798 {
7799 case 64: return 'd';
7800 case 32: return 's';
7801 case 16: return 'h';
7802 case 8 : return 'b';
7803 default: gcc_unreachable ();
7804 }
7805}
7806
3520f7cc
JG
7807/* Return true iff x is a uniform vector of floating-point
7808 constants, and the constant can be represented in
7809 quarter-precision form. Note, as aarch64_float_const_representable
7810 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
7811static bool
7812aarch64_vect_float_const_representable_p (rtx x)
7813{
7814 int i = 0;
7815 REAL_VALUE_TYPE r0, ri;
7816 rtx x0, xi;
7817
7818 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
7819 return false;
7820
7821 x0 = CONST_VECTOR_ELT (x, 0);
7822 if (!CONST_DOUBLE_P (x0))
7823 return false;
7824
7825 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
7826
7827 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
7828 {
7829 xi = CONST_VECTOR_ELT (x, i);
7830 if (!CONST_DOUBLE_P (xi))
7831 return false;
7832
7833 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
7834 if (!REAL_VALUES_EQUAL (r0, ri))
7835 return false;
7836 }
7837
7838 return aarch64_float_const_representable_p (x0);
7839}
7840
d8edd899 7841/* Return true for valid and false for invalid. */
3ea63f60 7842bool
48063b9d
IB
7843aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
7844 struct simd_immediate_info *info)
43e9d192
IB
7845{
7846#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
7847 matches = 1; \
7848 for (i = 0; i < idx; i += (STRIDE)) \
7849 if (!(TEST)) \
7850 matches = 0; \
7851 if (matches) \
7852 { \
7853 immtype = (CLASS); \
7854 elsize = (ELSIZE); \
43e9d192
IB
7855 eshift = (SHIFT); \
7856 emvn = (NEG); \
7857 break; \
7858 }
7859
7860 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7861 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7862 unsigned char bytes[16];
43e9d192
IB
7863 int immtype = -1, matches;
7864 unsigned int invmask = inverse ? 0xff : 0;
7865 int eshift, emvn;
7866
43e9d192 7867 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3520f7cc 7868 {
81c2dfb9
IB
7869 if (! (aarch64_simd_imm_zero_p (op, mode)
7870 || aarch64_vect_float_const_representable_p (op)))
d8edd899 7871 return false;
3520f7cc 7872
48063b9d
IB
7873 if (info)
7874 {
7875 info->value = CONST_VECTOR_ELT (op, 0);
81c2dfb9 7876 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
48063b9d
IB
7877 info->mvn = false;
7878 info->shift = 0;
7879 }
3520f7cc 7880
d8edd899 7881 return true;
3520f7cc 7882 }
43e9d192
IB
7883
7884 /* Splat vector constant out into a byte vector. */
7885 for (i = 0; i < n_elts; i++)
7886 {
4b1e108c
AL
7887 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
7888 it must be laid out in the vector register in reverse order. */
7889 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
43e9d192
IB
7890 unsigned HOST_WIDE_INT elpart;
7891 unsigned int part, parts;
7892
4aa81c2e 7893 if (CONST_INT_P (el))
43e9d192
IB
7894 {
7895 elpart = INTVAL (el);
7896 parts = 1;
7897 }
7898 else if (GET_CODE (el) == CONST_DOUBLE)
7899 {
7900 elpart = CONST_DOUBLE_LOW (el);
7901 parts = 2;
7902 }
7903 else
7904 gcc_unreachable ();
7905
7906 for (part = 0; part < parts; part++)
7907 {
7908 unsigned int byte;
7909 for (byte = 0; byte < innersize; byte++)
7910 {
7911 bytes[idx++] = (elpart & 0xff) ^ invmask;
7912 elpart >>= BITS_PER_UNIT;
7913 }
7914 if (GET_CODE (el) == CONST_DOUBLE)
7915 elpart = CONST_DOUBLE_HIGH (el);
7916 }
7917 }
7918
7919 /* Sanity check. */
7920 gcc_assert (idx == GET_MODE_SIZE (mode));
7921
7922 do
7923 {
7924 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7925 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
7926
7927 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7928 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7929
7930 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7931 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7932
7933 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7934 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
7935
7936 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
7937
7938 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
7939
7940 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7941 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
7942
7943 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7944 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7945
7946 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7947 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7948
7949 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7950 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
7951
7952 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
7953
7954 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
7955
7956 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
e4f0f84d 7957 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
43e9d192
IB
7958
7959 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
e4f0f84d 7960 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
43e9d192
IB
7961
7962 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
e4f0f84d 7963 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
43e9d192
IB
7964
7965 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
e4f0f84d 7966 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
43e9d192
IB
7967
7968 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
7969
7970 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7971 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
7972 }
7973 while (0);
7974
e4f0f84d 7975 if (immtype == -1)
d8edd899 7976 return false;
43e9d192 7977
48063b9d 7978 if (info)
43e9d192 7979 {
48063b9d 7980 info->element_width = elsize;
48063b9d
IB
7981 info->mvn = emvn != 0;
7982 info->shift = eshift;
7983
43e9d192
IB
7984 unsigned HOST_WIDE_INT imm = 0;
7985
e4f0f84d
TB
7986 if (immtype >= 12 && immtype <= 15)
7987 info->msl = true;
7988
43e9d192
IB
7989 /* Un-invert bytes of recognized vector, if necessary. */
7990 if (invmask != 0)
7991 for (i = 0; i < idx; i++)
7992 bytes[i] ^= invmask;
7993
7994 if (immtype == 17)
7995 {
7996 /* FIXME: Broken on 32-bit H_W_I hosts. */
7997 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7998
7999 for (i = 0; i < 8; i++)
8000 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8001 << (i * BITS_PER_UNIT);
8002
43e9d192 8003
48063b9d
IB
8004 info->value = GEN_INT (imm);
8005 }
8006 else
8007 {
8008 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8009 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
43e9d192
IB
8010
8011 /* Construct 'abcdefgh' because the assembler cannot handle
48063b9d
IB
8012 generic constants. */
8013 if (info->mvn)
43e9d192 8014 imm = ~imm;
48063b9d
IB
8015 imm = (imm >> info->shift) & 0xff;
8016 info->value = GEN_INT (imm);
8017 }
43e9d192
IB
8018 }
8019
48063b9d 8020 return true;
43e9d192
IB
8021#undef CHECK
8022}
8023
43e9d192
IB
8024/* Check if immediate shift constants are within range. */
8025bool
8026aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
8027{
8028 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
8029 if (left)
ddeabd3e 8030 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
43e9d192 8031 else
ddeabd3e 8032 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
43e9d192
IB
8033}
8034
3520f7cc
JG
8035/* Return true if X is a uniform vector where all elements
8036 are either the floating-point constant 0.0 or the
8037 integer constant 0. */
43e9d192
IB
8038bool
8039aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
8040{
3520f7cc 8041 return x == CONST0_RTX (mode);
43e9d192
IB
8042}
8043
8044bool
8045aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
8046{
8047 HOST_WIDE_INT imm = INTVAL (x);
8048 int i;
8049
8050 for (i = 0; i < 8; i++)
8051 {
8052 unsigned int byte = imm & 0xff;
8053 if (byte != 0xff && byte != 0)
8054 return false;
8055 imm >>= 8;
8056 }
8057
8058 return true;
8059}
8060
83f8c414
CSS
8061bool
8062aarch64_mov_operand_p (rtx x,
a5350ddc 8063 enum aarch64_symbol_context context,
83f8c414
CSS
8064 enum machine_mode mode)
8065{
83f8c414
CSS
8066 if (GET_CODE (x) == HIGH
8067 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
8068 return true;
8069
8070 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
8071 return true;
8072
8073 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
8074 return true;
8075
a5350ddc
CSS
8076 return aarch64_classify_symbolic_expression (x, context)
8077 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
8078}
8079
43e9d192
IB
8080/* Return a const_int vector of VAL. */
8081rtx
8082aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
8083{
8084 int nunits = GET_MODE_NUNITS (mode);
8085 rtvec v = rtvec_alloc (nunits);
8086 int i;
8087
8088 for (i=0; i < nunits; i++)
8089 RTVEC_ELT (v, i) = GEN_INT (val);
8090
8091 return gen_rtx_CONST_VECTOR (mode, v);
8092}
8093
051d0e2f
SN
8094/* Check OP is a legal scalar immediate for the MOVI instruction. */
8095
8096bool
8097aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
8098{
8099 enum machine_mode vmode;
8100
8101 gcc_assert (!VECTOR_MODE_P (mode));
8102 vmode = aarch64_preferred_simd_mode (mode);
8103 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
48063b9d 8104 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
051d0e2f
SN
8105}
8106
988fa693
JG
8107/* Construct and return a PARALLEL RTX vector with elements numbering the
8108 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
8109 the vector - from the perspective of the architecture. This does not
8110 line up with GCC's perspective on lane numbers, so we end up with
8111 different masks depending on our target endian-ness. The diagram
8112 below may help. We must draw the distinction when building masks
8113 which select one half of the vector. An instruction selecting
8114 architectural low-lanes for a big-endian target must be described using
8115 a mask selecting GCC high-lanes.
8116
8117 Big-Endian Little-Endian
8118
8119GCC 0 1 2 3 3 2 1 0
8120 | x | x | x | x | | x | x | x | x |
8121Architecture 3 2 1 0 3 2 1 0
8122
8123Low Mask: { 2, 3 } { 0, 1 }
8124High Mask: { 0, 1 } { 2, 3 }
8125*/
8126
43e9d192
IB
8127rtx
8128aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
8129{
8130 int nunits = GET_MODE_NUNITS (mode);
8131 rtvec v = rtvec_alloc (nunits / 2);
988fa693
JG
8132 int high_base = nunits / 2;
8133 int low_base = 0;
8134 int base;
43e9d192
IB
8135 rtx t1;
8136 int i;
8137
988fa693
JG
8138 if (BYTES_BIG_ENDIAN)
8139 base = high ? low_base : high_base;
8140 else
8141 base = high ? high_base : low_base;
8142
8143 for (i = 0; i < nunits / 2; i++)
43e9d192
IB
8144 RTVEC_ELT (v, i) = GEN_INT (base + i);
8145
8146 t1 = gen_rtx_PARALLEL (mode, v);
8147 return t1;
8148}
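/* Illustrative example (not part of the original source): for V4SImode,
   aarch64_simd_vect_par_cnst_half (V4SImode, false) builds
   (parallel [0 1]) on a little-endian target but (parallel [2 3]) on a
   big-endian one, matching the "Low Mask" row of the diagram above.  */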
8149
988fa693
JG
8150/* Check OP for validity as a PARALLEL RTX vector with elements
8151 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
8152 from the perspective of the architecture. See the diagram above
8153 aarch64_simd_vect_par_cnst_half for more details. */
8154
8155bool
8156aarch64_simd_check_vect_par_cnst_half (rtx op, enum machine_mode mode,
8157 bool high)
8158{
8159 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
8160 HOST_WIDE_INT count_op = XVECLEN (op, 0);
8161 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
8162 int i = 0;
8163
8164 if (!VECTOR_MODE_P (mode))
8165 return false;
8166
8167 if (count_op != count_ideal)
8168 return false;
8169
8170 for (i = 0; i < count_ideal; i++)
8171 {
8172 rtx elt_op = XVECEXP (op, 0, i);
8173 rtx elt_ideal = XVECEXP (ideal, 0, i);
8174
4aa81c2e 8175 if (!CONST_INT_P (elt_op)
988fa693
JG
8176 || INTVAL (elt_ideal) != INTVAL (elt_op))
8177 return false;
8178 }
8179 return true;
8180}
8181
43e9d192
IB
8182/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
8183 HIGH (exclusive). */
8184void
8185aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8186{
8187 HOST_WIDE_INT lane;
4aa81c2e 8188 gcc_assert (CONST_INT_P (operand));
43e9d192
IB
8189 lane = INTVAL (operand);
8190
8191 if (lane < low || lane >= high)
8192 error ("lane out of range");
8193}
8194
43e9d192
IB
8195/* Emit code to place an AdvSIMD pair result in memory locations (with equal
8196 registers). */
8197void
8198aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
8199 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
8200 rtx op1)
8201{
8202 rtx mem = gen_rtx_MEM (mode, destaddr);
8203 rtx tmp1 = gen_reg_rtx (mode);
8204 rtx tmp2 = gen_reg_rtx (mode);
8205
8206 emit_insn (intfn (tmp1, op1, tmp2));
8207
8208 emit_move_insn (mem, tmp1);
8209 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
8210 emit_move_insn (mem, tmp2);
8211}
8212
8213/* Return TRUE if OP is a valid vector addressing mode. */
8214bool
8215aarch64_simd_mem_operand_p (rtx op)
8216{
8217 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
4aa81c2e 8218 || REG_P (XEXP (op, 0)));
43e9d192
IB
8219}
8220
8221/* Set up OPERANDS for a register copy from SRC to DEST, taking care
8222 not to early-clobber SRC registers in the process.
8223
8224 We assume that the operands described by SRC and DEST represent a
8225 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
8226 number of components into which the copy has been decomposed. */
8227void
8228aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
8229 rtx *src, unsigned int count)
8230{
8231 unsigned int i;
8232
8233 if (!reg_overlap_mentioned_p (operands[0], operands[1])
8234 || REGNO (operands[0]) < REGNO (operands[1]))
8235 {
8236 for (i = 0; i < count; i++)
8237 {
8238 operands[2 * i] = dest[i];
8239 operands[2 * i + 1] = src[i];
8240 }
8241 }
8242 else
8243 {
8244 for (i = 0; i < count; i++)
8245 {
8246 operands[2 * i] = dest[count - i - 1];
8247 operands[2 * i + 1] = src[count - i - 1];
8248 }
8249 }
8250}
8251
8252/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
8253 one of VSTRUCT modes: OI, CI or XI. */
8254int
647d790d 8255aarch64_simd_attr_length_move (rtx_insn *insn)
43e9d192 8256{
43e9d192
IB
8257 enum machine_mode mode;
8258
8259 extract_insn_cached (insn);
8260
8261 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
8262 {
8263 mode = GET_MODE (recog_data.operand[0]);
8264 switch (mode)
8265 {
8266 case OImode:
8267 return 8;
8268 case CImode:
8269 return 12;
8270 case XImode:
8271 return 16;
8272 default:
8273 gcc_unreachable ();
8274 }
8275 }
8276 return 4;
8277}
8278
db0253a4
TB
8279/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
8280 alignment of a vector to 128 bits. */
8281static HOST_WIDE_INT
8282aarch64_simd_vector_alignment (const_tree type)
8283{
9439e9a1 8284 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
db0253a4
TB
8285 return MIN (align, 128);
8286}
8287
8288/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
8289static bool
8290aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
8291{
8292 if (is_packed)
8293 return false;
8294
8295 /* We guarantee alignment for vectors up to 128-bits. */
8296 if (tree_int_cst_compare (TYPE_SIZE (type),
8297 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
8298 return false;
8299
8300 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
8301 return true;
8302}
8303
4369c11e
TB
8304/* If VALS is a vector constant that can be loaded into a register
8305 using DUP, generate instructions to do so and return an RTX to
8306 assign to the register. Otherwise return NULL_RTX. */
8307static rtx
8308aarch64_simd_dup_constant (rtx vals)
8309{
8310 enum machine_mode mode = GET_MODE (vals);
8311 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8312 int n_elts = GET_MODE_NUNITS (mode);
8313 bool all_same = true;
8314 rtx x;
8315 int i;
8316
8317 if (GET_CODE (vals) != CONST_VECTOR)
8318 return NULL_RTX;
8319
8320 for (i = 1; i < n_elts; ++i)
8321 {
8322 x = CONST_VECTOR_ELT (vals, i);
8323 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
8324 all_same = false;
8325 }
8326
8327 if (!all_same)
8328 return NULL_RTX;
8329
8330 /* We can load this constant by using DUP and a constant in a
8331 single ARM register. This will be cheaper than a vector
8332 load. */
8333 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
8334 return gen_rtx_VEC_DUPLICATE (mode, x);
8335}
8336
8337
8338/* Generate code to load VALS, which is a PARALLEL containing only
8339 constants (for vec_init) or CONST_VECTOR, efficiently into a
8340 register. Returns an RTX to copy into the register, or NULL_RTX
8341 for a PARALLEL that can not be converted into a CONST_VECTOR. */
1df3f464 8342static rtx
4369c11e
TB
8343aarch64_simd_make_constant (rtx vals)
8344{
8345 enum machine_mode mode = GET_MODE (vals);
8346 rtx const_dup;
8347 rtx const_vec = NULL_RTX;
8348 int n_elts = GET_MODE_NUNITS (mode);
8349 int n_const = 0;
8350 int i;
8351
8352 if (GET_CODE (vals) == CONST_VECTOR)
8353 const_vec = vals;
8354 else if (GET_CODE (vals) == PARALLEL)
8355 {
8356 /* A CONST_VECTOR must contain only CONST_INTs and
8357 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8358 Only store valid constants in a CONST_VECTOR. */
8359 for (i = 0; i < n_elts; ++i)
8360 {
8361 rtx x = XVECEXP (vals, 0, i);
8362 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
8363 n_const++;
8364 }
8365 if (n_const == n_elts)
8366 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8367 }
8368 else
8369 gcc_unreachable ();
8370
8371 if (const_vec != NULL_RTX
48063b9d 8372 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
4369c11e
TB
8373 /* Load using MOVI/MVNI. */
8374 return const_vec;
8375 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
8376 /* Loaded using DUP. */
8377 return const_dup;
8378 else if (const_vec != NULL_RTX)
8379 /* Load from constant pool. We can not take advantage of single-cycle
8380 LD1 because we need a PC-relative addressing mode. */
8381 return const_vec;
8382 else
8383 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8384 We can not construct an initializer. */
8385 return NULL_RTX;
8386}
8387
8388void
8389aarch64_expand_vector_init (rtx target, rtx vals)
8390{
8391 enum machine_mode mode = GET_MODE (target);
8392 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8393 int n_elts = GET_MODE_NUNITS (mode);
8394 int n_var = 0, one_var = -1;
8395 bool all_same = true;
8396 rtx x, mem;
8397 int i;
8398
8399 x = XVECEXP (vals, 0, 0);
8400 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8401 n_var = 1, one_var = 0;
8402
8403 for (i = 1; i < n_elts; ++i)
8404 {
8405 x = XVECEXP (vals, 0, i);
8406 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8407 ++n_var, one_var = i;
8408
8409 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8410 all_same = false;
8411 }
8412
8413 if (n_var == 0)
8414 {
8415 rtx constant = aarch64_simd_make_constant (vals);
8416 if (constant != NULL_RTX)
8417 {
8418 emit_move_insn (target, constant);
8419 return;
8420 }
8421 }
8422
8423 /* Splat a single non-constant element if we can. */
8424 if (all_same)
8425 {
8426 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8427 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
8428 return;
8429 }
8430
8431 /* One field is non-constant. Load constant then overwrite varying
8432 field. This is more efficient than using the stack. */
8433 if (n_var == 1)
8434 {
8435 rtx copy = copy_rtx (vals);
8436 rtx index = GEN_INT (one_var);
8437 enum insn_code icode;
8438
8439 /* Load constant part of vector, substitute neighboring value for
8440 varying element. */
8441 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
8442 aarch64_expand_vector_init (target, copy);
8443
8444 /* Insert variable. */
8445 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8446 icode = optab_handler (vec_set_optab, mode);
8447 gcc_assert (icode != CODE_FOR_nothing);
8448 emit_insn (GEN_FCN (icode) (target, x, index));
8449 return;
8450 }
8451
8452 /* Construct the vector in memory one field at a time
8453 and load the whole vector. */
8454 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
8455 for (i = 0; i < n_elts; i++)
8456 emit_move_insn (adjust_address_nv (mem, inner_mode,
8457 i * GET_MODE_SIZE (inner_mode)),
8458 XVECEXP (vals, 0, i));
8459 emit_move_insn (target, mem);
8460
8461}
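/* Illustrative examples (assumed GNU C initialisers, not part of the
   original source) of the strategies above:

     int32x4_t a = {1, 2, 3, 4};   // all constant: MOVI/MVNI, DUP or
                                   // constant-pool load
     int32x4_t b = {x, x, x, x};   // all the same non-constant: one DUP
     int32x4_t c = {x, 1, 2, 3};   // one variable lane: constant vector
                                   // first, then a vec_set insertion

   anything less regular is built one element at a time in memory and then
   loaded as a whole vector.  */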
8462
43e9d192
IB
8463static unsigned HOST_WIDE_INT
8464aarch64_shift_truncation_mask (enum machine_mode mode)
8465{
8466 return
8467 (aarch64_vector_mode_supported_p (mode)
8468 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
8469}
8470
8471#ifndef TLS_SECTION_ASM_FLAG
8472#define TLS_SECTION_ASM_FLAG 'T'
8473#endif
8474
8475void
8476aarch64_elf_asm_named_section (const char *name, unsigned int flags,
8477 tree decl ATTRIBUTE_UNUSED)
8478{
8479 char flagchars[10], *f = flagchars;
8480
8481 /* If we have already declared this section, we can use an
8482 abbreviated form to switch back to it -- unless this section is
8483 part of a COMDAT group, in which case GAS requires the full
8484 declaration every time. */
8485 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8486 && (flags & SECTION_DECLARED))
8487 {
8488 fprintf (asm_out_file, "\t.section\t%s\n", name);
8489 return;
8490 }
8491
8492 if (!(flags & SECTION_DEBUG))
8493 *f++ = 'a';
8494 if (flags & SECTION_WRITE)
8495 *f++ = 'w';
8496 if (flags & SECTION_CODE)
8497 *f++ = 'x';
8498 if (flags & SECTION_SMALL)
8499 *f++ = 's';
8500 if (flags & SECTION_MERGE)
8501 *f++ = 'M';
8502 if (flags & SECTION_STRINGS)
8503 *f++ = 'S';
8504 if (flags & SECTION_TLS)
8505 *f++ = TLS_SECTION_ASM_FLAG;
8506 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8507 *f++ = 'G';
8508 *f = '\0';
8509
8510 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
8511
8512 if (!(flags & SECTION_NOTYPE))
8513 {
8514 const char *type;
8515 const char *format;
8516
8517 if (flags & SECTION_BSS)
8518 type = "nobits";
8519 else
8520 type = "progbits";
8521
8522#ifdef TYPE_OPERAND_FMT
8523 format = "," TYPE_OPERAND_FMT;
8524#else
8525 format = ",@%s";
8526#endif
8527
8528 fprintf (asm_out_file, format, type);
8529
8530 if (flags & SECTION_ENTSIZE)
8531 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
8532 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8533 {
8534 if (TREE_CODE (decl) == IDENTIFIER_NODE)
8535 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
8536 else
8537 fprintf (asm_out_file, ",%s,comdat",
8538 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
8539 }
8540 }
8541
8542 putc ('\n', asm_out_file);
8543}
8544
8545/* Select a format to encode pointers in exception handling data. */
8546int
8547aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
8548{
8549 int type;
8550 switch (aarch64_cmodel)
8551 {
8552 case AARCH64_CMODEL_TINY:
8553 case AARCH64_CMODEL_TINY_PIC:
8554 case AARCH64_CMODEL_SMALL:
8555 case AARCH64_CMODEL_SMALL_PIC:
8556 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
8557 for everything. */
8558 type = DW_EH_PE_sdata4;
8559 break;
8560 default:
8561 /* No assumptions here. 8-byte relocs required. */
8562 type = DW_EH_PE_sdata8;
8563 break;
8564 }
8565 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
8566}
8567
0462169c
SN
8568/* Emit load exclusive. */
8569
8570static void
8571aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
8572 rtx mem, rtx model_rtx)
8573{
8574 rtx (*gen) (rtx, rtx, rtx);
8575
8576 switch (mode)
8577 {
8578 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
8579 case HImode: gen = gen_aarch64_load_exclusivehi; break;
8580 case SImode: gen = gen_aarch64_load_exclusivesi; break;
8581 case DImode: gen = gen_aarch64_load_exclusivedi; break;
8582 default:
8583 gcc_unreachable ();
8584 }
8585
8586 emit_insn (gen (rval, mem, model_rtx));
8587}
8588
8589/* Emit store exclusive. */
8590
8591static void
8592aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
8593 rtx rval, rtx mem, rtx model_rtx)
8594{
8595 rtx (*gen) (rtx, rtx, rtx, rtx);
8596
8597 switch (mode)
8598 {
8599 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
8600 case HImode: gen = gen_aarch64_store_exclusivehi; break;
8601 case SImode: gen = gen_aarch64_store_exclusivesi; break;
8602 case DImode: gen = gen_aarch64_store_exclusivedi; break;
8603 default:
8604 gcc_unreachable ();
8605 }
8606
8607 emit_insn (gen (bval, rval, mem, model_rtx));
8608}
8609
8610/* Mark the previous jump instruction as unlikely. */
8611
8612static void
8613aarch64_emit_unlikely_jump (rtx insn)
8614{
e5af9ddd 8615 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
0462169c
SN
8616
8617 insn = emit_jump_insn (insn);
e5af9ddd 8618 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
0462169c
SN
8619}
8620
8621/* Expand a compare and swap pattern. */
8622
8623void
8624aarch64_expand_compare_and_swap (rtx operands[])
8625{
8626 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
8627 enum machine_mode mode, cmp_mode;
8628 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
8629
8630 bval = operands[0];
8631 rval = operands[1];
8632 mem = operands[2];
8633 oldval = operands[3];
8634 newval = operands[4];
8635 is_weak = operands[5];
8636 mod_s = operands[6];
8637 mod_f = operands[7];
8638 mode = GET_MODE (mem);
8639 cmp_mode = mode;
8640
8641 /* Normally the succ memory model must be stronger than fail, but in the
8642 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
8643 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
8644
8645 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
8646 && INTVAL (mod_s) == MEMMODEL_RELEASE)
8647 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
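  /* Illustrative user-level case (assumed, not part of the original source)
     where this promotion applies:

       __atomic_compare_exchange_n (&v, &expected, desired, 0,
                                    __ATOMIC_RELEASE, __ATOMIC_ACQUIRE);

     the success order (RELEASE) is not stronger than the failure order
     (ACQUIRE), so success is upgraded to ACQ_REL above to preserve the
     acquire semantics of the failure path.  */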
8648
8649 switch (mode)
8650 {
8651 case QImode:
8652 case HImode:
8653 /* For short modes, we're going to perform the comparison in SImode,
8654 so do the zero-extension now. */
8655 cmp_mode = SImode;
8656 rval = gen_reg_rtx (SImode);
8657 oldval = convert_modes (SImode, mode, oldval, true);
8658 /* Fall through. */
8659
8660 case SImode:
8661 case DImode:
8662 /* Force the value into a register if needed. */
8663 if (!aarch64_plus_operand (oldval, mode))
8664 oldval = force_reg (cmp_mode, oldval);
8665 break;
8666
8667 default:
8668 gcc_unreachable ();
8669 }
8670
8671 switch (mode)
8672 {
8673 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
8674 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
8675 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
8676 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
8677 default:
8678 gcc_unreachable ();
8679 }
8680
8681 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
8682
8683 if (mode == QImode || mode == HImode)
8684 emit_move_insn (operands[1], gen_lowpart (mode, rval));
8685
8686 x = gen_rtx_REG (CCmode, CC_REGNUM);
8687 x = gen_rtx_EQ (SImode, x, const0_rtx);
8688 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
8689}
8690
8691/* Split a compare and swap pattern. */
8692
8693void
8694aarch64_split_compare_and_swap (rtx operands[])
8695{
8696 rtx rval, mem, oldval, newval, scratch;
8697 enum machine_mode mode;
0462169c 8698 bool is_weak;
5d8a22a5
DM
8699 rtx_code_label *label1, *label2;
8700 rtx x, cond;
0462169c
SN
8701
8702 rval = operands[0];
8703 mem = operands[1];
8704 oldval = operands[2];
8705 newval = operands[3];
8706 is_weak = (operands[4] != const0_rtx);
0462169c
SN
8707 scratch = operands[7];
8708 mode = GET_MODE (mem);
8709
5d8a22a5 8710 label1 = NULL;
0462169c
SN
8711 if (!is_weak)
8712 {
8713 label1 = gen_label_rtx ();
8714 emit_label (label1);
8715 }
8716 label2 = gen_label_rtx ();
8717
8718 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
8719
8720 cond = aarch64_gen_compare_reg (NE, rval, oldval);
8721 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8722 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8723 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
8724 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8725
8726 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
8727
8728 if (!is_weak)
8729 {
8730 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
8731 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8732 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
8733 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8734 }
8735 else
8736 {
8737 cond = gen_rtx_REG (CCmode, CC_REGNUM);
8738 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
8739 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
8740 }
8741
8742 emit_label (label2);
8743}
8744
8745/* Split an atomic operation. */
8746
8747void
8748aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
8749 rtx value, rtx model_rtx, rtx cond)
8750{
8751 enum machine_mode mode = GET_MODE (mem);
8752 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
5d8a22a5
DM
8753 rtx_code_label *label;
8754 rtx x;
0462169c
SN
8755
8756 label = gen_label_rtx ();
8757 emit_label (label);
8758
8759 if (new_out)
8760 new_out = gen_lowpart (wmode, new_out);
8761 if (old_out)
8762 old_out = gen_lowpart (wmode, old_out);
8763 else
8764 old_out = new_out;
8765 value = simplify_gen_subreg (wmode, value, mode, 0);
8766
8767 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
8768
8769 switch (code)
8770 {
8771 case SET:
8772 new_out = value;
8773 break;
8774
8775 case NOT:
8776 x = gen_rtx_AND (wmode, old_out, value);
8777 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8778 x = gen_rtx_NOT (wmode, new_out);
8779 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8780 break;
8781
8782 case MINUS:
8783 if (CONST_INT_P (value))
8784 {
8785 value = GEN_INT (-INTVAL (value));
8786 code = PLUS;
8787 }
8788 /* Fall through. */
8789
8790 default:
8791 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
8792 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8793 break;
8794 }
8795
8796 aarch64_emit_store_exclusive (mode, cond, mem,
8797 gen_lowpart (mode, new_out), model_rtx);
8798
8799 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8800 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8801 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
8802 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8803}
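/* Illustrative sketch (assumed assembly, not from the original source): for
   a relaxed __atomic_fetch_add on a 32-bit value, the split above
   corresponds to a retry loop of roughly this shape:

     .L1: ldxr  w0, [x2]        // aarch64_emit_load_exclusive
          add   w3, w0, w1      // CODE applied in WMODE
          stxr  w4, w3, [x2]    // aarch64_emit_store_exclusive
          cbnz  w4, .L1         // retry while the exclusive store failed  */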
8804
95ca411e
YZ
8805static void
8806aarch64_print_extension (void)
8807{
8808 const struct aarch64_option_extension *opt = NULL;
8809
8810 for (opt = all_extensions; opt->name != NULL; opt++)
8811 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
8812 asm_fprintf (asm_out_file, "+%s", opt->name);
8813
8814 asm_fprintf (asm_out_file, "\n");
8815}
8816
43e9d192
IB
8817static void
8818aarch64_start_file (void)
8819{
8820 if (selected_arch)
95ca411e
YZ
8821 {
8822 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
8823 aarch64_print_extension ();
8824 }
43e9d192 8825 else if (selected_cpu)
95ca411e 8826 {
682287fb
JG
8827 const char *truncated_name
8828 = aarch64_rewrite_selected_cpu (selected_cpu->name);
8829 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
95ca411e
YZ
8830 aarch64_print_extension ();
8831 }
43e9d192
IB
8832 default_file_start();
8833}
8834
8835/* Target hook for c_mode_for_suffix. */
8836static enum machine_mode
8837aarch64_c_mode_for_suffix (char suffix)
8838{
8839 if (suffix == 'q')
8840 return TFmode;
8841
8842 return VOIDmode;
8843}
8844
3520f7cc
JG
8845/* We can only represent floating point constants which will fit in
8846 "quarter-precision" values. These values are characterised by
8847 a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
8848 by:
8849
8850 (-1)^s * (n/16) * 2^r
8851
8852 Where:
8853 's' is the sign bit.
8854 'n' is an integer in the range 16 <= n <= 31.
8855 'r' is an integer in the range -3 <= r <= 4. */
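/* Worked example (not part of the original source): 0.25 is encodable as
   (-1)^0 * (16/16) * 2^-2, i.e. s = 0, n = 16, r = -2, and 1.5 as
   (-1)^0 * (24/16) * 2^0, whereas 0.1 has no finite binary mantissa and
   0.0 is rejected outright (it is handled elsewhere).  */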
8856
8857/* Return true iff X can be represented by a quarter-precision
8858 floating point immediate operand X. Note, we cannot represent 0.0. */
8859bool
8860aarch64_float_const_representable_p (rtx x)
8861{
8862 /* This represents our current view of how many bits
8863 make up the mantissa. */
8864 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 8865 int exponent;
3520f7cc 8866 unsigned HOST_WIDE_INT mantissa, mask;
3520f7cc 8867 REAL_VALUE_TYPE r, m;
807e902e 8868 bool fail;
3520f7cc
JG
8869
8870 if (!CONST_DOUBLE_P (x))
8871 return false;
8872
94bfa2da
TV
8873 if (GET_MODE (x) == VOIDmode)
8874 return false;
8875
3520f7cc
JG
8876 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8877
8878 /* We cannot represent infinities, NaNs or +/-zero. We won't
8879 know if we have +zero until we analyse the mantissa, but we
8880 can reject the other invalid values. */
8881 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
8882 || REAL_VALUE_MINUS_ZERO (r))
8883 return false;
8884
ba96cdfb 8885 /* Extract exponent. */
3520f7cc
JG
8886 r = real_value_abs (&r);
8887 exponent = REAL_EXP (&r);
8888
8889 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8890 highest (sign) bit, with a fixed binary point at bit point_pos.
8891 m1 holds the low part of the mantissa, m2 the high part.
8892 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
8893 bits for the mantissa, this can fail (low bits will be lost). */
8894 real_ldexp (&m, &r, point_pos - exponent);
807e902e 8895 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
3520f7cc
JG
8896
8897 /* If the low part of the mantissa has bits set we cannot represent
8898 the value. */
807e902e 8899 if (w.elt (0) != 0)
3520f7cc
JG
8900 return false;
8901 /* We have rejected the lower HOST_WIDE_INT, so update our
8902 understanding of how many bits lie in the mantissa and
8903 look only at the high HOST_WIDE_INT. */
807e902e 8904 mantissa = w.elt (1);
3520f7cc
JG
8905 point_pos -= HOST_BITS_PER_WIDE_INT;
8906
8907 /* We can only represent values with a mantissa of the form 1.xxxx. */
8908 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8909 if ((mantissa & mask) != 0)
8910 return false;
8911
8912 /* Having filtered unrepresentable values, we may now remove all
8913 but the highest 5 bits. */
8914 mantissa >>= point_pos - 5;
8915
8916 /* We cannot represent the value 0.0, so reject it. This is handled
8917 elsewhere. */
8918 if (mantissa == 0)
8919 return false;
8920
8921 /* Then, as bit 4 is always set, we can mask it off, leaving
8922 the mantissa in the range [0, 15]. */
8923 mantissa &= ~(1 << 4);
8924 gcc_assert (mantissa <= 15);
8925
8926 /* GCC internally does not use IEEE754-like encoding (where normalized
8927 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
8928 Our mantissa values are shifted 4 places to the left relative to
8929 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8930 by 5 places to correct for GCC's representation. */
8931 exponent = 5 - exponent;
8932
8933 return (exponent >= 0 && exponent <= 7);
8934}
8935
8936char*
81c2dfb9 8937aarch64_output_simd_mov_immediate (rtx const_vector,
3520f7cc
JG
8938 enum machine_mode mode,
8939 unsigned width)
8940{
3ea63f60 8941 bool is_valid;
3520f7cc 8942 static char templ[40];
3520f7cc 8943 const char *mnemonic;
e4f0f84d 8944 const char *shift_op;
3520f7cc 8945 unsigned int lane_count = 0;
81c2dfb9 8946 char element_char;
3520f7cc 8947
e4f0f84d 8948 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
48063b9d
IB
8949
8950 /* This will return true to show const_vector is legal for use as either
8951 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
8952 also update INFO to show how the immediate should be generated. */
81c2dfb9 8953 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
3520f7cc
JG
8954 gcc_assert (is_valid);
8955
81c2dfb9 8956 element_char = sizetochar (info.element_width);
48063b9d
IB
8957 lane_count = width / info.element_width;
8958
3520f7cc
JG
8959 mode = GET_MODE_INNER (mode);
8960 if (mode == SFmode || mode == DFmode)
8961 {
48063b9d
IB
8962 gcc_assert (info.shift == 0 && ! info.mvn);
8963 if (aarch64_float_const_zero_rtx_p (info.value))
8964 info.value = GEN_INT (0);
8965 else
8966 {
8967#define buf_size 20
8968 REAL_VALUE_TYPE r;
8969 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
8970 char float_buf[buf_size] = {'\0'};
8971 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
8972#undef buf_size
8973
8974 if (lane_count == 1)
8975 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
8976 else
8977 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 8978 lane_count, element_char, float_buf);
48063b9d
IB
8979 return templ;
8980 }
3520f7cc 8981 }
3520f7cc 8982
48063b9d 8983 mnemonic = info.mvn ? "mvni" : "movi";
e4f0f84d 8984 shift_op = info.msl ? "msl" : "lsl";
3520f7cc
JG
8985
8986 if (lane_count == 1)
48063b9d
IB
8987 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
8988 mnemonic, UINTVAL (info.value));
8989 else if (info.shift)
8990 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
e4f0f84d
TB
8991 ", %s %d", mnemonic, lane_count, element_char,
8992 UINTVAL (info.value), shift_op, info.shift);
3520f7cc 8993 else
48063b9d 8994 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
81c2dfb9 8995 mnemonic, lane_count, element_char, UINTVAL (info.value));
3520f7cc
JG
8996 return templ;
8997}
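/* Illustrative example (not part of the original source): a V4SImode
   constant with every element equal to 0x00ff0000 is accepted by
   aarch64_simd_valid_immediate with element_width 32, shift 16 and mvn
   false, so the template produced above is "movi\t%0.4s, 0xff, lsl 16".  */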
8998
b7342d25
IB
8999char*
9000aarch64_output_scalar_simd_mov_immediate (rtx immediate,
9001 enum machine_mode mode)
9002{
9003 enum machine_mode vmode;
9004
9005 gcc_assert (!VECTOR_MODE_P (mode));
9006 vmode = aarch64_simd_container_mode (mode, 64);
9007 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
9008 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
9009}
9010
88b08073
JG
9011/* Split operands into moves from op[1] + op[2] into op[0]. */
9012
9013void
9014aarch64_split_combinev16qi (rtx operands[3])
9015{
9016 unsigned int dest = REGNO (operands[0]);
9017 unsigned int src1 = REGNO (operands[1]);
9018 unsigned int src2 = REGNO (operands[2]);
9019 enum machine_mode halfmode = GET_MODE (operands[1]);
9020 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
9021 rtx destlo, desthi;
9022
9023 gcc_assert (halfmode == V16QImode);
9024
9025 if (src1 == dest && src2 == dest + halfregs)
9026 {
9027 /* No-op move. Can't split to nothing; emit something. */
9028 emit_note (NOTE_INSN_DELETED);
9029 return;
9030 }
9031
9032 /* Preserve register attributes for variable tracking. */
9033 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
9034 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
9035 GET_MODE_SIZE (halfmode));
9036
9037 /* Special case of reversed high/low parts. */
9038 if (reg_overlap_mentioned_p (operands[2], destlo)
9039 && reg_overlap_mentioned_p (operands[1], desthi))
9040 {
9041 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
9042 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
9043 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
9044 }
9045 else if (!reg_overlap_mentioned_p (operands[2], destlo))
9046 {
9047 /* Try to avoid unnecessary moves if part of the result
9048 is in the right place already. */
9049 if (src1 != dest)
9050 emit_move_insn (destlo, operands[1]);
9051 if (src2 != dest + halfregs)
9052 emit_move_insn (desthi, operands[2]);
9053 }
9054 else
9055 {
9056 if (src2 != dest + halfregs)
9057 emit_move_insn (desthi, operands[2]);
9058 if (src1 != dest)
9059 emit_move_insn (destlo, operands[1]);
9060 }
9061}
9062
9063/* vec_perm support. */
9064
9065#define MAX_VECT_LEN 16
9066
9067struct expand_vec_perm_d
9068{
9069 rtx target, op0, op1;
9070 unsigned char perm[MAX_VECT_LEN];
9071 enum machine_mode vmode;
9072 unsigned char nelt;
9073 bool one_vector_p;
9074 bool testing_p;
9075};
9076
9077/* Generate a variable permutation. */
9078
9079static void
9080aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
9081{
9082 enum machine_mode vmode = GET_MODE (target);
9083 bool one_vector_p = rtx_equal_p (op0, op1);
9084
9085 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
9086 gcc_checking_assert (GET_MODE (op0) == vmode);
9087 gcc_checking_assert (GET_MODE (op1) == vmode);
9088 gcc_checking_assert (GET_MODE (sel) == vmode);
9089 gcc_checking_assert (TARGET_SIMD);
9090
9091 if (one_vector_p)
9092 {
9093 if (vmode == V8QImode)
9094 {
9095 /* Expand the argument to a V16QI mode by duplicating it. */
9096 rtx pair = gen_reg_rtx (V16QImode);
9097 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
9098 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
9099 }
9100 else
9101 {
9102 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
9103 }
9104 }
9105 else
9106 {
9107 rtx pair;
9108
9109 if (vmode == V8QImode)
9110 {
9111 pair = gen_reg_rtx (V16QImode);
9112 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
9113 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
9114 }
9115 else
9116 {
9117 pair = gen_reg_rtx (OImode);
9118 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
9119 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
9120 }
9121 }
9122}
9123
9124void
9125aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
9126{
9127 enum machine_mode vmode = GET_MODE (target);
c9d1a16a 9128 unsigned int nelt = GET_MODE_NUNITS (vmode);
88b08073 9129 bool one_vector_p = rtx_equal_p (op0, op1);
f7c4e5b8 9130 rtx mask;
88b08073
JG
9131
9132 /* The TBL instruction does not use a modulo index, so we must take care
9133 of that ourselves. */
f7c4e5b8
AL
9134 mask = aarch64_simd_gen_const_vector_dup (vmode,
9135 one_vector_p ? nelt - 1 : 2 * nelt - 1);
88b08073
JG
9136 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
9137
f7c4e5b8
AL
9138 /* For big-endian, we also need to reverse the index within the vector
9139 (but not which vector). */
9140 if (BYTES_BIG_ENDIAN)
9141 {
9142 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
9143 if (!one_vector_p)
9144 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
9145 sel = expand_simple_binop (vmode, XOR, sel, mask,
9146 NULL, 0, OPTAB_LIB_WIDEN);
9147 }
88b08073
JG
9148 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
9149}
9150
cc4d934f
JG
9151/* Recognize patterns suitable for the TRN instructions. */
9152static bool
9153aarch64_evpc_trn (struct expand_vec_perm_d *d)
9154{
9155 unsigned int i, odd, mask, nelt = d->nelt;
9156 rtx out, in0, in1, x;
9157 rtx (*gen) (rtx, rtx, rtx);
9158 enum machine_mode vmode = d->vmode;
9159
9160 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9161 return false;
9162
9163 /* Note that these are little-endian tests.
9164 We correct for big-endian later. */
9165 if (d->perm[0] == 0)
9166 odd = 0;
9167 else if (d->perm[0] == 1)
9168 odd = 1;
9169 else
9170 return false;
9171 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9172
9173 for (i = 0; i < nelt; i += 2)
9174 {
9175 if (d->perm[i] != i + odd)
9176 return false;
9177 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
9178 return false;
9179 }
9180
9181 /* Success! */
9182 if (d->testing_p)
9183 return true;
9184
9185 in0 = d->op0;
9186 in1 = d->op1;
9187 if (BYTES_BIG_ENDIAN)
9188 {
9189 x = in0, in0 = in1, in1 = x;
9190 odd = !odd;
9191 }
9192 out = d->target;
9193
9194 if (odd)
9195 {
9196 switch (vmode)
9197 {
9198 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
9199 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
9200 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
9201 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
9202 case V4SImode: gen = gen_aarch64_trn2v4si; break;
9203 case V2SImode: gen = gen_aarch64_trn2v2si; break;
9204 case V2DImode: gen = gen_aarch64_trn2v2di; break;
9205 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
9206 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
9207 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
9208 default:
9209 return false;
9210 }
9211 }
9212 else
9213 {
9214 switch (vmode)
9215 {
9216 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
9217 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
9218 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
9219 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
9220 case V4SImode: gen = gen_aarch64_trn1v4si; break;
9221 case V2SImode: gen = gen_aarch64_trn1v2si; break;
9222 case V2DImode: gen = gen_aarch64_trn1v2di; break;
9223 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
9224 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
9225 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
9226 default:
9227 return false;
9228 }
9229 }
9230
9231 emit_insn (gen (out, in0, in1));
9232 return true;
9233}
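/* Illustrative example (not part of the original source): for V4SImode with
   two input vectors, the little-endian permutation index {0, 4, 2, 6} is
   recognised here as TRN1 and {1, 5, 3, 7} as TRN2, before the big-endian
   operand swap above is applied.  */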
9234
9235/* Recognize patterns suitable for the UZP instructions. */
9236static bool
9237aarch64_evpc_uzp (struct expand_vec_perm_d *d)
9238{
9239 unsigned int i, odd, mask, nelt = d->nelt;
9240 rtx out, in0, in1, x;
9241 rtx (*gen) (rtx, rtx, rtx);
9242 enum machine_mode vmode = d->vmode;
9243
9244 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9245 return false;
9246
9247 /* Note that these are little-endian tests.
9248 We correct for big-endian later. */
9249 if (d->perm[0] == 0)
9250 odd = 0;
9251 else if (d->perm[0] == 1)
9252 odd = 1;
9253 else
9254 return false;
9255 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9256
9257 for (i = 0; i < nelt; i++)
9258 {
9259 unsigned elt = (i * 2 + odd) & mask;
9260 if (d->perm[i] != elt)
9261 return false;
9262 }
9263
9264 /* Success! */
9265 if (d->testing_p)
9266 return true;
9267
9268 in0 = d->op0;
9269 in1 = d->op1;
9270 if (BYTES_BIG_ENDIAN)
9271 {
9272 x = in0, in0 = in1, in1 = x;
9273 odd = !odd;
9274 }
9275 out = d->target;
9276
9277 if (odd)
9278 {
9279 switch (vmode)
9280 {
9281 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
9282 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
9283 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
9284 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
9285 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
9286 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
9287 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
9288 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
9289 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
9290 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
9291 default:
9292 return false;
9293 }
9294 }
9295 else
9296 {
9297 switch (vmode)
9298 {
9299 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
9300 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
9301 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
9302 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
9303 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
9304 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
9305 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
9306 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
9307 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
9308 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
9309 default:
9310 return false;
9311 }
9312 }
9313
9314 emit_insn (gen (out, in0, in1));
9315 return true;
9316}
9317
9318/* Recognize patterns suitable for the ZIP instructions. */
9319static bool
9320aarch64_evpc_zip (struct expand_vec_perm_d *d)
9321{
9322 unsigned int i, high, mask, nelt = d->nelt;
9323 rtx out, in0, in1, x;
9324 rtx (*gen) (rtx, rtx, rtx);
9325 enum machine_mode vmode = d->vmode;
9326
9327 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9328 return false;
9329
9330 /* Note that these are little-endian tests.
9331 We correct for big-endian later. */
9332 high = nelt / 2;
9333 if (d->perm[0] == high)
9334 /* Do Nothing. */
9335 ;
9336 else if (d->perm[0] == 0)
9337 high = 0;
9338 else
9339 return false;
9340 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9341
9342 for (i = 0; i < nelt / 2; i++)
9343 {
9344 unsigned elt = (i + high) & mask;
9345 if (d->perm[i * 2] != elt)
9346 return false;
9347 elt = (elt + nelt) & mask;
9348 if (d->perm[i * 2 + 1] != elt)
9349 return false;
9350 }
9351
9352 /* Success! */
9353 if (d->testing_p)
9354 return true;
9355
9356 in0 = d->op0;
9357 in1 = d->op1;
9358 if (BYTES_BIG_ENDIAN)
9359 {
9360 x = in0, in0 = in1, in1 = x;
9361 high = !high;
9362 }
9363 out = d->target;
9364
9365 if (high)
9366 {
9367 switch (vmode)
9368 {
9369 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
9370 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
9371 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
9372 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
9373 case V4SImode: gen = gen_aarch64_zip2v4si; break;
9374 case V2SImode: gen = gen_aarch64_zip2v2si; break;
9375 case V2DImode: gen = gen_aarch64_zip2v2di; break;
9376 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
9377 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
9378 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
9379 default:
9380 return false;
9381 }
9382 }
9383 else
9384 {
9385 switch (vmode)
9386 {
9387 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
9388 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
9389 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
9390 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
9391 case V4SImode: gen = gen_aarch64_zip1v4si; break;
9392 case V2SImode: gen = gen_aarch64_zip1v2si; break;
9393 case V2DImode: gen = gen_aarch64_zip1v2di; break;
9394 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
9395 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
9396 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
9397 default:
9398 return false;
9399 }
9400 }
9401
9402 emit_insn (gen (out, in0, in1));
9403 return true;
9404}
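/* For example, on V4SImode the little-endian index vectors accepted by
   the checking loop above are { 0, 4, 1, 5 } (zip1, interleaving the low
   halves of the inputs) and { 2, 6, 3, 7 } (zip2, interleaving the high
   halves). */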
9405
9406/* Recognize patterns for the EXT insn. */
9407
9408static bool
9409aarch64_evpc_ext (struct expand_vec_perm_d *d)
9410{
9411 unsigned int i, nelt = d->nelt;
9412 rtx (*gen) (rtx, rtx, rtx, rtx);
9413 rtx offset;
9414
9415 unsigned int location = d->perm[0]; /* Always < nelt. */
9416
9417 /* Check if the extracted indices are increasing by one. */
9418 for (i = 1; i < nelt; i++)
9419 {
9420 unsigned int required = location + i;
9421 if (d->one_vector_p)
9422 {
9423 /* We'll pass the same vector in twice, so allow indices to wrap. */
9424 required &= (nelt - 1);
9425 }
9426 if (d->perm[i] != required)
9427 return false;
9428 }
9429
9430 switch (d->vmode)
9431 {
9432 case V16QImode: gen = gen_aarch64_extv16qi; break;
9433 case V8QImode: gen = gen_aarch64_extv8qi; break;
9434 case V4HImode: gen = gen_aarch64_extv4hi; break;
9435 case V8HImode: gen = gen_aarch64_extv8hi; break;
9436 case V2SImode: gen = gen_aarch64_extv2si; break;
9437 case V4SImode: gen = gen_aarch64_extv4si; break;
9438 case V2SFmode: gen = gen_aarch64_extv2sf; break;
9439 case V4SFmode: gen = gen_aarch64_extv4sf; break;
9440 case V2DImode: gen = gen_aarch64_extv2di; break;
9441 case V2DFmode: gen = gen_aarch64_extv2df; break;
9442 default:
9443 return false;
9444 }
9445
9446 /* Success! */
9447 if (d->testing_p)
9448 return true;
9449
9450 /* The case where (location == 0) is a no-op for both big- and little-endian,
9451 and is removed by the mid-end at optimization levels -O1 and higher. */
9452
9453 if (BYTES_BIG_ENDIAN && (location != 0))
9454 {
9455 /* After setup, we want the high elements of the first vector (stored
9456 at the LSB end of the register), and the low elements of the second
9457 vector (stored at the MSB end of the register). So swap. */
9458 rtx temp = d->op0;
9459 d->op0 = d->op1;
9460 d->op1 = temp;
9461 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
9462 location = nelt - location;
9463 }
9464
9465 offset = GEN_INT (location);
9466 emit_insn (gen (d->target, d->op0, d->op1, offset));
9467 return true;
9468}
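/* For example, a two-operand V4SImode permutation of { 1, 2, 3, 4 } has
   location == 1 and is matched here: the result is the top three elements
   of op0 followed by the first element of op1, i.e. an EXT by one element.
   On big-endian the operands are swapped and location becomes
   nelt - location, as handled above. */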
9469
9470/* Recognize patterns for the REV insns. */
9471
9472static bool
9473aarch64_evpc_rev (struct expand_vec_perm_d *d)
9474{
9475 unsigned int i, j, diff, nelt = d->nelt;
9476 rtx (*gen) (rtx, rtx);
9477
9478 if (!d->one_vector_p)
9479 return false;
9480
9481 diff = d->perm[0];
9482 switch (diff)
9483 {
9484 case 7:
9485 switch (d->vmode)
9486 {
9487 case V16QImode: gen = gen_aarch64_rev64v16qi; break;
9488 case V8QImode: gen = gen_aarch64_rev64v8qi; break;
9489 default:
9490 return false;
9491 }
9492 break;
9493 case 3:
9494 switch (d->vmode)
9495 {
9496 case V16QImode: gen = gen_aarch64_rev32v16qi; break;
9497 case V8QImode: gen = gen_aarch64_rev32v8qi; break;
9498 case V8HImode: gen = gen_aarch64_rev64v8hi; break;
9499 case V4HImode: gen = gen_aarch64_rev64v4hi; break;
9500 default:
9501 return false;
9502 }
9503 break;
9504 case 1:
9505 switch (d->vmode)
9506 {
9507 case V16QImode: gen = gen_aarch64_rev16v16qi; break;
9508 case V8QImode: gen = gen_aarch64_rev16v8qi; break;
9509 case V8HImode: gen = gen_aarch64_rev32v8hi; break;
9510 case V4HImode: gen = gen_aarch64_rev32v4hi; break;
9511 case V4SImode: gen = gen_aarch64_rev64v4si; break;
9512 case V2SImode: gen = gen_aarch64_rev64v2si; break;
9513 case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
9514 case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
9515 default:
9516 return false;
9517 }
9518 break;
9519 default:
9520 return false;
9521 }
9522
9523 for (i = 0; i < nelt; i += diff + 1)
9524 for (j = 0; j <= diff; j += 1)
9525 {
9526 /* This is guaranteed to be true as the value of diff
9527 is 7, 3 or 1, and we should have enough elements in the
9528 permutation to generate this. A vector mask with a
9529 value of diff other than these values implies that
9530 something is wrong by the time we get here. */
9531 gcc_assert (i + j < nelt);
9532 if (d->perm[i + j] != i + diff - j)
9533 return false;
9534 }
9535
9536 /* Success! */
9537 if (d->testing_p)
9538 return true;
9539
9540 emit_insn (gen (d->target, d->op0));
9541 return true;
9542}
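/* For example, diff == 1 on V4SImode corresponds to the permutation
   { 1, 0, 3, 2 } and is emitted as a rev64, while diff == 3 on V8HImode
   corresponds to { 3, 2, 1, 0, 7, 6, 5, 4 }, also a rev64. */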
9543
9544static bool
9545aarch64_evpc_dup (struct expand_vec_perm_d *d)
9546{
9547 rtx (*gen) (rtx, rtx, rtx);
9548 rtx out = d->target;
9549 rtx in0;
9550 enum machine_mode vmode = d->vmode;
9551 unsigned int i, elt, nelt = d->nelt;
9552 rtx lane;
9553
9554 elt = d->perm[0];
9555 for (i = 1; i < nelt; i++)
9556 {
9557 if (elt != d->perm[i])
9558 return false;
9559 }
9560
9561 /* The generic preparation in aarch64_expand_vec_perm_const_1
9562 swaps the operand order and the permute indices if it finds
9563 d->perm[0] to be in the second operand. Thus, we can always
9564 use d->op0 and need not do any extra arithmetic to get the
9565 correct lane number. */
9566 in0 = d->op0;
9567 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
9568
9569 switch (vmode)
9570 {
9571 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
9572 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
9573 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
9574 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
9575 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
9576 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
9577 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
9578 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
9579 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
9580 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
9581 default:
9582 return false;
9583 }
9584
9585 emit_insn (gen (out, in0, lane));
9586 return true;
9587}
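/* For example, a V4SImode permutation of { 3, 3, 3, 3 } is matched here
   and becomes a single duplication of lane 3 of op0 (roughly
   "dup v0.4s, v1.s[3]" in assembly). */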
9588
9589static bool
9590aarch64_evpc_tbl (struct expand_vec_perm_d *d)
9591{
9592 rtx rperm[MAX_VECT_LEN], sel;
9593 enum machine_mode vmode = d->vmode;
9594 unsigned int i, nelt = d->nelt;
9595
9596 if (d->testing_p)
9597 return true;
9598
9599 /* Generic code will try constant permutation twice: once with the
9600 original mode and again with the elements lowered to QImode.
9601 So wait and don't do the selector expansion ourselves. */
9602 if (vmode != V8QImode && vmode != V16QImode)
9603 return false;
9604
9605 for (i = 0; i < nelt; ++i)
9606 {
9607 int nunits = GET_MODE_NUNITS (vmode);
9608
9609 /* If big-endian and there are two vectors, we end up with a weird
9610 mixed-endian mode on NEON. Reverse the index within each word but
9611 not the word itself. */
9612 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
9613 : d->perm[i]);
9614 }
9615 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
9616 sel = force_reg (vmode, sel);
9617
9618 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
9619 return true;
9620}
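/* The TBL fallback above handles any byte permutation that the more
   specific recognizers reject; for instance an arbitrary V8QImode mask
   such as { 5, 0, 11, 2, 9, 14, 7, 3 } (an illustrative value) is forced
   into a register and used as the table-lookup selector. */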
9621
9622static bool
9623aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
9624{
9625 /* The pattern matching functions above are written to look for a small
9626 number to begin the sequence (0, 1, N/2). If we begin with an index
9627 from the second operand, we can swap the operands. */
9628 if (d->perm[0] >= d->nelt)
9629 {
9630 unsigned i, nelt = d->nelt;
9631 rtx x;
9632
9633 gcc_assert (nelt == (nelt & -nelt));
9634 for (i = 0; i < nelt; ++i)
9635 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
9636
9637 x = d->op0;
9638 d->op0 = d->op1;
9639 d->op1 = x;
9640 }
9641
9642 if (TARGET_SIMD)
9643 {
9644 if (aarch64_evpc_rev (d))
9645 return true;
9646 else if (aarch64_evpc_ext (d))
9647 return true;
9648 else if (aarch64_evpc_dup (d))
9649 return true;
9650 else if (aarch64_evpc_zip (d))
9651 return true;
9652 else if (aarch64_evpc_uzp (d))
9653 return true;
9654 else if (aarch64_evpc_trn (d))
9655 return true;
9656 return aarch64_evpc_tbl (d);
9657 }
9658 return false;
9659}
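/* For example, a two-operand V4SImode selector of { 4, 0, 5, 1 } starts
   with an index from the second operand, so the code above swaps op0 and
   op1 and rewrites the indices to { 0, 4, 1, 5 }, which the zip1
   recognizer then accepts. */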
9660
9661/* Expand a vec_perm_const pattern. */
9662
9663bool
9664aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
9665{
9666 struct expand_vec_perm_d d;
9667 int i, nelt, which;
9668
9669 d.target = target;
9670 d.op0 = op0;
9671 d.op1 = op1;
9672
9673 d.vmode = GET_MODE (target);
9674 gcc_assert (VECTOR_MODE_P (d.vmode));
9675 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9676 d.testing_p = false;
9677
9678 for (i = which = 0; i < nelt; ++i)
9679 {
9680 rtx e = XVECEXP (sel, 0, i);
9681 int ei = INTVAL (e) & (2 * nelt - 1);
9682 which |= (ei < nelt ? 1 : 2);
9683 d.perm[i] = ei;
9684 }
9685
9686 switch (which)
9687 {
9688 default:
9689 gcc_unreachable ();
9690
9691 case 3:
9692 d.one_vector_p = false;
9693 if (!rtx_equal_p (op0, op1))
9694 break;
9695
9696 /* The elements of PERM do not suggest that only the first operand
9697 is used, but both operands are identical. Allow easier matching
9698 of the permutation by folding the permutation into the single
9699 input vector. */
9700 /* Fall Through. */
9701 case 2:
9702 for (i = 0; i < nelt; ++i)
9703 d.perm[i] &= nelt - 1;
9704 d.op0 = op1;
9705 d.one_vector_p = true;
9706 break;
9707
9708 case 1:
9709 d.op1 = op0;
9710 d.one_vector_p = true;
9711 break;
9712 }
9713
9714 return aarch64_expand_vec_perm_const_1 (&d);
9715}
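/* For example, a V4SImode selector of { 4, 5, 6, 7 } uses only the second
   operand (which == 2), so the indices are reduced to { 0, 1, 2, 3 } and
   op1 is treated as the single input vector. */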
9716
9717static bool
9718aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
9719 const unsigned char *sel)
9720{
9721 struct expand_vec_perm_d d;
9722 unsigned int i, nelt, which;
9723 bool ret;
9724
9725 d.vmode = vmode;
9726 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9727 d.testing_p = true;
9728 memcpy (d.perm, sel, nelt);
9729
9730 /* Calculate whether all elements are in one vector. */
9731 for (i = which = 0; i < nelt; ++i)
9732 {
9733 unsigned char e = d.perm[i];
9734 gcc_assert (e < 2 * nelt);
9735 which |= (e < nelt ? 1 : 2);
9736 }
9737
9738 /* If all elements are from the second vector, reindex as if from the
9739 first vector. */
9740 if (which == 2)
9741 for (i = 0; i < nelt; ++i)
9742 d.perm[i] -= nelt;
9743
9744 /* Check whether the mask can be applied to a single vector. */
9745 d.one_vector_p = (which != 3);
9746
9747 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
9748 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
9749 if (!d.one_vector_p)
9750 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
9751
9752 start_sequence ();
9753 ret = aarch64_expand_vec_perm_const_1 (&d);
9754 end_sequence ();
9755
9756 return ret;
9757}
9758
9759/* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
9760bool
9761aarch64_cannot_change_mode_class (enum machine_mode from,
9762 enum machine_mode to,
9763 enum reg_class rclass)
9764{
9765 /* Full-reg subregs are allowed on general regs or any class if they are
9766 the same size. */
9767 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
9768 || !reg_classes_intersect_p (FP_REGS, rclass))
9769 return false;
9770
9771 /* Limited combinations of subregs are safe on FPREGs. Particularly,
9772 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
9773 2. Scalar to Scalar for integer modes or same size float modes.
9774 3. Vector to Vector modes.
9775 4. On little-endian only, Vector-Structure to Vector modes. */
9776 if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
9777 {
9778 if (aarch64_vector_mode_supported_p (from)
9779 && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
9780 return false;
9781
9782 if (GET_MODE_NUNITS (from) == 1
9783 && GET_MODE_NUNITS (to) == 1
9784 && (GET_MODE_CLASS (from) == MODE_INT
9785 || from == to))
9786 return false;
9787
9788 if (aarch64_vector_mode_supported_p (from)
9789 && aarch64_vector_mode_supported_p (to))
9790 return false;
9791
9792 /* Within a vector structure straddling multiple vector registers
9793 we are in a mixed-endian representation. As such, we can't
9794 easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can
9795 switch between vectors and vector structures cheaply. */
9796 if (!BYTES_BIG_ENDIAN)
9797 if ((aarch64_vector_mode_supported_p (from)
9798 && aarch64_vect_struct_mode_p (to))
9799 || (aarch64_vector_mode_supported_p (to)
9800 && aarch64_vect_struct_mode_p (from)))
9801 return false;
9802 }
9803
9804 return true;
9805}
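/* For example, a DImode subreg of a V2DImode value held in an FP register
   is allowed (one vector unit is accessed), whereas on big-endian a subreg
   between a vector-structure mode such as OImode and a plain vector mode
   is rejected because of the mixed-endian representation described above. */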
9806
9807/* Implement MODES_TIEABLE_P. */
9808
9809bool
9810aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
9811{
9812 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
9813 return true;
9814
9815 /* We specifically want to allow elements of "structure" modes to
9816 be tieable to the structure. This more general condition allows
9817 other rarer situations too. */
9818 if (TARGET_SIMD
9819 && aarch64_vector_mode_p (mode1)
9820 && aarch64_vector_mode_p (mode2))
9821 return true;
9822
9823 return false;
9824}
9825
9826/* Return a new RTX holding the result of moving POINTER forward by
9827 AMOUNT bytes. */
9828
9829static rtx
9830aarch64_move_pointer (rtx pointer, int amount)
9831{
9832 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
9833
9834 return adjust_automodify_address (pointer, GET_MODE (pointer),
9835 next, amount);
9836}
9837
9838/* Return a new RTX holding the result of moving POINTER forward by the
9839 size of the mode it points to. */
9840
9841static rtx
9842aarch64_progress_pointer (rtx pointer)
9843{
9844 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
9845
9846 return aarch64_move_pointer (pointer, amount);
9847}
9848
9849/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
9850 MODE bytes. */
9851
9852static void
9853aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
9854 enum machine_mode mode)
9855{
9856 rtx reg = gen_reg_rtx (mode);
9857
9858 /* "Cast" the pointers to the correct mode. */
9859 *src = adjust_address (*src, mode, 0);
9860 *dst = adjust_address (*dst, mode, 0);
9861 /* Emit the memcpy. */
9862 emit_move_insn (reg, *src);
9863 emit_move_insn (*dst, reg);
9864 /* Move the pointers forward. */
9865 *src = aarch64_progress_pointer (*src);
9866 *dst = aarch64_progress_pointer (*dst);
9867}
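/* For example, a single SImode step loads 4 bytes from *src into a fresh
   32-bit pseudo, stores it to *dst, and then advances both pointers by 4,
   ready for the next chunk. */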
9868
9869/* Expand movmem, as if from a __builtin_memcpy. Return true if
9870 we succeed, otherwise return false. */
9871
9872bool
9873aarch64_expand_movmem (rtx *operands)
9874{
9875 unsigned int n;
9876 rtx dst = operands[0];
9877 rtx src = operands[1];
9878 rtx base;
9879 bool speed_p = !optimize_function_for_size_p (cfun);
9880
9881 /* When optimizing for size, give a better estimate of the length of a
9882 memcpy call, but use the default otherwise. */
9883 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
9884
9885 /* We can't do anything smart if the amount to copy is not constant. */
9886 if (!CONST_INT_P (operands[2]))
9887 return false;
9888
9889 n = UINTVAL (operands[2]);
9890
9891 /* Try to keep the number of instructions low. For cases below 16 bytes we
9892 need to make at most two moves. For cases above 16 bytes it will be one
9893 move for each 16 byte chunk, then at most two additional moves. */
9894 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
9895 return false;
9896
9897 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9898 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
9899
9900 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
9901 src = adjust_automodify_address (src, VOIDmode, base, 0);
9902
9903 /* Simple cases. Copy 0-3 bytes: (if applicable) a 2-byte chunk, then a
9904 1-byte chunk. */
9905 if (n < 4)
9906 {
9907 if (n >= 2)
9908 {
9909 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
9910 n -= 2;
9911 }
9912
9913 if (n == 1)
9914 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
9915
9916 return true;
9917 }
9918
9919 /* Copy 4-7 bytes. First a 4-byte chunk, then (if applicable) a second
9920 4-byte chunk, partially overlapping with the previously copied chunk. */
9921 if (n < 8)
9922 {
9923 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9924 n -= 4;
9925 if (n > 0)
9926 {
9927 int move = n - 4;
9928
9929 src = aarch64_move_pointer (src, move);
9930 dst = aarch64_move_pointer (dst, move);
9931 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9932 }
9933 return true;
9934 }
9935
9936 /* Copy 8 bytes or more. Copy chunks of 16 bytes until fewer than 16
9937 remain, then (if applicable) an 8-byte chunk. */
9938 while (n >= 8)
9939 {
9940 if (n / 16)
9941 {
9942 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
9943 n -= 16;
9944 }
9945 else
9946 {
9947 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
9948 n -= 8;
9949 }
9950 }
9951
9952 /* Finish the final bytes of the copy. We can always do this with one
9953 more copy: either we copy the exact amount we need, or we partially
9954 overlap with the previous chunk and copy 4 or 8 bytes. */
9955 if (n == 0)
9956 return true;
9957 else if (n == 1)
9958 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
9959 else if (n == 2)
9960 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
9961 else if (n == 4)
9962 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9963 else
9964 {
9965 if (n == 3)
9966 {
9967 src = aarch64_move_pointer (src, -1);
9968 dst = aarch64_move_pointer (dst, -1);
9969 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9970 }
9971 else
9972 {
9973 int move = n - 8;
9974
9975 src = aarch64_move_pointer (src, move);
9976 dst = aarch64_move_pointer (dst, move);
9977 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
9978 }
9979 }
9980
9981 return true;
9982}
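/* Worked examples: a copy of 7 bytes becomes one 4-byte copy at offset 0
   followed by a second 4-byte copy at offset 3 (overlapping one byte);
   a copy of 30 bytes becomes a 16-byte copy, an 8-byte copy, and a final
   8-byte copy at offset 22 that overlaps the previous chunk by two bytes. */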
9983
9984/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
9985
9986static unsigned HOST_WIDE_INT
9987aarch64_asan_shadow_offset (void)
9988{
9989 return (HOST_WIDE_INT_1 << 36);
9990}
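/* With the standard AddressSanitizer mapping (shadow = (address >> 3)
   + offset), this places the shadow memory at offset 1 << 36,
   i.e. 0x1000000000. */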
9991
9992#undef TARGET_ADDRESS_COST
9993#define TARGET_ADDRESS_COST aarch64_address_cost
9994
9995/* This hook determines whether unnamed bitfields affect the alignment
9996 of the containing structure. The hook returns true if the structure
9997 should inherit the alignment requirements of an unnamed bitfield's
9998 type. */
9999#undef TARGET_ALIGN_ANON_BITFIELD
10000#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
10001
10002#undef TARGET_ASM_ALIGNED_DI_OP
10003#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
10004
10005#undef TARGET_ASM_ALIGNED_HI_OP
10006#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
10007
10008#undef TARGET_ASM_ALIGNED_SI_OP
10009#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
10010
10011#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
10012#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
10013 hook_bool_const_tree_hwi_hwi_const_tree_true
10014
10015#undef TARGET_ASM_FILE_START
10016#define TARGET_ASM_FILE_START aarch64_start_file
10017
10018#undef TARGET_ASM_OUTPUT_MI_THUNK
10019#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
10020
10021#undef TARGET_ASM_SELECT_RTX_SECTION
10022#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
10023
10024#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
10025#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
10026
10027#undef TARGET_BUILD_BUILTIN_VA_LIST
10028#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
10029
10030#undef TARGET_CALLEE_COPIES
10031#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
10032
10033#undef TARGET_CAN_ELIMINATE
10034#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
10035
10036#undef TARGET_CANNOT_FORCE_CONST_MEM
10037#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
10038
10039#undef TARGET_CONDITIONAL_REGISTER_USAGE
10040#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
10041
10042/* Only the least significant bit is used for initialization guard
10043 variables. */
10044#undef TARGET_CXX_GUARD_MASK_BIT
10045#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
10046
10047#undef TARGET_C_MODE_FOR_SUFFIX
10048#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
10049
10050#ifdef TARGET_BIG_ENDIAN_DEFAULT
10051#undef TARGET_DEFAULT_TARGET_FLAGS
10052#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
10053#endif
10054
10055#undef TARGET_CLASS_MAX_NREGS
10056#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
10057
10058#undef TARGET_BUILTIN_DECL
10059#define TARGET_BUILTIN_DECL aarch64_builtin_decl
10060
10061#undef TARGET_EXPAND_BUILTIN
10062#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
10063
10064#undef TARGET_EXPAND_BUILTIN_VA_START
10065#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
10066
10067#undef TARGET_FOLD_BUILTIN
10068#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
10069
10070#undef TARGET_FUNCTION_ARG
10071#define TARGET_FUNCTION_ARG aarch64_function_arg
10072
10073#undef TARGET_FUNCTION_ARG_ADVANCE
10074#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
10075
10076#undef TARGET_FUNCTION_ARG_BOUNDARY
10077#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
10078
10079#undef TARGET_FUNCTION_OK_FOR_SIBCALL
10080#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
10081
10082#undef TARGET_FUNCTION_VALUE
10083#define TARGET_FUNCTION_VALUE aarch64_function_value
10084
10085#undef TARGET_FUNCTION_VALUE_REGNO_P
10086#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
10087
10088#undef TARGET_FRAME_POINTER_REQUIRED
10089#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
10090
10091//#undef TARGET_GIMPLE_FOLD_BUILTIN
10092//#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
10093
10094#undef TARGET_GIMPLIFY_VA_ARG_EXPR
10095#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
10096
10097#undef TARGET_INIT_BUILTINS
10098#define TARGET_INIT_BUILTINS aarch64_init_builtins
10099
10100#undef TARGET_LEGITIMATE_ADDRESS_P
10101#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
10102
10103#undef TARGET_LEGITIMATE_CONSTANT_P
10104#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
10105
10106#undef TARGET_LIBGCC_CMP_RETURN_MODE
10107#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
10108
10109#undef TARGET_LRA_P
10110#define TARGET_LRA_P aarch64_lra_p
10111
10112#undef TARGET_MANGLE_TYPE
10113#define TARGET_MANGLE_TYPE aarch64_mangle_type
10114
10115#undef TARGET_MEMORY_MOVE_COST
10116#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
10117
10118#undef TARGET_MUST_PASS_IN_STACK
10119#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
10120
10121/* This target hook should return true if accesses to volatile bitfields
10122 should use the narrowest mode possible. It should return false if these
10123 accesses should use the bitfield container type. */
10124#undef TARGET_NARROW_VOLATILE_BITFIELD
10125#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
10126
10127#undef TARGET_OPTION_OVERRIDE
10128#define TARGET_OPTION_OVERRIDE aarch64_override_options
10129
10130#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
10131#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
10132 aarch64_override_options_after_change
10133
10134#undef TARGET_PASS_BY_REFERENCE
10135#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
10136
10137#undef TARGET_PREFERRED_RELOAD_CLASS
10138#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
10139
10140#undef TARGET_SECONDARY_RELOAD
10141#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
10142
10143#undef TARGET_SHIFT_TRUNCATION_MASK
10144#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
10145
10146#undef TARGET_SETUP_INCOMING_VARARGS
10147#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
10148
10149#undef TARGET_STRUCT_VALUE_RTX
10150#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
10151
10152#undef TARGET_REGISTER_MOVE_COST
10153#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
10154
10155#undef TARGET_RETURN_IN_MEMORY
10156#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
10157
10158#undef TARGET_RETURN_IN_MSB
10159#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
10160
10161#undef TARGET_RTX_COSTS
10162#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
10163
10164#undef TARGET_SCHED_ISSUE_RATE
10165#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
10166
10167#undef TARGET_TRAMPOLINE_INIT
10168#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
10169
10170#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
10171#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
10172
10173#undef TARGET_VECTOR_MODE_SUPPORTED_P
10174#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
10175
10176#undef TARGET_ARRAY_MODE_SUPPORTED_P
10177#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
10178
10179#undef TARGET_VECTORIZE_ADD_STMT_COST
10180#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
10181
10182#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
10183#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
10184 aarch64_builtin_vectorization_cost
10185
10186#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
10187#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
10188
10189#undef TARGET_VECTORIZE_BUILTINS
10190#define TARGET_VECTORIZE_BUILTINS
10191
10192#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
10193#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
10194 aarch64_builtin_vectorized_function
10195
10196#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
10197#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
10198 aarch64_autovectorize_vector_sizes
10199
10200#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
10201#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
10202 aarch64_atomic_assign_expand_fenv
10203
10204/* Section anchor support. */
10205
10206#undef TARGET_MIN_ANCHOR_OFFSET
10207#define TARGET_MIN_ANCHOR_OFFSET -256
10208
10209/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
10210 byte offset; we can do much more for larger data types, but have no way
10211 to determine the size of the access. We assume accesses are aligned. */
10212#undef TARGET_MAX_ANCHOR_OFFSET
10213#define TARGET_MAX_ANCHOR_OFFSET 4095
10214
10215#undef TARGET_VECTOR_ALIGNMENT
10216#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
10217
10218#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
10219#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
10220 aarch64_simd_vector_alignment_reachable
10221
10222/* vec_perm support. */
10223
10224#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
10225#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
10226 aarch64_vectorize_vec_perm_const_ok
10227
10228
10229#undef TARGET_FIXED_CONDITION_CODE_REGS
10230#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
10231
10232#undef TARGET_FLAGS_REGNUM
10233#define TARGET_FLAGS_REGNUM CC_REGNUM
10234
10235#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
10236#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
10237
10238#undef TARGET_ASAN_SHADOW_OFFSET
10239#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
10240
10241#undef TARGET_LEGITIMIZE_ADDRESS
10242#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
10243
10244struct gcc_target targetm = TARGET_INITIALIZER;
10245
10246#include "gt-aarch64.h"