/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2014 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "insn-codes.h"
#include "rtl.h"
#include "insn-attr.h"
#include "tree.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "regs.h"
#include "df.h"
#include "hard-reg-set.h"
#include "output.h"
#include "expr.h"
#include "reload.h"
#include "toplev.h"
#include "target.h"
#include "target-def.h"
#include "targhooks.h"
#include "ggc.h"
#include "function.h"
#include "tm_p.h"
#include "recog.h"
#include "langhooks.h"
#include "diagnostic-core.h"
#include "hash-table.h"
#include "vec.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "optabs.h"
#include "dwarf2.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "config/arm/aarch-cost-tables.h"
#include "dumpfile.h"
#include "builtins.h"

/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)

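/* Illustrative values: under the default LP64 ABI, POINTER_SIZE is 64,
   giving POINTER_BYTES == 8; under ILP32 it is 32, giving 4.  */
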
/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};

struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};

struct simd_immediate_info
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
  bool msl;
};

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif

static bool aarch64_lra_p (void);
static bool aarch64_composite_type_p (const_tree, enum machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
						     const_tree,
						     enum machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (enum machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
						 const unsigned char *sel);
static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* The current tuning set.  */
const struct tune_params *aarch64_tune_params;

/* Mask to specify which instructions we are allowed to generate.  */
unsigned long aarch64_isa_flags = 0;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;

/* Tuning parameters.  */

#if HAVE_DESIGNATED_INITIALIZERS
#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
#else
#define NAMED_PARAM(NAME, VAL) (VAL)
#endif

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_addrcost_table generic_addrcost_table =
{
#if HAVE_DESIGNATED_INITIALIZERS
  .addr_scale_costs =
#endif
    {
      NAMED_PARAM (hi, 0),
      NAMED_PARAM (si, 0),
      NAMED_PARAM (di, 0),
      NAMED_PARAM (ti, 0),
    },
  NAMED_PARAM (pre_modify, 0),
  NAMED_PARAM (post_modify, 0),
  NAMED_PARAM (register_offset, 0),
  NAMED_PARAM (register_extend, 0),
  NAMED_PARAM (imm_offset, 0)
};

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_addrcost_table cortexa57_addrcost_table =
{
#if HAVE_DESIGNATED_INITIALIZERS
  .addr_scale_costs =
#endif
    {
      NAMED_PARAM (hi, 1),
      NAMED_PARAM (si, 0),
      NAMED_PARAM (di, 0),
      NAMED_PARAM (ti, 1),
    },
  NAMED_PARAM (pre_modify, 0),
  NAMED_PARAM (post_modify, 0),
  NAMED_PARAM (register_offset, 0),
  NAMED_PARAM (register_extend, 0),
  NAMED_PARAM (imm_offset, 0),
};

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_regmove_cost generic_regmove_cost =
{
  NAMED_PARAM (GP2GP, 1),
  NAMED_PARAM (GP2FP, 2),
  NAMED_PARAM (FP2GP, 2),
  NAMED_PARAM (FP2FP, 2)
};

static const struct cpu_regmove_cost cortexa57_regmove_cost =
{
  NAMED_PARAM (GP2GP, 1),
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  NAMED_PARAM (GP2FP, 5),
  NAMED_PARAM (FP2GP, 5),
  NAMED_PARAM (FP2FP, 2)
};

static const struct cpu_regmove_cost cortexa53_regmove_cost =
{
  NAMED_PARAM (GP2GP, 1),
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  NAMED_PARAM (GP2FP, 5),
  NAMED_PARAM (FP2GP, 5),
  NAMED_PARAM (FP2FP, 2)
};

/* Generic costs for vector insn classes.  */
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_vector_cost generic_vector_cost =
{
  NAMED_PARAM (scalar_stmt_cost, 1),
  NAMED_PARAM (scalar_load_cost, 1),
  NAMED_PARAM (scalar_store_cost, 1),
  NAMED_PARAM (vec_stmt_cost, 1),
  NAMED_PARAM (vec_to_scalar_cost, 1),
  NAMED_PARAM (scalar_to_vec_cost, 1),
  NAMED_PARAM (vec_align_load_cost, 1),
  NAMED_PARAM (vec_unalign_load_cost, 1),
  NAMED_PARAM (vec_unalign_store_cost, 1),
  NAMED_PARAM (vec_store_cost, 1),
  NAMED_PARAM (cond_taken_branch_cost, 3),
  NAMED_PARAM (cond_not_taken_branch_cost, 1)
};

/* Cortex-A57 costs for vector insn classes.  */
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_vector_cost cortexa57_vector_cost =
{
  NAMED_PARAM (scalar_stmt_cost, 1),
  NAMED_PARAM (scalar_load_cost, 4),
  NAMED_PARAM (scalar_store_cost, 1),
  NAMED_PARAM (vec_stmt_cost, 3),
  NAMED_PARAM (vec_to_scalar_cost, 8),
  NAMED_PARAM (scalar_to_vec_cost, 8),
  NAMED_PARAM (vec_align_load_cost, 5),
  NAMED_PARAM (vec_unalign_load_cost, 5),
  NAMED_PARAM (vec_unalign_store_cost, 1),
  NAMED_PARAM (vec_store_cost, 1),
  NAMED_PARAM (cond_taken_branch_cost, 1),
  NAMED_PARAM (cond_not_taken_branch_cost, 1)
};

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct tune_params generic_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  NAMED_PARAM (memmov_cost, 4),
  NAMED_PARAM (issue_rate, 2)
};

static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  NAMED_PARAM (memmov_cost, 4),
  NAMED_PARAM (issue_rate, 2)
};

static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  NAMED_PARAM (memmov_cost, 4),
  NAMED_PARAM (issue_rate, 3)
};

/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
  {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, NULL, 0, NULL}
};

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
  {NULL, aarch64_none, NULL, 0, NULL}
};

/* Target specification.  These are populated as command-line arguments
   are processed, or NULL if not specified.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)

/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

/* ISA extensions in AArch64.  */
static const struct aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
  {NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION
  {NULL, 0, 0}
};

/* Used to track the size of an address when generating a pre/post
   increment address.  */
static enum machine_mode aarch64_memory_reference_mode;

/* Used to force GTY into this file.  */
static GTY(()) int gty_dummy;

/* A table of valid AArch64 "bitmask immediate" values for
   logical instructions.  */

#define AARCH64_NUM_BITMASKS  5334
static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];

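/* These bitmask immediates are the values encodable in the immediate
   fields of AND, ORR and EOR: a contiguous run of ones, rotated, and
   replicated across 2-, 4-, 8-, 16-, 32- or 64-bit elements.  For
   instance, 0x00ff00ff00ff00ff (an 8-bit run replicated in 16-bit
   elements) is representable, while 0x1234 is not; 5334 is the total
   number of distinct 64-bit values of this form.  */
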
typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

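/* Note that the enum above lists the codes in inverse pairs (EQ/NE,
   CS/CC, MI/PL, VS/VC, HI/LS, GE/LT, GT/LE), which is what lets
   AARCH64_INVERSE_CONDITION_CODE get away with flipping a single bit:
   for example, AARCH64_INVERSE_CONDITION_CODE (AARCH64_GE) is
   AARCH64_LT.  */
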
/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}

/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (enum machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}

/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (enum machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
	 || aarch64_vect_struct_mode_p (mode);
}

/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (enum machine_mode mode,
				unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && AARCH64_VALID_SIMD_QREG_MODE (mode)
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}

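/* For example, an array of two to four V4SImode elements qualifies;
   arrays of this shape are what the vector-structure load/store
   patterns (the ld2/st2, ld3/st3 and ld4/st4 forms) operate on.
   (An illustrative note; the large struct modes themselves are
   recognized by aarch64_vect_struct_mode_p above.)  */
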
/* Implement HARD_REGNO_NREGS.  */

int
aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}

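/* A worked example, assuming the usual UNITS_PER_WORD == 8 and
   UNITS_PER_VREG == 16: TImode occupies two GP registers, while a
   16-byte vector mode such as V4SImode fits in a single FP/SIMD
   register.  */
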
/* Implement HARD_REGNO_MODE_OK.  */

int
aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
	return
	  (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
	return 1;
    }

  return 0;
}

/* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
enum machine_mode
aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
				     enum machine_mode mode)
{
  /* Handle modes that fit within single registers.  */
  if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
    {
      if (GET_MODE_SIZE (mode) >= 4)
	return mode;
      else
	return SImode;
    }
  /* Fall back to generic for multi-reg and very large modes.  */
  else
    return choose_hard_reg_mode (regno, nregs, false);
}

/* Return true if calls to DECL should be treated as
   long-calls (i.e. called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (i.e. called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}

/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

     (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
				rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}

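/* An accepted instance of the above, for illustration: in DImode,
   MULT_IMM == 4 with EXTRACT_IMM == 34 passes all of the checks,
   since a 32-bit value shifted left by 2 occupies 34 bits; the
   extract is then equivalent to a 32->64 bit extend followed by a
   shift by 2, matching the extended-register operand forms.  */
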
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  enum machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}

/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as:

   RTL                                Absolute
   tmp = hi (symbol_ref);             adrp  x1, foo
   dest = lo_sum (tmp, symbol_ref);   add   dest, x1, :lo_12:foo
                                      nop

   PIC                                TLS
   adrp x1, :got:foo                  adrp  tmp, :tlsgd:foo
   ldr  x1, [:got_lo12:foo]           add   dest, tmp, :tlsgd_lo12:foo
                                      bl    __tls_get_addr
                                      nop

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
     adrp tmp, :tlsgd:imm
     add  dest, tmp, #:tlsgd_lo12:imm
     bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
     adrp dest, :tlsdesc:imm
     ldr  tmp, [dest, #:tlsdesc_lo12:imm]
     add  dest, dest, #:tlsdesc_lo12:imm
     blr  tmp
     mrs  tp, tpidr_el0
     add  dest, dest, tp

   Initial Exec:
     mrs  tp, tpidr_el0
     adrp tmp, :gottprel:imm
     ldr  dest, [tmp, #:gottprel_lo12:imm]
     add  dest, dest, tp

   Local Exec:
     mrs  tp, tpidr_el0
     add  t0, tp, #:tprel_hi12:imm
     add  t0, #:tprel_lo12_nc:imm
*/

static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
				   enum aarch64_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_SMALL_ABSOLUTE:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode.  */
	rtx tmp_reg = dest;
	enum machine_mode mode = GET_MODE (dest);

	gcc_assert (mode == Pmode || mode == ptr_mode);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	emit_insn (gen_add_losym (dest, tmp_reg, imm));
	return;
      }

    case SYMBOL_TINY_ABSOLUTE:
      emit_insn (gen_rtx_SET (Pmode, dest, imm));
      return;

    case SYMBOL_SMALL_GOT:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the GOT entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different ldr_got_small
	   patterns here (two patterns for ILP32).  */
	rtx tmp_reg = dest;
	enum machine_mode mode = GET_MODE (dest);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
	    else
	      emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
	  }
	else
	  {
	    gcc_assert (mode == Pmode);
	    emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
	  }

	return;
      }

    case SYMBOL_SMALL_TLSGD:
      {
	rtx_insn *insns;
	rtx result = gen_rtx_REG (Pmode, R0_REGNUM);

	start_sequence ();
	aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
	insns = get_insns ();
	end_sequence ();

	RTL_CONST_CALL_P (insns) = 1;
	emit_libcall_block (insns, dest, result, imm);
	return;
      }

    case SYMBOL_SMALL_TLSDESC:
      {
	enum machine_mode mode = GET_MODE (dest);
	rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
	rtx tp;

	gcc_assert (mode == Pmode || mode == ptr_mode);

	/* In ILP32, the GOT entry is always of SImode size.  Unlike
	   small GOT, the dest is fixed at reg 0.  */
	if (TARGET_ILP32)
	  emit_insn (gen_tlsdesc_small_si (imm));
	else
	  emit_insn (gen_tlsdesc_small_di (imm));
	tp = aarch64_load_tp (NULL);

	if (mode != Pmode)
	  tp = gen_lowpart (mode, tp);

	emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_GOTTPREL:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the GOT entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different tlsie_small
	   patterns here (two patterns for ILP32).  */
	enum machine_mode mode = GET_MODE (dest);
	rtx tmp_reg = gen_reg_rtx (mode);
	rtx tp = aarch64_load_tp (NULL);

	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      emit_insn (gen_tlsie_small_di (tmp_reg, imm));
	    else
	      {
		emit_insn (gen_tlsie_small_si (tmp_reg, imm));
		tp = gen_lowpart (mode, tp);
	      }
	  }
	else
	  {
	    gcc_assert (mode == Pmode);
	    emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
	  }

	emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_TPREL:
      {
	rtx tp = aarch64_load_tp (NULL);
	emit_insn (gen_tlsle_small (dest, tp, imm));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_TINY_GOT:
      emit_insn (gen_ldr_got_tiny (dest, imm));
      return;

    default:
      gcc_unreachable ();
    }
}

/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
	  ? emit_move_insn (dest, src)
	  : emit_move_insn_1 (dest, src));
}

/* Split a 128-bit move operation into two 64-bit move operations,
   taking care to handle partial overlap of register to register
   copies.  Special cases are needed when moving between GP regs and
   FP regs.  SRC can be a register, constant or memory; DST a register
   or memory.  If either operand is memory it must not have any side
   effects.  */
void
aarch64_split_128bit_move (rtx dst, rtx src)
{
  rtx dst_lo, dst_hi;
  rtx src_lo, src_hi;

  enum machine_mode mode = GET_MODE (dst);

  gcc_assert (mode == TImode || mode == TFmode);
  gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
  gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);

  if (REG_P (dst) && REG_P (src))
    {
      int src_regno = REGNO (src);
      int dst_regno = REGNO (dst);

      /* Handle FP <-> GP regs.  */
      if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
	{
	  src_lo = gen_lowpart (word_mode, src);
	  src_hi = gen_highpart (word_mode, src);

	  if (mode == TImode)
	    {
	      emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
	      emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
	    }
	  else
	    {
	      emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
	      emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
	    }
	  return;
	}
      else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
	{
	  dst_lo = gen_lowpart (word_mode, dst);
	  dst_hi = gen_highpart (word_mode, dst);

	  if (mode == TImode)
	    {
	      emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
	      emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
	    }
	  else
	    {
	      emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
	      emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
	    }
	  return;
	}
    }

  dst_lo = gen_lowpart (word_mode, dst);
  dst_hi = gen_highpart (word_mode, dst);
  src_lo = gen_lowpart (word_mode, src);
  src_hi = gen_highpart_mode (word_mode, mode, src);

  /* At most one pairing may overlap.  */
  if (reg_overlap_mentioned_p (dst_lo, src_hi))
    {
      aarch64_emit_move (dst_hi, src_hi);
      aarch64_emit_move (dst_lo, src_lo);
    }
  else
    {
      aarch64_emit_move (dst_lo, src_lo);
      aarch64_emit_move (dst_hi, src_hi);
    }
}

bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
{
  return (! REG_P (src)
	  || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
}

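/* An illustration of the overlap handling above (register numbers
   chosen arbitrarily): for a TImode copy from x2:x3 to x1:x2, dst_lo
   (x1) does not overlap src_hi (x3), so the low word is moved first;
   for a copy from x2:x3 to x3:x4, dst_lo (x3) overlaps src_hi (x3),
   so the high word is moved first to avoid clobbering it.  */
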
/* Split a complex SIMD combine.  */

void
aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
{
  enum machine_mode src_mode = GET_MODE (src1);
  enum machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src1) && REG_P (src2))
    {
      rtx (*gen) (rtx, rtx, rtx);

      switch (src_mode)
	{
	case V8QImode:
	  gen = gen_aarch64_simd_combinev8qi;
	  break;
	case V4HImode:
	  gen = gen_aarch64_simd_combinev4hi;
	  break;
	case V2SImode:
	  gen = gen_aarch64_simd_combinev2si;
	  break;
	case V2SFmode:
	  gen = gen_aarch64_simd_combinev2sf;
	  break;
	case DImode:
	  gen = gen_aarch64_simd_combinedi;
	  break;
	case DFmode:
	  gen = gen_aarch64_simd_combinedf;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src1, src2));
      return;
    }
}

/* Split a complex SIMD move.  */

void
aarch64_split_simd_move (rtx dst, rtx src)
{
  enum machine_mode src_mode = GET_MODE (src);
  enum machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src))
    {
      rtx (*gen) (rtx, rtx);

      gcc_assert (VECTOR_MODE_P (src_mode));

      switch (src_mode)
	{
	case V16QImode:
	  gen = gen_aarch64_split_simd_movv16qi;
	  break;
	case V8HImode:
	  gen = gen_aarch64_split_simd_movv8hi;
	  break;
	case V4SImode:
	  gen = gen_aarch64_split_simd_movv4si;
	  break;
	case V2DImode:
	  gen = gen_aarch64_split_simd_movv2di;
	  break;
	case V4SFmode:
	  gen = gen_aarch64_split_simd_movv4sf;
	  break;
	case V2DFmode:
	  gen = gen_aarch64_split_simd_movv2df;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src));
      return;
    }
}

static rtx
aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
{
  if (can_create_pseudo_p ())
    return force_reg (mode, value);
  else
    {
      x = aarch64_emit_move (x, value);
      return x;
    }
}


static rtx
aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
{
  if (!aarch64_plus_immediate (GEN_INT (offset), mode))
    {
      rtx high;
      /* Load the full offset into a register.  This
	 might be improvable in the future.  */
      high = GEN_INT (offset);
      offset = 0;
      high = aarch64_force_temporary (mode, temp, high);
      reg = aarch64_force_temporary (mode, temp,
				     gen_rtx_PLUS (mode, high, reg));
    }
  return plus_constant (mode, reg, offset);
}

void
aarch64_expand_mov_immediate (rtx dest, rtx imm)
{
  enum machine_mode mode = GET_MODE (dest);
  unsigned HOST_WIDE_INT mask;
  int i;
  bool first;
  unsigned HOST_WIDE_INT val;
  bool subtargets;
  rtx subtarget;
  int one_match, zero_match, first_not_ffff_match;

  gcc_assert (mode == SImode || mode == DImode);

  /* Check on what type of symbol it is.  */
  if (GET_CODE (imm) == SYMBOL_REF
      || GET_CODE (imm) == LABEL_REF
      || GET_CODE (imm) == CONST)
    {
      rtx mem, base, offset;
      enum aarch64_symbol_type sty;

      /* If we have (const (plus symbol offset)), separate out the offset
	 before we start classifying the symbol.  */
      split_const (imm, &base, &offset);

      sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
      switch (sty)
	{
	case SYMBOL_FORCE_TO_MEM:
	  if (offset != const0_rtx
	      && targetm.cannot_force_const_mem (mode, imm))
	    {
	      gcc_assert (can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }
	  mem = force_const_mem (ptr_mode, imm);
	  gcc_assert (mem);
	  if (mode != ptr_mode)
	    mem = gen_rtx_ZERO_EXTEND (mode, mem);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
	  return;

	case SYMBOL_SMALL_TLSGD:
	case SYMBOL_SMALL_TLSDESC:
	case SYMBOL_SMALL_GOTTPREL:
	case SYMBOL_SMALL_GOT:
	case SYMBOL_TINY_GOT:
	  if (offset != const0_rtx)
	    {
	      gcc_assert (can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }
	  /* FALLTHRU */

	case SYMBOL_SMALL_TPREL:
	case SYMBOL_SMALL_ABSOLUTE:
	case SYMBOL_TINY_ABSOLUTE:
	  aarch64_load_symref_appropriately (dest, imm, sty);
	  return;

	default:
	  gcc_unreachable ();
	}
    }

  if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      return;
    }

  if (!CONST_INT_P (imm))
    {
      if (GET_CODE (imm) == HIGH)
	emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      else
	{
	  rtx mem = force_const_mem (mode, imm);
	  gcc_assert (mem);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
	}

      return;
    }

  if (mode == SImode)
    {
      /* We know we can't do this in 1 insn, and we must be able to do it
	 in two; so don't mess around looking for sequences that don't buy
	 us anything.  */
      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
      emit_insn (gen_insv_immsi (dest, GEN_INT (16),
				 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
      return;
    }

  /* Remaining cases are all for DImode.  */

  val = INTVAL (imm);
  subtargets = optimize && can_create_pseudo_p ();

  one_match = 0;
  zero_match = 0;
  mask = 0xffff;
  first_not_ffff_match = -1;

  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) == mask)
	one_match++;
      else
	{
	  if (first_not_ffff_match < 0)
	    first_not_ffff_match = i;
	  if ((val & mask) == 0)
	    zero_match++;
	}
    }

  if (one_match == 2)
    {
      /* Set one of the quarters and then insert back into result.  */
      mask = 0xffffll << first_not_ffff_match;
      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
      emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
				 GEN_INT ((val >> first_not_ffff_match)
					  & 0xffff)));
      return;
    }

  if (zero_match == 2)
    goto simple_sequence;

  mask = 0x0ffff0000UL;
  for (i = 16; i < 64; i += 16, mask <<= 16)
    {
      HOST_WIDE_INT comp = mask & ~(mask - 1);

      if (aarch64_uimm12_shift (val - (val & mask)))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - (val & mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT ((val + comp) & mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - ((val + comp) & mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT ((val - comp) | ~mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - ((val - comp) | ~mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT (val | ~mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - (val | ~mask))));
	  return;
	}
    }

  /* See if we can do it by arithmetically combining two
     immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      int j;
      mask = 0xffff;

      if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
	  || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT (aarch64_bitmasks[i])));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - aarch64_bitmasks[i])));
	  return;
	}

      for (j = 0; j < 64; j += 16, mask <<= 16)
	{
	  if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest,
				      GEN_INT (aarch64_bitmasks[i])));
	      emit_insn (gen_insv_immdi (dest, GEN_INT (j),
					 GEN_INT ((val >> j) & 0xffff)));
	      return;
	    }
	}
    }

  /* See if we can do it by logically combining two immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
	{
	  int j;

	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
	      {
		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
					GEN_INT (aarch64_bitmasks[i])));
		emit_insn (gen_iordi3 (dest, subtarget,
				       GEN_INT (aarch64_bitmasks[j])));
		return;
	      }
	}
      else if ((val & aarch64_bitmasks[i]) == val)
	{
	  int j;

	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
	      {
		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
					GEN_INT (aarch64_bitmasks[j])));
		emit_insn (gen_anddi3 (dest, subtarget,
				       GEN_INT (aarch64_bitmasks[i])));
		return;
	      }
	}
    }

  if (one_match > zero_match)
    {
      /* Set either first three quarters or all but the third.  */
      mask = 0xffffll << (16 - first_not_ffff_match);
      emit_insn (gen_rtx_SET (VOIDmode, dest,
			      GEN_INT (val | mask | 0xffffffff00000000ull)));

      /* Now insert other two quarters.  */
      for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
	   i < 64; i += 16, mask <<= 16)
	{
	  if ((val & mask) != mask)
	    emit_insn (gen_insv_immdi (dest, GEN_INT (i),
				       GEN_INT ((val >> i) & 0xffff)));
	}
      return;
    }

 simple_sequence:
  first = true;
  mask = 0xffff;
  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) != 0)
	{
	  if (first)
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest,
				      GEN_INT (val & mask)));
	      first = false;
	    }
	  else
	    emit_insn (gen_insv_immdi (dest, GEN_INT (i),
				       GEN_INT ((val >> i) & 0xffff)));
	}
    }
}

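/* Two sketches of typical output from the expander above (the exact
   sequences depend on the move patterns, so treat these as
   illustrative rather than guaranteed):

   SImode 0x12345678 always takes the two-instruction path:
	mov	w0, #0x5678
	movk	w0, #0x1234, lsl #16

   DImode 0x0000123400005678 has zero_match == 2 and so uses the
   simple sequence:
	mov	x0, #0x5678
	movk	x0, #0x1234, lsl #32  */
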
static bool
aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
				 tree exp ATTRIBUTE_UNUSED)
{
  /* Currently, always true.  */
  return true;
}

/* Implement TARGET_PASS_BY_REFERENCE.  */

static bool
aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
			   enum machine_mode mode,
			   const_tree type,
			   bool named ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  enum machine_mode dummymode;
  int nregs;

  /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
  size = (mode == BLKmode && type)
    ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);

  /* Aggregates are passed by reference based on their size.  */
  if (type && AGGREGATE_TYPE_P (type))
    {
      size = int_size_in_bytes (type);
    }

  /* Variable sized arguments are always returned by reference.  */
  if (size < 0)
    return true;

  /* Can this be a candidate to be passed in fp/simd register(s)?  */
  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &dummymode, &nregs,
					       NULL))
    return false;

  /* Arguments which are variable sized or larger than 2 registers are
     passed by reference unless they are a homogeneous floating point
     aggregate.  */
  return size > 2 * UNITS_PER_WORD;
}

/* Return TRUE if VALTYPE is padded to its least significant bits.  */
static bool
aarch64_return_in_msb (const_tree valtype)
{
  enum machine_mode dummy_mode;
  int dummy_int;

  /* Never happens in little-endian mode.  */
  if (!BYTES_BIG_ENDIAN)
    return false;

  /* Only composite types smaller than or equal to 16 bytes can
     be potentially returned in registers.  */
  if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
      || int_size_in_bytes (valtype) <= 0
      || int_size_in_bytes (valtype) > 16)
    return false;

  /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
     or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
     is always passed/returned in the least significant bits of fp/simd
     register(s).  */
  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
					       &dummy_mode, &dummy_int, NULL))
    return false;

  return true;
}

/* Implement TARGET_FUNCTION_VALUE.
   Define how to find the value returned by a function.  */

static rtx
aarch64_function_value (const_tree type, const_tree func,
			bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode;
  int unsignedp;
  int count;
  enum machine_mode ag_mode;

  mode = TYPE_MODE (type);
  if (INTEGRAL_TYPE_P (type))
    mode = promote_function_mode (type, mode, &unsignedp, func, 1);

  if (aarch64_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);

      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &ag_mode, &count, NULL))
    {
      if (!aarch64_composite_type_p (type, mode))
	{
	  gcc_assert (count == 1 && mode == ag_mode);
	  return gen_rtx_REG (mode, V0_REGNUM);
	}
      else
	{
	  int i;
	  rtx par;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
	  for (i = 0; i < count; i++)
	    {
	      rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  return par;
	}
    }
  else
    return gen_rtx_REG (mode, R0_REGNUM);
}

/* Implement TARGET_FUNCTION_VALUE_REGNO_P.
   Return true if REGNO is the number of a hard register in which the values
   of called function may come back.  */

static bool
aarch64_function_value_regno_p (const unsigned int regno)
{
  /* Maximum of 16 bytes can be returned in the general registers.  Examples
     of 16-byte return values are: 128-bit integers and 16-byte small
     structures (excluding homogeneous floating-point aggregates).  */
  if (regno == R0_REGNUM || regno == R1_REGNUM)
    return true;

  /* Up to four fp/simd registers can return a function value, e.g. a
     homogeneous floating-point aggregate having four members.  */
  if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
    return !TARGET_GENERAL_REGS_ONLY;

  return false;
}

/* Implement TARGET_RETURN_IN_MEMORY.

   If the type T of the result of a function is such that
     void func (T arg)
   would require that arg be passed as a value in a register (or set of
   registers) according to the parameter passing rules, then the result
   is returned in the same registers as would be used for such an
   argument.  */

static bool
aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  enum machine_mode ag_mode;
  int count;

  if (!AGGREGATE_TYPE_P (type)
      && TREE_CODE (type) != COMPLEX_TYPE
      && TREE_CODE (type) != VECTOR_TYPE)
    /* Simple scalar types are always returned in registers.  */
    return false;

  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
					       type,
					       &ag_mode,
					       &count,
					       NULL))
    return false;

  /* Types larger than 2 registers are returned in memory.  */
  size = int_size_in_bytes (type);
  return (size < 0 || size > 2 * UNITS_PER_WORD);
}

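/* Informal AAPCS64 examples of the above: a structure of four doubles
   is an HFA and comes back in V0-V3; a 16-byte structure of two longs
   comes back in X0/X1; a 24-byte structure fails the size test and is
   returned in memory (via the indirect result register, X8).  */
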
static bool
aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
			       const_tree type, int *nregs)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  return aarch64_vfp_is_call_or_return_candidate (mode,
						  type,
						  &pcum->aapcs_vfp_rmode,
						  nregs,
						  NULL);
}

/* Given MODE and TYPE of a function argument, return the alignment in
   bits.  The idea is to suppress any stronger alignment requested by
   the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
   This is a helper function for local use only.  */

static unsigned int
aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
{
  unsigned int alignment;

  if (type)
    {
      if (!integer_zerop (TYPE_SIZE (type)))
	{
	  if (TYPE_MODE (type) == mode)
	    alignment = TYPE_ALIGN (type);
	  else
	    alignment = GET_MODE_ALIGNMENT (mode);
	}
      else
	alignment = 0;
    }
  else
    alignment = GET_MODE_ALIGNMENT (mode);

  return alignment;
}

/* Layout a function argument according to the AAPCS64 rules.  The rule
   numbers refer to the rule numbers in the AAPCS64.  */

static void
aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
		    const_tree type,
		    bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int ncrn, nvrn, nregs;
  bool allocate_ncrn, allocate_nvrn;
  HOST_WIDE_INT size;

  /* We need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Size in bytes, rounded to the nearest multiple of 8 bytes.  */
  size
    = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
			UNITS_PER_WORD);

  allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
  allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
						 mode,
						 type,
						 &nregs);

  /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
     The following code thus handles passing by SIMD/FP registers first.  */

  nvrn = pcum->aapcs_nvrn;

  /* C1 - C5 for floating point, homogeneous floating point aggregates (HFA)
     and homogeneous short-vector aggregates (HVA).  */
  if (allocate_nvrn)
    {
      if (nvrn + nregs <= NUM_FP_ARG_REGS)
	{
	  pcum->aapcs_nextnvrn = nvrn + nregs;
	  if (!aarch64_composite_type_p (type, mode))
	    {
	      gcc_assert (nregs == 1);
	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
	    }
	  else
	    {
	      rtx par;
	      int i;
	      par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	      for (i = 0; i < nregs; i++)
		{
		  rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
					 V0_REGNUM + nvrn + i);
		  tmp = gen_rtx_EXPR_LIST
		    (VOIDmode, tmp,
		     GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
		  XVECEXP (par, 0, i) = tmp;
		}
	      pcum->aapcs_reg = par;
	    }
	  return;
	}
      else
	{
	  /* C.3 NSRN is set to 8.  */
	  pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
	  goto on_stack;
	}
    }

  ncrn = pcum->aapcs_ncrn;
  nregs = size / UNITS_PER_WORD;

  /* C6 - C9, though the sign and zero extension semantics are
     handled elsewhere.  This is the case where the argument fits
     entirely in general registers.  */
  if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
    {
      unsigned int alignment = aarch64_function_arg_alignment (mode, type);

      gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);

      /* C.8 if the argument has an alignment of 16 then the NGRN is
	 rounded up to the next even number.  */
      if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
	{
	  ++ncrn;
	  gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
	}
      /* NREGS can be 0 when e.g. an empty structure is to be passed.
	 A reg is still generated for it, but the caller should be smart
	 enough not to use it.  */
      if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
	{
	  pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
	}
      else
	{
	  rtx par;
	  int i;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	  for (i = 0; i < nregs; i++)
	    {
	      rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * UNITS_PER_WORD));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  pcum->aapcs_reg = par;
	}

      pcum->aapcs_nextncrn = ncrn + nregs;
      return;
    }

  /* C.11  */
  pcum->aapcs_nextncrn = NUM_ARG_REGS;

  /* The argument is passed on stack; record the needed number of words for
     this argument and align the total size if necessary.  */
on_stack:
  pcum->aapcs_stack_words = size / UNITS_PER_WORD;
  if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
    pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
					       16 / UNITS_PER_WORD);
  return;
}

/* Implement TARGET_FUNCTION_ARG.  */

static rtx
aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
		      const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);

  if (mode == VOIDmode)
    return NULL_RTX;

  aarch64_layout_arg (pcum_v, mode, type, named);
  return pcum->aapcs_reg;
}

void
aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
			      const_tree fntype ATTRIBUTE_UNUSED,
			      rtx libname ATTRIBUTE_UNUSED,
			      const_tree fndecl ATTRIBUTE_UNUSED,
			      unsigned n_named ATTRIBUTE_UNUSED)
{
  pcum->aapcs_ncrn = 0;
  pcum->aapcs_nvrn = 0;
  pcum->aapcs_nextncrn = 0;
  pcum->aapcs_nextnvrn = 0;
  pcum->pcs_variant = ARM_PCS_AAPCS64;
  pcum->aapcs_reg = NULL_RTX;
  pcum->aapcs_arg_processed = false;
  pcum->aapcs_stack_words = 0;
  pcum->aapcs_stack_size = 0;

  return;
}

static void
aarch64_function_arg_advance (cumulative_args_t pcum_v,
			      enum machine_mode mode,
			      const_tree type,
			      bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  if (pcum->pcs_variant == ARM_PCS_AAPCS64)
    {
      aarch64_layout_arg (pcum_v, mode, type, named);
      gcc_assert ((pcum->aapcs_reg != NULL_RTX)
		  != (pcum->aapcs_stack_words != 0));
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
      pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
      pcum->aapcs_stack_size += pcum->aapcs_stack_words;
      pcum->aapcs_stack_words = 0;
      pcum->aapcs_reg = NULL_RTX;
    }
}

bool
aarch64_function_arg_regno_p (unsigned regno)
{
  return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
	  || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
}

/* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
   PARM_BOUNDARY bits of alignment, but will be given anything up
   to STACK_BOUNDARY bits if the type requires it.  This makes sure
   that both before and after the layout of each argument, the Next
   Stacked Argument Address (NSAA) will have a minimum alignment of
   8 bytes.  */

static unsigned int
aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  unsigned int alignment = aarch64_function_arg_alignment (mode, type);

  if (alignment < PARM_BOUNDARY)
    alignment = PARM_BOUNDARY;
  if (alignment > STACK_BOUNDARY)
    alignment = STACK_BOUNDARY;
  return alignment;
}

/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).

   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte of the stack slot has useful data.

   Small aggregate types are placed in the lowest memory address.

   The related parameter passing rules are B.4, C.3, C.5 and C.14.  */

bool
aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
{
  /* On little-endian targets, the least significant byte of every stack
     argument is passed at the lowest byte address of the stack slot.  */
  if (!BYTES_BIG_ENDIAN)
    return true;

  /* Otherwise, integral, floating-point and pointer types are padded downward:
     the least significant byte of a stack argument is passed at the highest
     byte address of the stack slot.  */
  if (type
      ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
	 || POINTER_TYPE_P (type))
      : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
    return false;

  /* Everything else padded upward, i.e. data in first byte of stack slot.  */
  return true;
}

/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).

   It specifies padding for the last (may also be the only)
   element of a block move between registers and memory.  If
   assuming the block is in the memory, padding upward means that
   the last element is padded after its highest significant byte,
   while in downward padding, the last element is padded at its
   least significant byte side.

   Small aggregates and small complex types are always padded
   upwards.

   We don't need to worry about homogeneous floating-point or
   short-vector aggregates; their move is not affected by the
   padding direction determined here.  Regardless of endianness,
   each element of such an aggregate is put in the least
   significant bits of a fp/simd register.

   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
			bool first ATTRIBUTE_UNUSED)
{
  /* Small composite types are always padded upward.  */
  if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
    {
      HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
			    : GET_MODE_SIZE (mode));
      if (size < 2 * UNITS_PER_WORD)
	return true;
    }

  /* Otherwise, use the default padding.  */
  return !BYTES_BIG_ENDIAN;
}

static enum machine_mode
aarch64_libgcc_cmp_return_mode (void)
{
  return SImode;
}

static bool
aarch64_frame_pointer_required (void)
{
  /* In aarch64_override_options_after_change
     flag_omit_leaf_frame_pointer turns off the frame pointer by
     default.  Turn it back on now if we've not got a leaf
     function.  */
  if (flag_omit_leaf_frame_pointer
      && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
    return true;

  return false;
}

1833/* Mark the registers that need to be saved by the callee and calculate
1834 the size of the callee-saved registers area and frame record (both FP
1835 and LR may be omitted). */
1836static void
1837aarch64_layout_frame (void)
1838{
1839 HOST_WIDE_INT offset = 0;
1840 int regno;
1841
1842 if (reload_completed && cfun->machine->frame.laid_out)
1843 return;
1844
97826595
MS
1845#define SLOT_NOT_REQUIRED (-2)
1846#define SLOT_REQUIRED (-1)
1847
363ffa50
JW
1848 cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
1849 cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
1850
43e9d192
IB
1851 /* First mark all the registers that really need to be saved... */
1852 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
97826595 1853 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
1854
1855 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
97826595 1856 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
43e9d192
IB
1857
1858 /* ... that includes the eh data registers (if needed)... */
1859 if (crtl->calls_eh_return)
1860 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
97826595
MS
1861 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
1862 = SLOT_REQUIRED;
43e9d192
IB
1863
1864 /* ... and any callee saved register that dataflow says is live. */
1865 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1866 if (df_regs_ever_live_p (regno)
1867 && !call_used_regs[regno])
1868 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
1869
1870 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1871 if (df_regs_ever_live_p (regno)
1872 && !call_used_regs[regno])
1873 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
1874
1875 if (frame_pointer_needed)
1876 {
1877 /* FP and LR are placed in the linkage record. */
1878 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1879 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
1880 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
1881 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
1882 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1883 offset += 2 * UNITS_PER_WORD;
1884 }
1885
1886 /* Now assign stack slots for them. */
1887 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1888 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
1889 {
1890 cfun->machine->frame.reg_offset[regno] = offset;
1891 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
1892 cfun->machine->frame.wb_candidate1 = regno;
1893 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
1894 cfun->machine->frame.wb_candidate2 = regno;
1895 offset += UNITS_PER_WORD;
1896 }
1897
1898 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1899 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
1900 {
1901 cfun->machine->frame.reg_offset[regno] = offset;
1902 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
1903 cfun->machine->frame.wb_candidate1 = regno;
1904 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
1905 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
1906 cfun->machine->frame.wb_candidate2 = regno;
1907 offset += UNITS_PER_WORD;
1908 }
1909
1910 cfun->machine->frame.padding0 =
1911 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1912 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1913
1914 cfun->machine->frame.saved_regs_size = offset;
1915
1916 cfun->machine->frame.hard_fp_offset
1917 = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
1918 + get_frame_size ()
1919 + cfun->machine->frame.saved_regs_size,
1920 STACK_BOUNDARY / BITS_PER_UNIT);
1921
1922 cfun->machine->frame.frame_size
1923 = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
1924 + crtl->outgoing_args_size,
1925 STACK_BOUNDARY / BITS_PER_UNIT);
1926
1927 cfun->machine->frame.laid_out = true;
1928}
1929
1930static bool
1931aarch64_register_saved_on_entry (int regno)
1932{
1933 return cfun->machine->frame.reg_offset[regno] >= 0;
1934}
1935
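/* Return the lowest register number, starting from REGNO and not
   exceeding LIMIT, whose save slot is marked as required; the result
   is greater than LIMIT if there is no such register. */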
1936static unsigned
1937aarch64_next_callee_save (unsigned regno, unsigned limit)
1938{
1939 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
1940 regno ++;
1941 return regno;
1942}
1943
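/* Push the register numbered REGNO, in MODE, on the stack: decrement
   the stack pointer by ADJUSTMENT bytes and store the register at the
   new top of stack, marking the store as frame-related. */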
1944static void
1945aarch64_pushwb_single_reg (enum machine_mode mode, unsigned regno,
1946 HOST_WIDE_INT adjustment)
1947 {
1948 rtx base_rtx = stack_pointer_rtx;
1949 rtx insn, reg, mem;
1950
1951 reg = gen_rtx_REG (mode, regno);
1952 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
1953 plus_constant (Pmode, base_rtx, -adjustment));
1954 mem = gen_rtx_MEM (mode, mem);
1955
1956 insn = emit_move_insn (mem, reg);
1957 RTX_FRAME_RELATED_P (insn) = 1;
1958}
1959
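/* Return a store-pair-with-writeback pattern: BASE is decremented by
   ADJUSTMENT and REG and REG2 are stored in the two adjacent slots at
   the new BASE (an STP with pre-decrement writeback). */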
1960static rtx
1961aarch64_gen_storewb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2,
1962 HOST_WIDE_INT adjustment)
1963{
1964 switch (mode)
1965 {
1966 case DImode:
1967 return gen_storewb_pairdi_di (base, base, reg, reg2,
1968 GEN_INT (-adjustment),
1969 GEN_INT (UNITS_PER_WORD - adjustment));
1970 case DFmode:
1971 return gen_storewb_pairdf_di (base, base, reg, reg2,
1972 GEN_INT (-adjustment),
1973 GEN_INT (UNITS_PER_WORD - adjustment));
1974 default:
1975 gcc_unreachable ();
1976 }
1977}
1978
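/* Push the pair of registers numbered REGNO1 and REGNO2 on the stack
   with a single writeback store-pair that decrements the stack pointer
   by ADJUSTMENT, marking the parts the unwinder needs frame-related. */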
1979static void
1980aarch64_pushwb_pair_reg (enum machine_mode mode, unsigned regno1,
1981 unsigned regno2, HOST_WIDE_INT adjustment)
1982{
1983 rtx_insn *insn;
1984 rtx reg1 = gen_rtx_REG (mode, regno1);
1985 rtx reg2 = gen_rtx_REG (mode, regno2);
1986
1987 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
1988 reg2, adjustment));
1989 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
1990 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1991 RTX_FRAME_RELATED_P (insn) = 1;
1992}
1993
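/* Return a load-pair-with-writeback pattern: REG and REG2 are loaded
   from BASE and BASE + UNITS_PER_WORD, after which BASE is advanced by
   ADJUSTMENT (the post-indexed LDP used when popping a frame). */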
1994static rtx
1995aarch64_gen_loadwb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2,
1996 HOST_WIDE_INT adjustment)
1997{
1998 switch (mode)
1999 {
2000 case DImode:
2001 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
2002 GEN_INT (UNITS_PER_WORD));
2003 case DFmode:
2004 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
2005 GEN_INT (UNITS_PER_WORD));
2006 default:
2007 gcc_unreachable ();
2008 }
2009}
2010
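/* Return a (non-writeback) pattern storing REG1 to MEM1 and REG2 to
   MEM2 as a single store-pair; DImode selects the X-register form and
   DFmode the D-register form. The load-pair generator below is its
   mirror image. */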
2011static rtx
2012aarch64_gen_store_pair (enum machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
2013 rtx reg2)
2014{
2015 switch (mode)
2016 {
2017 case DImode:
2018 return gen_store_pairdi (mem1, reg1, mem2, reg2);
2019
2020 case DFmode:
2021 return gen_store_pairdf (mem1, reg1, mem2, reg2);
2022
2023 default:
2024 gcc_unreachable ();
2025 }
2026}
2027
2028static rtx
2029aarch64_gen_load_pair (enum machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
2030 rtx mem2)
2031{
2032 switch (mode)
2033 {
2034 case DImode:
2035 return gen_load_pairdi (reg1, mem1, reg2, mem2);
2036
2037 case DFmode:
2038 return gen_load_pairdf (reg1, mem1, reg2, mem2);
2039
2040 default:
2041 gcc_unreachable ();
2042 }
2043}
2044
2045
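/* Emit code to save the callee-saved registers of mode MODE in the
   range START to LIMIT, each at its frame offset plus START_OFFSET from
   the stack pointer, combining two saves into a store-pair when their
   slots are adjacent. When SKIP_WB is true, registers already pushed
   by the writeback sequence are skipped. */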
2046static void
2047aarch64_save_callee_saves (enum machine_mode mode, HOST_WIDE_INT start_offset,
2048 unsigned start, unsigned limit, bool skip_wb)
2049{
2050 rtx_insn *insn;
2051 rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed
2052 ? gen_frame_mem : gen_rtx_MEM);
2053 unsigned regno;
2054 unsigned regno2;
2055
2056 for (regno = aarch64_next_callee_save (start, limit);
2057 regno <= limit;
2058 regno = aarch64_next_callee_save (regno + 1, limit))
2059 {
2060 rtx reg, mem;
2061 HOST_WIDE_INT offset;
2062
2063 if (skip_wb
2064 && (regno == cfun->machine->frame.wb_candidate1
2065 || regno == cfun->machine->frame.wb_candidate2))
2066 continue;
2067
2068 reg = gen_rtx_REG (mode, regno);
2069 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2070 mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2071 offset));
2072
2073 regno2 = aarch64_next_callee_save (regno + 1, limit);
2074
2075 if (regno2 <= limit
2076 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2077 == cfun->machine->frame.reg_offset[regno2]))
2078
2079 {
2080 rtx reg2 = gen_rtx_REG (mode, regno2);
2081 rtx mem2;
2082
2083 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2084 mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2085 offset));
2086 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
2087 reg2));
2088
2089 /* The first part of a frame-related parallel insn is
2090 always assumed to be relevant to the frame
2091 calculations; subsequent parts are only
2092 frame-related if explicitly marked. */
2093 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2094 regno = regno2;
2095 }
2096 else
2097 insn = emit_move_insn (mem, reg);
2098
2099 RTX_FRAME_RELATED_P (insn) = 1;
2100 }
2101}
2102
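/* Emit code to restore the callee-saved registers of mode MODE in the
   range START to LIMIT from their frame offsets plus START_OFFSET,
   using load-pairs where possible, and accumulate a REG_CFA_RESTORE
   note for each restored register on *CFI_OPS. */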
2103static void
2104aarch64_restore_callee_saves (enum machine_mode mode,
2105 HOST_WIDE_INT start_offset, unsigned start,
2106 unsigned limit, bool skip_wb, rtx *cfi_ops)
2107{
2108 rtx base_rtx = stack_pointer_rtx;
2109 rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed
2110 ? gen_frame_mem : gen_rtx_MEM);
2111 unsigned regno;
2112 unsigned regno2;
2113 HOST_WIDE_INT offset;
2114
2115 for (regno = aarch64_next_callee_save (start, limit);
2116 regno <= limit;
2117 regno = aarch64_next_callee_save (regno + 1, limit))
2118 {
2119 rtx reg, mem;
2120
2121 if (skip_wb
2122 && (regno == cfun->machine->frame.wb_candidate1
2123 || regno == cfun->machine->frame.wb_candidate2))
2124 continue;
2125
2126 reg = gen_rtx_REG (mode, regno);
2127 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2128 mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2129
2130 regno2 = aarch64_next_callee_save (regno + 1, limit);
2131
2132 if (regno2 <= limit
2133 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2134 == cfun->machine->frame.reg_offset[regno2]))
2135 {
2136 rtx reg2 = gen_rtx_REG (mode, regno2);
2137 rtx mem2;
2138
2139 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2140 mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2141 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
2142
2143 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
2144 regno = regno2;
2145 }
2146 else
2147 emit_move_insn (reg, mem);
2148 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
2149 }
2150}
2151
2152/* AArch64 stack frames generated by this compiler look like:
2153
2154 +-------------------------------+
2155 | |
2156 | incoming stack arguments |
2157 | |
2158 +-------------------------------+
2159 | | <-- incoming stack pointer (aligned)
2160 | callee-allocated save area |
2161 | for register varargs |
2162 | |
2163 +-------------------------------+
2164 | local variables | <-- frame_pointer_rtx
2165 | |
2166 +-------------------------------+
2167 | padding0 | \
2168 +-------------------------------+ |
2169 | callee-saved registers | | frame.saved_regs_size
2170 +-------------------------------+ |
2171 | LR' | |
2172 +-------------------------------+ |
2173 | FP' | / <- hard_frame_pointer_rtx (aligned)
2174 +-------------------------------+
2175 | dynamic allocation |
2176 +-------------------------------+
2177 | padding |
2178 +-------------------------------+
2179 | outgoing stack arguments | <-- arg_pointer
2180 | |
2181 +-------------------------------+
2182 | | <-- stack_pointer_rtx (aligned)
2183
2184 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2185 but leave frame_pointer_rtx and hard_frame_pointer_rtx
2186 unchanged. */
2187
2188/* Generate the prologue instructions for entry into a function.
2189 Establish the stack frame by decreasing the stack pointer with a
2190 properly calculated size and, if necessary, create a frame record
2191 filled with the values of LR and previous frame pointer. The
2192 current FP is also set up if it is in use. */
2193
2194void
2195aarch64_expand_prologue (void)
2196{
2197 /* sub sp, sp, #<frame_size>
2198 stp {fp, lr}, [sp, #<frame_size> - 16]
2199 add fp, sp, #<frame_size> - hardfp_offset
2200 stp {cs_reg}, [fp, #-16] etc.
2201
2202 sub sp, sp, <final_adjustment_if_any>
2203 */
2204 HOST_WIDE_INT frame_size, offset;
2205 HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */
2206 HOST_WIDE_INT hard_fp_offset;
2207 rtx_insn *insn;
2208
2209 aarch64_layout_frame ();
2210
2211 offset = frame_size = cfun->machine->frame.frame_size;
2212 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2213 fp_offset = frame_size - hard_fp_offset;
2214
2215 if (flag_stack_usage_info)
2216 current_function_static_stack_size = frame_size;
2217
2218 /* Store pairs and load pairs have a range of only -512 to 504. */
2219 if (offset >= 512)
2220 {
2221 /* When the frame has a large size, an initial decrease is done on
2222 the stack pointer to jump over the callee-allocated save area for
2223 register varargs, the local variable area and/or the callee-saved
2224 register area. This will allow the pre-index write-back
2225 store pair instructions to be used for setting up the stack frame
2226 efficiently. */
2227 offset = hard_fp_offset;
2228 if (offset >= 512)
2229 offset = cfun->machine->frame.saved_regs_size;
2230
2231 frame_size -= (offset + crtl->outgoing_args_size);
2232 fp_offset = 0;
2233
2234 if (frame_size >= 0x1000000)
2235 {
2236 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2237 emit_move_insn (op0, GEN_INT (-frame_size));
2238 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2239
2240 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2241 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
2242 plus_constant (Pmode, stack_pointer_rtx,
2243 -frame_size)));
2244 RTX_FRAME_RELATED_P (insn) = 1;
2245 }
2246 else if (frame_size > 0)
2247 {
2248 int hi_ofs = frame_size & 0xfff000;
2249 int lo_ofs = frame_size & 0x000fff;
2250
2251 if (hi_ofs)
2252 {
2253 insn = emit_insn (gen_add2_insn
2254 (stack_pointer_rtx, GEN_INT (-hi_ofs)));
2255 RTX_FRAME_RELATED_P (insn) = 1;
2256 }
2257 if (lo_ofs)
2258 {
2259 insn = emit_insn (gen_add2_insn
2260 (stack_pointer_rtx, GEN_INT (-lo_ofs)));
2261 RTX_FRAME_RELATED_P (insn) = 1;
2262 }
2263 }
2264 }
2265 else
2266 frame_size = -1;
2267
2268 if (offset > 0)
2269 {
2270 bool skip_wb = false;
2271
2272 if (frame_pointer_needed)
2273 {
2274 skip_wb = true;
2275
2276 if (fp_offset)
2277 {
2278 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2279 GEN_INT (-offset)));
2280 RTX_FRAME_RELATED_P (insn) = 1;
2281
2282 aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
2283 R30_REGNUM, false);
2284 }
2285 else
2286 aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);
2287
2288 /* Set up frame pointer to point to the location of the
2289 previous frame pointer on the stack. */
2290 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2291 stack_pointer_rtx,
2292 GEN_INT (fp_offset)));
2293 RTX_FRAME_RELATED_P (insn) = 1;
2294 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
2295 }
2296 else
2297 {
2298 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2299 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2300
2301 if (fp_offset
2302 || reg1 == FIRST_PSEUDO_REGISTER
2303 || (reg2 == FIRST_PSEUDO_REGISTER
2304 && offset >= 256))
2305 {
2306 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2307 GEN_INT (-offset)));
2308 RTX_FRAME_RELATED_P (insn) = 1;
2309 }
2310 else
2311 {
2312 enum machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
2313
2314 skip_wb = true;
2315
2316 if (reg2 == FIRST_PSEUDO_REGISTER)
2317 aarch64_pushwb_single_reg (mode1, reg1, offset);
2318 else
2319 aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
2320 }
2321 }
2322
2323 aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2324 skip_wb);
2325 aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2326 skip_wb);
2327 }
2328
2329 /* When offset >= 512,
2330 sub sp, sp, #<outgoing_args_size>. */
2331 if (frame_size > -1)
2332 {
2333 if (crtl->outgoing_args_size > 0)
2334 {
2335 insn = emit_insn (gen_add2_insn
2336 (stack_pointer_rtx,
2337 GEN_INT (- crtl->outgoing_args_size)));
2338 RTX_FRAME_RELATED_P (insn) = 1;
2339 }
2340 }
2341}
2342
2343/* Return TRUE if we can use a simple_return insn.
2344
2345 This function checks whether the callee-saved stack is empty, which
2346 means no restore actions are needed. The pro_and_epilogue pass uses
2347 this to check whether the shrink-wrapping optimization is feasible. */
2348
2349bool
2350aarch64_use_return_insn_p (void)
2351{
2352 if (!reload_completed)
2353 return false;
2354
2355 if (crtl->profile)
2356 return false;
2357
2358 aarch64_layout_frame ();
2359
2360 return cfun->machine->frame.frame_size == 0;
2361}
2362
2363/* Generate the epilogue instructions for returning from a function. */
2364void
2365aarch64_expand_epilogue (bool for_sibcall)
2366{
2367 HOST_WIDE_INT frame_size, offset;
2368 HOST_WIDE_INT fp_offset;
2369 HOST_WIDE_INT hard_fp_offset;
2370 rtx_insn *insn;
2371
2372 aarch64_layout_frame ();
2373
2374 offset = frame_size = cfun->machine->frame.frame_size;
2375 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2376 fp_offset = frame_size - hard_fp_offset;
2377
2378 /* Store pairs and load pairs have a range of only -512 to 504. */
2379 if (offset >= 512)
2380 {
2381 offset = hard_fp_offset;
2382 if (offset >= 512)
2383 offset = cfun->machine->frame.saved_regs_size;
2384
2385 frame_size -= (offset + crtl->outgoing_args_size);
2386 fp_offset = 0;
2387 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2388 {
2389 insn = emit_insn (gen_add2_insn
2390 (stack_pointer_rtx,
2391 GEN_INT (crtl->outgoing_args_size)));
2392 RTX_FRAME_RELATED_P (insn) = 1;
2393 }
2394 }
2395 else
2396 frame_size = -1;
2397
2398 /* If there were outgoing arguments or we've done dynamic stack
2399 allocation, then restore the stack pointer from the frame
2400 pointer. This is at most one insn and more efficient than using
2401 GCC's internal mechanism. */
2402 if (frame_pointer_needed
2403 && (crtl->outgoing_args_size || cfun->calls_alloca))
2404 {
2405 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2406 hard_frame_pointer_rtx,
2407 GEN_INT (0)));
2408 offset = offset - fp_offset;
2409 }
2410
2411 if (offset > 0)
2412 {
2413 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2414 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2415 bool skip_wb = true;
2416 rtx cfi_ops = NULL;
2417
2418 if (frame_pointer_needed)
2419 fp_offset = 0;
2420 else if (fp_offset
2421 || reg1 == FIRST_PSEUDO_REGISTER
2422 || (reg2 == FIRST_PSEUDO_REGISTER
2423 && offset >= 256))
2424 skip_wb = false;
2425
2426 aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2427 skip_wb, &cfi_ops);
2428 aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2429 skip_wb, &cfi_ops);
2430
2431 if (skip_wb)
2432 {
2433 enum machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
2434 rtx rreg1 = gen_rtx_REG (mode1, reg1);
2435
2436 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
2437 if (reg2 == FIRST_PSEUDO_REGISTER)
2438 {
2439 rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
2440 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
2441 mem = gen_rtx_MEM (mode1, mem);
2442 insn = emit_move_insn (rreg1, mem);
2443 }
2444 else
2445 {
dd991abb 2446 rtx rreg2 = gen_rtx_REG (mode1, reg2);
4b92caa1 2447
dd991abb
RH
2448 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
2449 insn = emit_insn (aarch64_gen_loadwb_pair
2450 (mode1, stack_pointer_rtx, rreg1,
2451 rreg2, offset));
2452 }
2453 }
2454 else
2455 {
2456 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2457 GEN_INT (offset)));
2458 }
2459
2460 /* Reset the CFA to be SP + FRAME_SIZE. */
2461 rtx new_cfa = stack_pointer_rtx;
2462 if (frame_size > 0)
2463 new_cfa = plus_constant (Pmode, new_cfa, frame_size);
2464 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
2465 REG_NOTES (insn) = cfi_ops;
2466 RTX_FRAME_RELATED_P (insn) = 1;
2467 }
2468
2469 if (frame_size > 0)
2470 {
2471 if (frame_size >= 0x1000000)
2472 {
2473 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2474 emit_move_insn (op0, GEN_INT (frame_size));
2475 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2476 }
2477 else
2478 {
2479 int hi_ofs = frame_size & 0xfff000;
2480 int lo_ofs = frame_size & 0x000fff;
2481
2482 if (hi_ofs && lo_ofs)
2483 {
2484 insn = emit_insn (gen_add2_insn
2485 (stack_pointer_rtx, GEN_INT (hi_ofs)));
2486 RTX_FRAME_RELATED_P (insn) = 1;
2487 frame_size = lo_ofs;
2488 }
2489 insn = emit_insn (gen_add2_insn
2490 (stack_pointer_rtx, GEN_INT (frame_size)));
2491 }
2492
2493 /* Reset the CFA to be SP + 0. */
2494 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
2495 RTX_FRAME_RELATED_P (insn) = 1;
2496 }
2497
2498 /* Stack adjustment for exception handler. */
2499 if (crtl->calls_eh_return)
2500 {
2501 /* We need to unwind the stack by the offset computed by
2502 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
2503 to be SP; letting the CFA move during this adjustment
2504 is just as correct as retaining the CFA from the body
2505 of the function. Therefore, do nothing special. */
2506 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2507 }
2508
2509 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2510 if (!for_sibcall)
2511 emit_jump_insn (ret_rtx);
2512}
2513
2514/* Return the place to copy the exception unwinding return address to.
2515 This will probably be a stack slot, but could (in theory) be the
2516 return register. */
2517rtx
2518aarch64_final_eh_return_addr (void)
2519{
2520 HOST_WIDE_INT fp_offset;
2521
2522 aarch64_layout_frame ();
2523
2524 fp_offset = cfun->machine->frame.frame_size
2525 - cfun->machine->frame.hard_fp_offset;
2526
2527 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2528 return gen_rtx_REG (DImode, LR_REGNUM);
2529
2530 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2531 result in a store to save LR introduced by builtin_eh_return () being
2532 incorrectly deleted because the alias is not detected.
2533 So in the calculation of the address to copy the exception unwinding
2534 return address to, we note 2 cases.
2535 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2536 we return a SP-relative location since all the addresses are SP-relative
2537 in this case. This prevents the store from being optimized away.
2538 If the fp_offset is not 0, then the addresses will be FP-relative and
2539 therefore we return a FP-relative location. */
2540
2541 if (frame_pointer_needed)
2542 {
2543 if (fp_offset)
2544 return gen_frame_mem (DImode,
2545 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2546 else
2547 return gen_frame_mem (DImode,
2548 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2549 }
2550
2551 /* If FP is not needed, we calculate the location of LR, which would be
2552 at the top of the saved registers block. */
2553
2554 return gen_frame_mem (DImode,
2555 plus_constant (Pmode,
2556 stack_pointer_rtx,
2557 fp_offset
2558 + cfun->machine->frame.saved_regs_size
2559 - 2 * UNITS_PER_WORD));
2560}
2561
2562/* Possibly output code to build up a constant in a register. For
2563 the benefit of the costs infrastructure, returns the number of
2564 instructions which would be emitted. GENERATE inhibits or
2565 enables code generation. */
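/* For example, 0x1234000000005678 is built as
	movz	reg, #0x5678
	movk	reg, #0x1234, lsl #48
   since only one of the three upper 16-bit chunks is nonzero, the
   MOVZ-based sequence needs a single MOVK and beats the MOVN-based
   alternative; the function returns 2 for this value. */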
2566
2567static int
2568aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
2569{
2570 int insns = 0;
2571
2572 if (aarch64_bitmask_imm (val, DImode))
2573 {
2574 if (generate)
2575 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2576 insns = 1;
2577 }
2578 else
2579 {
2580 int i;
2581 int ncount = 0;
2582 int zcount = 0;
2583 HOST_WIDE_INT valp = val >> 16;
2584 HOST_WIDE_INT valm;
2585 HOST_WIDE_INT tval;
2586
2587 for (i = 16; i < 64; i += 16)
2588 {
2589 valm = (valp & 0xffff);
2590
2591 if (valm != 0)
2592 ++ zcount;
2593
2594 if (valm != 0xffff)
2595 ++ ncount;
2596
2597 valp >>= 16;
2598 }
2599
2600 /* zcount contains the number of additional MOVK instructions
2601 required if the constant is built up with an initial MOVZ instruction,
2602 while ncount is the number of MOVK instructions required if starting
2603 with a MOVN instruction. Choose the sequence that yields the fewest
2604 number of instructions, preferring MOVZ instructions when they are both
2605 the same. */
2606 if (ncount < zcount)
2607 {
2608 if (generate)
2609 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2610 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2611 tval = 0xffff;
2612 insns++;
2613 }
2614 else
2615 {
2616 if (generate)
2617 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2618 GEN_INT (val & 0xffff));
2619 tval = 0;
2620 insns++;
2621 }
2622
2623 val >>= 16;
2624
2625 for (i = 16; i < 64; i += 16)
2626 {
2627 if ((val & 0xffff) != tval)
2628 {
2629 if (generate)
2630 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2631 GEN_INT (i),
2632 GEN_INT (val & 0xffff)));
2633 insns++;
2634 }
2635 val >>= 16;
2636 }
2637 }
2638 return insns;
2639}
2640
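/* Add DELTA to the register numbered REGNUM, using the register numbered
   SCRATCHREG as a temporary where the constant cannot be encoded directly.
   For example, DELTA == 20488 (0x5008) is added in two steps: 20488 / 4096
   == 5 goes into the scratch register and is added shifted left by 12,
   then the remainder 20488 % 4096 == 8 is added as an immediate. */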
2641static void
2642aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2643{
2644 HOST_WIDE_INT mdelta = delta;
2645 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2646 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2647
2648 if (mdelta < 0)
2649 mdelta = -mdelta;
2650
2651 if (mdelta >= 4096 * 4096)
2652 {
2653 (void) aarch64_build_constant (scratchreg, delta, true);
2654 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2655 }
2656 else if (mdelta > 0)
2657 {
2658 if (mdelta >= 4096)
2659 {
2660 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2661 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2662 if (delta < 0)
2663 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2664 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2665 else
2666 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2667 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2668 }
2669 if (mdelta % 4096 != 0)
2670 {
2671 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2672 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2673 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2674 }
2675 }
2676}
2677
2678/* Output code to add DELTA to the first argument, and then jump
2679 to FUNCTION. Used for C++ multiple inheritance. */
2680static void
2681aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2682 HOST_WIDE_INT delta,
2683 HOST_WIDE_INT vcall_offset,
2684 tree function)
2685{
2686 /* The this pointer is always in x0. Note that this differs from
2687 Arm, where the this pointer may be bumped to r1 if r0 is required
2688 to return a pointer to an aggregate. On AArch64 a result value
2689 pointer will be in x8. */
2690 int this_regno = R0_REGNUM;
2691 rtx this_rtx, temp0, temp1, addr, funexp;
2692 rtx_insn *insn;
2693
2694 reload_completed = 1;
2695 emit_note (NOTE_INSN_PROLOGUE_END);
2696
2697 if (vcall_offset == 0)
2698 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2699 else
2700 {
2701 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2702
2703 this_rtx = gen_rtx_REG (Pmode, this_regno);
2704 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2705 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2706
2707 addr = this_rtx;
2708 if (delta != 0)
2709 {
2710 if (delta >= -256 && delta < 256)
2711 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2712 plus_constant (Pmode, this_rtx, delta));
2713 else
2714 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2715 }
2716
2717 if (Pmode == ptr_mode)
2718 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2719 else
2720 aarch64_emit_move (temp0,
2721 gen_rtx_ZERO_EXTEND (Pmode,
2722 gen_rtx_MEM (ptr_mode, addr)));
2723
2724 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2725 addr = plus_constant (Pmode, temp0, vcall_offset);
2726 else
2727 {
2728 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
2729 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2730 }
2731
2732 if (Pmode == ptr_mode)
2733 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2734 else
2735 aarch64_emit_move (temp1,
2736 gen_rtx_SIGN_EXTEND (Pmode,
2737 gen_rtx_MEM (ptr_mode, addr)));
2738
2739 emit_insn (gen_add2_insn (this_rtx, temp1));
2740 }
2741
2742 /* Generate a tail call to the target function. */
2743 if (!TREE_USED (function))
2744 {
2745 assemble_external (function);
2746 TREE_USED (function) = 1;
2747 }
2748 funexp = XEXP (DECL_RTL (function), 0);
2749 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2750 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2751 SIBLING_CALL_P (insn) = 1;
2752
2753 insn = get_insns ();
2754 shorten_branches (insn);
2755 final_start_function (insn, file, 1);
2756 final (insn, file, 1);
2757 final_end_function ();
2758
2759 /* Stop pretending to be a post-reload pass. */
2760 reload_completed = 0;
2761}
2762
2763static int
2764aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2765{
2766 if (GET_CODE (*x) == SYMBOL_REF)
2767 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2768
2769 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2770 TLS offsets, not real symbol references. */
2771 if (GET_CODE (*x) == UNSPEC
2772 && XINT (*x, 1) == UNSPEC_TLS)
2773 return -1;
2774
2775 return 0;
2776}
2777
2778static bool
2779aarch64_tls_referenced_p (rtx x)
2780{
2781 if (!TARGET_HAVE_TLS)
2782 return false;
2783
2784 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2785}
2786
2787
2788static int
2789aarch64_bitmasks_cmp (const void *i1, const void *i2)
2790{
2791 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2792 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2793
2794 if (*imm1 < *imm2)
2795 return -1;
2796 if (*imm1 > *imm2)
2797 return +1;
2798 return 0;
2799}
2800
2801
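/* Build the table of all immediates encodable as AArch64 bitmask
   operands: for each element size E, every pattern of S consecutive set
   bits rotated right by R, replicated to 64 bits. For example, e == 8,
   s == 3, r == 1 produces the byte 0x83, which the fall-through switch
   below replicates to 0x8383838383838383. The table is sorted so that
   aarch64_bitmask_imm can use bsearch. */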
2802static void
2803aarch64_build_bitmask_table (void)
2804{
2805 unsigned HOST_WIDE_INT mask, imm;
2806 unsigned int log_e, e, s, r;
2807 unsigned int nimms = 0;
2808
2809 for (log_e = 1; log_e <= 6; log_e++)
2810 {
2811 e = 1 << log_e;
2812 if (e == 64)
2813 mask = ~(HOST_WIDE_INT) 0;
2814 else
2815 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2816 for (s = 1; s < e; s++)
2817 {
2818 for (r = 0; r < e; r++)
2819 {
2820 /* Set S consecutive bits to 1 (S < 64). */
2821 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2822 /* Rotate right by R. */
2823 if (r != 0)
2824 imm = ((imm >> r) | (imm << (e - r))) & mask;
2825 /* Replicate the constant depending on SIMD size. */
2826 switch (log_e) {
2827 case 1: imm |= (imm << 2);
2828 case 2: imm |= (imm << 4);
2829 case 3: imm |= (imm << 8);
2830 case 4: imm |= (imm << 16);
2831 case 5: imm |= (imm << 32);
2832 case 6:
2833 break;
2834 default:
2835 gcc_unreachable ();
2836 }
2837 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2838 aarch64_bitmasks[nimms++] = imm;
2839 }
2840 }
2841 }
2842
2843 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2844 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2845 aarch64_bitmasks_cmp);
2846}
2847
2848
2849/* Return true if val can be encoded as a 12-bit unsigned immediate with
2850 a left shift of 0 or 12 bits. */
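/* For example, 0xabc (shift of 0) and 0xabc000 (shift of 12) are
   encodable, but 0xabc00 is not, as it straddles the two fields. */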
2851bool
2852aarch64_uimm12_shift (HOST_WIDE_INT val)
2853{
2854 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2855 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2856 );
2857}
2858
2859
2860/* Return true if val is an immediate that can be loaded into a
2861 register by a MOVZ instruction. */
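/* For example, 0x12340000 is loadable as "movz w0, #0x1234, lsl #16";
   for 64-bit modes a single 16-bit chunk at bit 32 or 48 also works. */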
2862static bool
2863aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2864{
2865 if (GET_MODE_SIZE (mode) > 4)
2866 {
2867 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2868 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2869 return 1;
2870 }
2871 else
2872 {
2873 /* Ignore sign extension. */
2874 val &= (HOST_WIDE_INT) 0xffffffff;
2875 }
2876 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2877 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2878}
2879
2880
2881/* Return true if val is a valid bitmask immediate. */
2882bool
2883aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2884{
2885 if (GET_MODE_SIZE (mode) < 8)
2886 {
2887 /* Replicate bit pattern. */
2888 val &= (HOST_WIDE_INT) 0xffffffff;
2889 val |= val << 32;
2890 }
2891 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2892 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2893}
2894
2895
2896/* Return true if val is an immediate that can be loaded into a
2897 register in a single instruction. */
2898bool
2899aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2900{
2901 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2902 return 1;
2903 return aarch64_bitmask_imm (val, mode);
2904}
2905
2906static bool
2907aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2908{
2909 rtx base, offset;
2910
2911 if (GET_CODE (x) == HIGH)
2912 return true;
2913
2914 split_const (x, &base, &offset);
2915 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2916 {
2917 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2918 != SYMBOL_FORCE_TO_MEM)
2919 return true;
2920 else
2921 /* Avoid generating a 64-bit relocation in ILP32; leave
2922 to aarch64_expand_mov_immediate to handle it properly. */
2923 return mode != ptr_mode;
2924 }
2925
2926 return aarch64_tls_referenced_p (x);
2927}
2928
2929/* Return true if register REGNO is a valid index register.
2930 STRICT_P is true if REG_OK_STRICT is in effect. */
2931
2932bool
2933aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2934{
2935 if (!HARD_REGISTER_NUM_P (regno))
2936 {
2937 if (!strict_p)
2938 return true;
2939
2940 if (!reg_renumber)
2941 return false;
2942
2943 regno = reg_renumber[regno];
2944 }
2945 return GP_REGNUM_P (regno);
2946}
2947
2948/* Return true if register REGNO is a valid base register for mode MODE.
2949 STRICT_P is true if REG_OK_STRICT is in effect. */
2950
2951bool
2952aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2953{
2954 if (!HARD_REGISTER_NUM_P (regno))
2955 {
2956 if (!strict_p)
2957 return true;
2958
2959 if (!reg_renumber)
2960 return false;
2961
2962 regno = reg_renumber[regno];
2963 }
2964
2965 /* The fake registers will be eliminated to either the stack or
2966 hard frame pointer, both of which are usually valid base registers.
2967 Reload deals with the cases where the eliminated form isn't valid. */
2968 return (GP_REGNUM_P (regno)
2969 || regno == SP_REGNUM
2970 || regno == FRAME_POINTER_REGNUM
2971 || regno == ARG_POINTER_REGNUM);
2972}
2973
2974/* Return true if X is a valid base register for mode MODE.
2975 STRICT_P is true if REG_OK_STRICT is in effect. */
2976
2977static bool
2978aarch64_base_register_rtx_p (rtx x, bool strict_p)
2979{
2980 if (!strict_p && GET_CODE (x) == SUBREG)
2981 x = SUBREG_REG (x);
2982
2983 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2984}
2985
2986/* Return true if address offset is a valid index. If it is, fill in INFO
2987 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2988
2989static bool
2990aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2991 enum machine_mode mode, bool strict_p)
2992{
2993 enum aarch64_address_type type;
2994 rtx index;
2995 int shift;
2996
2997 /* (reg:P) */
2998 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2999 && GET_MODE (x) == Pmode)
3000 {
3001 type = ADDRESS_REG_REG;
3002 index = x;
3003 shift = 0;
3004 }
3005 /* (sign_extend:DI (reg:SI)) */
3006 else if ((GET_CODE (x) == SIGN_EXTEND
3007 || GET_CODE (x) == ZERO_EXTEND)
3008 && GET_MODE (x) == DImode
3009 && GET_MODE (XEXP (x, 0)) == SImode)
3010 {
3011 type = (GET_CODE (x) == SIGN_EXTEND)
3012 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3013 index = XEXP (x, 0);
3014 shift = 0;
3015 }
3016 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3017 else if (GET_CODE (x) == MULT
3018 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3019 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3020 && GET_MODE (XEXP (x, 0)) == DImode
3021 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3022 && CONST_INT_P (XEXP (x, 1)))
3023 {
3024 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3025 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3026 index = XEXP (XEXP (x, 0), 0);
3027 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3028 }
3029 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3030 else if (GET_CODE (x) == ASHIFT
3031 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3032 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3033 && GET_MODE (XEXP (x, 0)) == DImode
3034 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3035 && CONST_INT_P (XEXP (x, 1)))
3036 {
3037 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3038 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3039 index = XEXP (XEXP (x, 0), 0);
3040 shift = INTVAL (XEXP (x, 1));
3041 }
3042 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3043 else if ((GET_CODE (x) == SIGN_EXTRACT
3044 || GET_CODE (x) == ZERO_EXTRACT)
3045 && GET_MODE (x) == DImode
3046 && GET_CODE (XEXP (x, 0)) == MULT
3047 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3048 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3049 {
3050 type = (GET_CODE (x) == SIGN_EXTRACT)
3051 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3052 index = XEXP (XEXP (x, 0), 0);
3053 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3054 if (INTVAL (XEXP (x, 1)) != 32 + shift
3055 || INTVAL (XEXP (x, 2)) != 0)
3056 shift = -1;
3057 }
3058 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3059 (const_int 0xffffffff<<shift)) */
3060 else if (GET_CODE (x) == AND
3061 && GET_MODE (x) == DImode
3062 && GET_CODE (XEXP (x, 0)) == MULT
3063 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3064 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3065 && CONST_INT_P (XEXP (x, 1)))
3066 {
3067 type = ADDRESS_REG_UXTW;
3068 index = XEXP (XEXP (x, 0), 0);
3069 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3070 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3071 shift = -1;
3072 }
3073 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3074 else if ((GET_CODE (x) == SIGN_EXTRACT
3075 || GET_CODE (x) == ZERO_EXTRACT)
3076 && GET_MODE (x) == DImode
3077 && GET_CODE (XEXP (x, 0)) == ASHIFT
3078 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3079 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3080 {
3081 type = (GET_CODE (x) == SIGN_EXTRACT)
3082 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3083 index = XEXP (XEXP (x, 0), 0);
3084 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3085 if (INTVAL (XEXP (x, 1)) != 32 + shift
3086 || INTVAL (XEXP (x, 2)) != 0)
3087 shift = -1;
3088 }
3089 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3090 (const_int 0xffffffff<<shift)) */
3091 else if (GET_CODE (x) == AND
3092 && GET_MODE (x) == DImode
3093 && GET_CODE (XEXP (x, 0)) == ASHIFT
3094 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3095 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3096 && CONST_INT_P (XEXP (x, 1)))
3097 {
3098 type = ADDRESS_REG_UXTW;
3099 index = XEXP (XEXP (x, 0), 0);
3100 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3101 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3102 shift = -1;
3103 }
3104 /* (mult:P (reg:P) (const_int scale)) */
3105 else if (GET_CODE (x) == MULT
3106 && GET_MODE (x) == Pmode
3107 && GET_MODE (XEXP (x, 0)) == Pmode
3108 && CONST_INT_P (XEXP (x, 1)))
3109 {
3110 type = ADDRESS_REG_REG;
3111 index = XEXP (x, 0);
3112 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3113 }
3114 /* (ashift:P (reg:P) (const_int shift)) */
3115 else if (GET_CODE (x) == ASHIFT
3116 && GET_MODE (x) == Pmode
3117 && GET_MODE (XEXP (x, 0)) == Pmode
3118 && CONST_INT_P (XEXP (x, 1)))
3119 {
3120 type = ADDRESS_REG_REG;
3121 index = XEXP (x, 0);
3122 shift = INTVAL (XEXP (x, 1));
3123 }
3124 else
3125 return false;
3126
3127 if (GET_CODE (index) == SUBREG)
3128 index = SUBREG_REG (index);
3129
3130 if ((shift == 0 ||
3131 (shift > 0 && shift <= 3
3132 && (1 << shift) == GET_MODE_SIZE (mode)))
3133 && REG_P (index)
3134 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3135 {
3136 info->type = type;
3137 info->offset = index;
3138 info->shift = shift;
3139 return true;
3140 }
3141
3142 return false;
3143}
3144
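/* Return true if OFFSET is within range of a 7-bit signed, scaled
   load/store-pair offset for mode MODE: e.g. for DImode, any multiple
   of 8 in [-512, 504]. */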
3145bool
3146aarch64_offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3147{
3148 return (offset >= -64 * GET_MODE_SIZE (mode)
3149 && offset < 64 * GET_MODE_SIZE (mode)
3150 && offset % GET_MODE_SIZE (mode) == 0);
3151}
3152
3153static inline bool
3154offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3155 HOST_WIDE_INT offset)
3156{
3157 return offset >= -256 && offset < 256;
3158}
3159
3160static inline bool
3161offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3162{
3163 return (offset >= 0
3164 && offset < 4096 * GET_MODE_SIZE (mode)
3165 && offset % GET_MODE_SIZE (mode) == 0);
3166}
3167
3168/* Return true if X is a valid address for machine mode MODE. If it is,
3169 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3170 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3171
3172static bool
3173aarch64_classify_address (struct aarch64_address_info *info,
3174 rtx x, enum machine_mode mode,
3175 RTX_CODE outer_code, bool strict_p)
3176{
3177 enum rtx_code code = GET_CODE (x);
3178 rtx op0, op1;
3179 bool allow_reg_index_p =
3180 outer_code != PARALLEL && (GET_MODE_SIZE (mode) != 16
3181 || aarch64_vector_mode_supported_p (mode));
3182 /* Don't support anything other than POST_INC or REG addressing for
3183 AdvSIMD. */
3184 if (aarch64_vect_struct_mode_p (mode)
3185 && (code != POST_INC && code != REG))
3186 return false;
3187
3188 switch (code)
3189 {
3190 case REG:
3191 case SUBREG:
3192 info->type = ADDRESS_REG_IMM;
3193 info->base = x;
3194 info->offset = const0_rtx;
3195 return aarch64_base_register_rtx_p (x, strict_p);
3196
3197 case PLUS:
3198 op0 = XEXP (x, 0);
3199 op1 = XEXP (x, 1);
3200
3201 if (! strict_p
3202 && REG_P (op0)
3203 && (op0 == virtual_stack_vars_rtx
3204 || op0 == frame_pointer_rtx
3205 || op0 == arg_pointer_rtx)
3206 && CONST_INT_P (op1))
3207 {
3208 info->type = ADDRESS_REG_IMM;
3209 info->base = op0;
3210 info->offset = op1;
3211
3212 return true;
3213 }
3214
43e9d192
IB
3215 if (GET_MODE_SIZE (mode) != 0
3216 && CONST_INT_P (op1)
3217 && aarch64_base_register_rtx_p (op0, strict_p))
3218 {
3219 HOST_WIDE_INT offset = INTVAL (op1);
3220
3221 info->type = ADDRESS_REG_IMM;
3222 info->base = op0;
3223 info->offset = op1;
3224
3225 /* TImode and TFmode values are allowed in both pairs of X
3226 registers and individual Q registers. The available
3227 address modes are:
3228 X,X: 7-bit signed scaled offset
3229 Q: 9-bit signed offset
3230 We conservatively require an offset representable in either mode.
3231 */
3232 if (mode == TImode || mode == TFmode)
3233 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3234 && offset_9bit_signed_unscaled_p (mode, offset));
3235
3236 if (outer_code == PARALLEL)
3237 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3238 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
3239 else
3240 return (offset_9bit_signed_unscaled_p (mode, offset)
3241 || offset_12bit_unsigned_scaled_p (mode, offset));
3242 }
3243
3244 if (allow_reg_index_p)
3245 {
3246 /* Look for base + (scaled/extended) index register. */
3247 if (aarch64_base_register_rtx_p (op0, strict_p)
3248 && aarch64_classify_index (info, op1, mode, strict_p))
3249 {
3250 info->base = op0;
3251 return true;
3252 }
3253 if (aarch64_base_register_rtx_p (op1, strict_p)
3254 && aarch64_classify_index (info, op0, mode, strict_p))
3255 {
3256 info->base = op1;
3257 return true;
3258 }
3259 }
3260
3261 return false;
3262
3263 case POST_INC:
3264 case POST_DEC:
3265 case PRE_INC:
3266 case PRE_DEC:
3267 info->type = ADDRESS_REG_WB;
3268 info->base = XEXP (x, 0);
3269 info->offset = NULL_RTX;
3270 return aarch64_base_register_rtx_p (info->base, strict_p);
3271
3272 case POST_MODIFY:
3273 case PRE_MODIFY:
3274 info->type = ADDRESS_REG_WB;
3275 info->base = XEXP (x, 0);
3276 if (GET_CODE (XEXP (x, 1)) == PLUS
3277 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3278 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3279 && aarch64_base_register_rtx_p (info->base, strict_p))
3280 {
3281 HOST_WIDE_INT offset;
3282 info->offset = XEXP (XEXP (x, 1), 1);
3283 offset = INTVAL (info->offset);
3284
3285 /* TImode and TFmode values are allowed in both pairs of X
3286 registers and individual Q registers. The available
3287 address modes are:
3288 X,X: 7-bit signed scaled offset
3289 Q: 9-bit signed offset
3290 We conservatively require an offset representable in either mode.
3291 */
3292 if (mode == TImode || mode == TFmode)
3293 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3294 && offset_9bit_signed_unscaled_p (mode, offset));
3295
3296 if (outer_code == PARALLEL)
3297 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3298 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
3299 else
3300 return offset_9bit_signed_unscaled_p (mode, offset);
3301 }
3302 return false;
3303
3304 case CONST:
3305 case SYMBOL_REF:
3306 case LABEL_REF:
3307 /* Load literal: PC-relative constant pool entry. Only supported
3308 for SI mode or larger. */
3309 info->type = ADDRESS_SYMBOLIC;
3310 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3311 {
3312 rtx sym, addend;
3313
3314 split_const (x, &sym, &addend);
3315 return (GET_CODE (sym) == LABEL_REF
3316 || (GET_CODE (sym) == SYMBOL_REF
3317 && CONSTANT_POOL_ADDRESS_P (sym)));
3318 }
3319 return false;
3320
3321 case LO_SUM:
3322 info->type = ADDRESS_LO_SUM;
3323 info->base = XEXP (x, 0);
3324 info->offset = XEXP (x, 1);
3325 if (allow_reg_index_p
3326 && aarch64_base_register_rtx_p (info->base, strict_p))
3327 {
3328 rtx sym, offs;
3329 split_const (info->offset, &sym, &offs);
3330 if (GET_CODE (sym) == SYMBOL_REF
3331 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3332 == SYMBOL_SMALL_ABSOLUTE))
3333 {
3334 /* The symbol and offset must be aligned to the access size. */
3335 unsigned int align;
3336 unsigned int ref_size;
3337
3338 if (CONSTANT_POOL_ADDRESS_P (sym))
3339 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3340 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3341 {
3342 tree exp = SYMBOL_REF_DECL (sym);
3343 align = TYPE_ALIGN (TREE_TYPE (exp));
3344 align = CONSTANT_ALIGNMENT (exp, align);
3345 }
3346 else if (SYMBOL_REF_DECL (sym))
3347 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3348 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3349 && SYMBOL_REF_BLOCK (sym) != NULL)
3350 align = SYMBOL_REF_BLOCK (sym)->alignment;
3351 else
3352 align = BITS_PER_UNIT;
3353
3354 ref_size = GET_MODE_SIZE (mode);
3355 if (ref_size == 0)
3356 ref_size = GET_MODE_SIZE (DImode);
3357
3358 return ((INTVAL (offs) & (ref_size - 1)) == 0
3359 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3360 }
3361 }
3362 return false;
3363
3364 default:
3365 return false;
3366 }
3367}
3368
3369bool
3370aarch64_symbolic_address_p (rtx x)
3371{
3372 rtx offset;
3373
3374 split_const (x, &x, &offset);
3375 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3376}
3377
3378/* Classify the base of symbolic expression X, given that X appears in
3379 context CONTEXT. */
3380
3381enum aarch64_symbol_type
3382aarch64_classify_symbolic_expression (rtx x,
3383 enum aarch64_symbol_context context)
43e9d192
IB
3384{
3385 rtx offset;
3386
3387 split_const (x, &x, &offset);
3388 return aarch64_classify_symbol (x, context);
3389}
3390
3391
3392/* Return TRUE if X is a legitimate address for accessing memory in
3393 mode MODE. */
3394static bool
3395aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3396{
3397 struct aarch64_address_info addr;
3398
3399 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3400}
3401
3402/* Return TRUE if X is a legitimate address for accessing memory in
3403 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3404 pair operation. */
3405bool
3406aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3407 RTX_CODE outer_code, bool strict_p)
3408{
3409 struct aarch64_address_info addr;
3410
3411 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3412}
3413
3414/* Return TRUE if rtx X is immediate constant 0.0. */
3415bool
3416aarch64_float_const_zero_rtx_p (rtx x)
3417{
3418 REAL_VALUE_TYPE r;
3419
3420 if (GET_MODE (x) == VOIDmode)
3421 return false;
3422
3423 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3424 if (REAL_VALUE_MINUS_ZERO (r))
3425 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3426 return REAL_VALUES_EQUAL (r, dconst0);
3427}
3428
3429/* Return the fixed registers used for condition codes. */
3430
3431static bool
3432aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3433{
3434 *p1 = CC_REGNUM;
3435 *p2 = INVALID_REGNUM;
3436 return true;
3437}
3438
3439/* Emit call insn with PAT and do aarch64-specific handling. */
3440
3441void
3442aarch64_emit_call_insn (rtx pat)
3443{
3444 rtx insn = emit_call_insn (pat);
3445
3446 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
3447 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
3448 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
3449}
3450
3451enum machine_mode
3452aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3453{
3454 /* All floating point compares return CCFP if it is an equality
3455 comparison, and CCFPE otherwise. */
3456 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3457 {
3458 switch (code)
3459 {
3460 case EQ:
3461 case NE:
3462 case UNORDERED:
3463 case ORDERED:
3464 case UNLT:
3465 case UNLE:
3466 case UNGT:
3467 case UNGE:
3468 case UNEQ:
3469 case LTGT:
3470 return CCFPmode;
3471
3472 case LT:
3473 case LE:
3474 case GT:
3475 case GE:
3476 return CCFPEmode;
3477
3478 default:
3479 gcc_unreachable ();
3480 }
3481 }
3482
3483 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3484 && y == const0_rtx
3485 && (code == EQ || code == NE || code == LT || code == GE)
3486 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3487 || GET_CODE (x) == NEG))
3488 return CC_NZmode;
3489
3490 /* A compare with a shifted operand. Because of canonicalization,
3491 the comparison will have to be swapped when we emit the assembly
3492 code. */
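/* For example, (GT (ashift:SI x (const_int 2)) y) is emitted as a
   comparison of y against x shifted left by 2, so the CC_SWPmode
   mapping in aarch64_get_condition_code tests LT instead of GT. */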
3493 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3494 && (REG_P (y) || GET_CODE (y) == SUBREG)
3495 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3496 || GET_CODE (x) == LSHIFTRT
3497 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3498 return CC_SWPmode;
3499
3500 /* Similarly for a negated operand, but we can only do this for
3501 equalities. */
3502 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3503 && (REG_P (y) || GET_CODE (y) == SUBREG)
3504 && (code == EQ || code == NE)
3505 && GET_CODE (x) == NEG)
3506 return CC_Zmode;
3507
43e9d192
IB
3508 /* A compare of a mode narrower than SI mode against zero can be done
3509 by extending the value in the comparison. */
3510 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3511 && y == const0_rtx)
3512 /* Only use sign-extension if we really need it. */
3513 return ((code == GT || code == GE || code == LE || code == LT)
3514 ? CC_SESWPmode : CC_ZESWPmode);
3515
3516 /* For everything else, return CCmode. */
3517 return CCmode;
3518}
3519
3520int
3521aarch64_get_condition_code (rtx x)
3522{
3523 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3524 enum rtx_code comp_code = GET_CODE (x);
3525
3526 if (GET_MODE_CLASS (mode) != MODE_CC)
3527 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3528
3529 switch (mode)
3530 {
3531 case CCFPmode:
3532 case CCFPEmode:
3533 switch (comp_code)
3534 {
3535 case GE: return AARCH64_GE;
3536 case GT: return AARCH64_GT;
3537 case LE: return AARCH64_LS;
3538 case LT: return AARCH64_MI;
3539 case NE: return AARCH64_NE;
3540 case EQ: return AARCH64_EQ;
3541 case ORDERED: return AARCH64_VC;
3542 case UNORDERED: return AARCH64_VS;
3543 case UNLT: return AARCH64_LT;
3544 case UNLE: return AARCH64_LE;
3545 case UNGT: return AARCH64_HI;
3546 case UNGE: return AARCH64_PL;
3547 default: return -1;
3548 }
3549 break;
3550
3551 case CCmode:
3552 switch (comp_code)
3553 {
3554 case NE: return AARCH64_NE;
3555 case EQ: return AARCH64_EQ;
3556 case GE: return AARCH64_GE;
3557 case GT: return AARCH64_GT;
3558 case LE: return AARCH64_LE;
3559 case LT: return AARCH64_LT;
3560 case GEU: return AARCH64_CS;
3561 case GTU: return AARCH64_HI;
3562 case LEU: return AARCH64_LS;
3563 case LTU: return AARCH64_CC;
3564 default: return -1;
3565 }
3566 break;
3567
3568 case CC_SWPmode:
3569 case CC_ZESWPmode:
3570 case CC_SESWPmode:
3571 switch (comp_code)
3572 {
3573 case NE: return AARCH64_NE;
3574 case EQ: return AARCH64_EQ;
3575 case GE: return AARCH64_LE;
3576 case GT: return AARCH64_LT;
3577 case LE: return AARCH64_GE;
3578 case LT: return AARCH64_GT;
3579 case GEU: return AARCH64_LS;
3580 case GTU: return AARCH64_CC;
3581 case LEU: return AARCH64_CS;
3582 case LTU: return AARCH64_HI;
3583 default: return -1;
3584 }
3585 break;
3586
3587 case CC_NZmode:
3588 switch (comp_code)
3589 {
3590 case NE: return AARCH64_NE;
3591 case EQ: return AARCH64_EQ;
3592 case GE: return AARCH64_PL;
3593 case LT: return AARCH64_MI;
3594 default: return -1;
3595 }
3596 break;
3597
3598 case CC_Zmode:
3599 switch (comp_code)
3600 {
3601 case NE: return AARCH64_NE;
3602 case EQ: return AARCH64_EQ;
3603 default: return -1;
3604 }
3605 break;
3606
3607 default:
3608 return -1;
3609 break;
3610 }
3611}
3612
3613bool
3614aarch64_const_vec_all_same_in_range_p (rtx x,
3615 HOST_WIDE_INT minval,
3616 HOST_WIDE_INT maxval)
3617{
3618 HOST_WIDE_INT firstval;
3619 int count, i;
3620
3621 if (GET_CODE (x) != CONST_VECTOR
3622 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
3623 return false;
3624
3625 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
3626 if (firstval < minval || firstval > maxval)
3627 return false;
3628
3629 count = CONST_VECTOR_NUNITS (x);
3630 for (i = 1; i < count; i++)
3631 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
3632 return false;
3633
3634 return true;
3635}
3636
3637bool
3638aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
3639{
3640 return aarch64_const_vec_all_same_in_range_p (x, val, val);
3641}
3642
3643static unsigned
3644bit_count (unsigned HOST_WIDE_INT value)
3645{
3646 unsigned count = 0;
3647
3648 while (value)
3649 {
3650 count++;
3651 value &= value - 1;
3652 }
3653
3654 return count;
3655}
3656
3657void
3658aarch64_print_operand (FILE *f, rtx x, char code)
3659{
3660 switch (code)
3661 {
3662 /* An integer or symbol address without a preceding # sign. */
3663 case 'c':
3664 switch (GET_CODE (x))
3665 {
3666 case CONST_INT:
3667 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3668 break;
3669
3670 case SYMBOL_REF:
3671 output_addr_const (f, x);
3672 break;
3673
3674 case CONST:
3675 if (GET_CODE (XEXP (x, 0)) == PLUS
3676 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3677 {
3678 output_addr_const (f, x);
3679 break;
3680 }
3681 /* Fall through. */
3682
3683 default:
3684 output_operand_lossage ("Unsupported operand for code '%c'", code);
3685 }
3686 break;
3687
43e9d192
IB
3688 case 'e':
3689 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
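      /* For example, an operand of 16 gives exact_log2 (16 & ~7) == 4,
	 which prints 'h'; 8 gives 3 ('b') and 32 gives 5 ('w').  */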
      {
	int n;

	if (!CONST_INT_P (x)
	    || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
	  {
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }

	switch (n)
	  {
	  case 3:
	    fputc ('b', f);
	    break;
	  case 4:
	    fputc ('h', f);
	    break;
	  case 5:
	    fputc ('w', f);
	    break;
	  default:
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }
      }
      break;

    case 'p':
      {
	int n;

	/* Print N such that 2^N == X.  */
	if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
	  {
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }

	asm_fprintf (f, "%d", n);
      }
      break;

    case 'P':
      /* Print the number of non-zero bits in X (a const_int).  */
      if (!CONST_INT_P (x))
	{
	  output_operand_lossage ("invalid operand for '%%%c'", code);
	  return;
	}

      asm_fprintf (f, "%u", bit_count (INTVAL (x)));
      break;

    case 'H':
      /* Print the higher numbered register of a pair (TImode) of regs.  */
      if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
	{
	  output_operand_lossage ("invalid operand for '%%%c'", code);
	  return;
	}

      asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
      break;

    case 'm':
      {
	int cond_code;
	/* Print a condition (eq, ne, etc).  */

	/* CONST_TRUE_RTX means always -- that's the default.  */
	if (x == const_true_rtx)
	  return;

	if (!COMPARISON_P (x))
	  {
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }

	cond_code = aarch64_get_condition_code (x);
	gcc_assert (cond_code >= 0);
	fputs (aarch64_condition_codes[cond_code], f);
      }
      break;

    case 'M':
      {
	int cond_code;
	/* Print the inverse of a condition (eq <-> ne, etc).  */

	/* CONST_TRUE_RTX means never -- that's the default.  */
	if (x == const_true_rtx)
	  {
	    fputs ("nv", f);
	    return;
	  }

	if (!COMPARISON_P (x))
	  {
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }
	cond_code = aarch64_get_condition_code (x);
	gcc_assert (cond_code >= 0);
	fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
				       (cond_code)], f);
      }
      break;

    case 'b':
    case 'h':
    case 's':
    case 'd':
    case 'q':
      /* Print a scalar FP/SIMD register name.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
	{
	  output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
	  return;
	}
      asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
      break;

    case 'S':
    case 'T':
    case 'U':
    case 'V':
      /* Print the first FP/SIMD register name in a list.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
	{
	  output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
	  return;
	}
      asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
      break;

    case 'X':
      /* Print bottom 16 bits of integer constant in hex.  */
      if (!CONST_INT_P (x))
	{
	  output_operand_lossage ("invalid operand for '%%%c'", code);
	  return;
	}
      asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
      break;

    case 'w':
    case 'x':
      /* Print a general register name or the zero register (32-bit or
	 64-bit).  */
      if (x == const0_rtx
	  || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
	{
	  asm_fprintf (f, "%czr", code);
	  break;
	}

      if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
	{
	  asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
	  break;
	}

      if (REG_P (x) && REGNO (x) == SP_REGNUM)
	{
	  asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
	  break;
	}

      /* Fall through */

    case 0:
      /* Print a normal operand, if it's a general register, then we
	 assume DImode.  */
      if (x == NULL)
	{
	  output_operand_lossage ("missing operand");
	  return;
	}

      switch (GET_CODE (x))
	{
	case REG:
	  asm_fprintf (f, "%s", reg_names [REGNO (x)]);
	  break;

	case MEM:
	  aarch64_memory_reference_mode = GET_MODE (x);
	  output_address (XEXP (x, 0));
	  break;

	case LABEL_REF:
	case SYMBOL_REF:
	  output_addr_const (asm_out_file, x);
	  break;

	case CONST_INT:
	  asm_fprintf (f, "%wd", INTVAL (x));
	  break;

	case CONST_VECTOR:
	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
	    {
	      gcc_assert (
		aarch64_const_vec_all_same_in_range_p (x,
						       HOST_WIDE_INT_MIN,
						       HOST_WIDE_INT_MAX));
	      asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
	    }
	  else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
	    {
	      fputc ('0', f);
	    }
	  else
	    gcc_unreachable ();
	  break;

	case CONST_DOUBLE:
	  /* CONST_DOUBLE can represent a double-width integer.
	     In this case, the mode of x is VOIDmode.  */
	  if (GET_MODE (x) == VOIDmode)
	    ; /* Do Nothing.  */
	  else if (aarch64_float_const_zero_rtx_p (x))
	    {
	      fputc ('0', f);
	      break;
	    }
	  else if (aarch64_float_const_representable_p (x))
	    {
#define buf_size 20
	      char float_buf[buf_size] = {'\0'};
	      REAL_VALUE_TYPE r;
	      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
	      real_to_decimal_for_mode (float_buf, &r,
					buf_size, buf_size,
					1, GET_MODE (x));
	      asm_fprintf (asm_out_file, "%s", float_buf);
	      break;
#undef buf_size
	    }
	  output_operand_lossage ("invalid constant");
	  return;
	default:
	  output_operand_lossage ("invalid operand");
	  return;
	}
      break;

    case 'A':
      if (GET_CODE (x) == HIGH)
	x = XEXP (x, 0);

      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
	{
	case SYMBOL_SMALL_GOT:
	  asm_fprintf (asm_out_file, ":got:");
	  break;

	case SYMBOL_SMALL_TLSGD:
	  asm_fprintf (asm_out_file, ":tlsgd:");
	  break;

	case SYMBOL_SMALL_TLSDESC:
	  asm_fprintf (asm_out_file, ":tlsdesc:");
	  break;

	case SYMBOL_SMALL_GOTTPREL:
	  asm_fprintf (asm_out_file, ":gottprel:");
	  break;

	case SYMBOL_SMALL_TPREL:
	  asm_fprintf (asm_out_file, ":tprel:");
	  break;

	case SYMBOL_TINY_GOT:
	  gcc_unreachable ();
	  break;

	default:
	  break;
	}
      output_addr_const (asm_out_file, x);
      break;

    case 'L':
      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
	{
	case SYMBOL_SMALL_GOT:
	  asm_fprintf (asm_out_file, ":lo12:");
	  break;

	case SYMBOL_SMALL_TLSGD:
	  asm_fprintf (asm_out_file, ":tlsgd_lo12:");
	  break;

	case SYMBOL_SMALL_TLSDESC:
	  asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
	  break;

	case SYMBOL_SMALL_GOTTPREL:
	  asm_fprintf (asm_out_file, ":gottprel_lo12:");
	  break;

	case SYMBOL_SMALL_TPREL:
	  asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
	  break;

	case SYMBOL_TINY_GOT:
	  asm_fprintf (asm_out_file, ":got:");
	  break;

	default:
	  break;
	}
      output_addr_const (asm_out_file, x);
      break;

    case 'G':

      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
	{
	case SYMBOL_SMALL_TPREL:
	  asm_fprintf (asm_out_file, ":tprel_hi12:");
	  break;
	default:
	  break;
	}
      output_addr_const (asm_out_file, x);
      break;

    default:
      output_operand_lossage ("invalid operand prefix '%%%c'", code);
      return;
    }
}

void
aarch64_print_operand_address (FILE *f, rtx x)
{
  struct aarch64_address_info addr;

  if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
				MEM, true))
    switch (addr.type)
      {
      case ADDRESS_REG_IMM:
	if (addr.offset == const0_rtx)
	  asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
	else
	  asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
		       INTVAL (addr.offset));
	return;

      case ADDRESS_REG_REG:
	if (addr.shift == 0)
	  asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
		       reg_names [REGNO (addr.offset)]);
	else
	  asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
		       reg_names [REGNO (addr.offset)], addr.shift);
	return;

      case ADDRESS_REG_UXTW:
	if (addr.shift == 0)
	  asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
		       REGNO (addr.offset) - R0_REGNUM);
	else
	  asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
		       REGNO (addr.offset) - R0_REGNUM, addr.shift);
	return;

      case ADDRESS_REG_SXTW:
	if (addr.shift == 0)
	  asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
		       REGNO (addr.offset) - R0_REGNUM);
	else
	  asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
		       REGNO (addr.offset) - R0_REGNUM, addr.shift);
	return;

      case ADDRESS_REG_WB:
	switch (GET_CODE (x))
	  {
	  case PRE_INC:
	    asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
			 GET_MODE_SIZE (aarch64_memory_reference_mode));
	    return;
	  case POST_INC:
	    asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
			 GET_MODE_SIZE (aarch64_memory_reference_mode));
	    return;
	  case PRE_DEC:
	    asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
			 GET_MODE_SIZE (aarch64_memory_reference_mode));
	    return;
	  case POST_DEC:
	    asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
			 GET_MODE_SIZE (aarch64_memory_reference_mode));
	    return;
	  case PRE_MODIFY:
	    asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
			 INTVAL (addr.offset));
	    return;
	  case POST_MODIFY:
	    asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
			 INTVAL (addr.offset));
	    return;
	  default:
	    break;
	  }
	break;

      case ADDRESS_LO_SUM:
	asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
	output_addr_const (f, addr.offset);
	asm_fprintf (f, "]");
	return;

      case ADDRESS_SYMBOLIC:
	break;
      }

  output_addr_const (f, x);
}

bool
aarch64_label_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return true;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
     referencing instruction, but they are constant offsets, not
     symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return false;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}

/* Implement REGNO_REG_CLASS.  */

enum reg_class
aarch64_regno_regclass (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return GENERAL_REGS;

  if (regno == SP_REGNUM)
    return STACK_REG;

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return POINTER_REGS;

  if (FP_REGNUM_P (regno))
    return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;

  return NO_REGS;
}

/* Try a machine-dependent way of reloading an illegitimate address
   operand.  If we find one, push the reload and return the new rtx.  */

rtx
aarch64_legitimize_reload_address (rtx *x_p,
				   enum machine_mode mode,
				   int opnum, int type,
				   int ind_levels ATTRIBUTE_UNUSED)
{
  rtx x = *x_p;

  /* Do not allow mem (plus (reg, const)) if vector struct mode.  */
  if (aarch64_vect_struct_mode_p (mode)
      && GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    {
      rtx orig_rtx = x;
      x = copy_rtx (x);
      push_reload (orig_rtx, NULL_RTX, x_p, NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return x;
    }

  /* We must recognize output that we have already generated ourselves.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 0))
      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
      && CONST_INT_P (XEXP (x, 1)))
    {
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return x;
    }

  /* We wish to handle large displacements off a base register by splitting
     the addend across an add and the mem insn.  This can cut the number of
     extra insns needed from 3 to 1.  It is only useful for load/store of a
     single register with 12 bit offset field.  */
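  /* For example, for a DImode load with displacement 0x13458 the split
     is low = 0x458 and high = 0x13000; the high part goes into an
     "add scratch, base, #0x13000" (a shiftable 12-bit immediate) and
     the low part stays in the load as "[scratch, #0x458]".  */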
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1))
      && HARD_REGISTER_P (XEXP (x, 0))
      && mode != TImode
      && mode != TFmode
      && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
    {
      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT low = val & 0xfff;
      HOST_WIDE_INT high = val - low;
      HOST_WIDE_INT offs;
      rtx cst;
      enum machine_mode xmode = GET_MODE (x);

      /* In ILP32, xmode can be either DImode or SImode.  */
      gcc_assert (xmode == DImode || xmode == SImode);

      /* Leave BLKmode offsets to the generic reload code, since we
	 cannot ascertain BLKmode alignment.  */
      if (GET_MODE_SIZE (mode) == 0)
	return NULL_RTX;

      offs = low % GET_MODE_SIZE (mode);

      /* Align misaligned offset by adjusting high part to compensate.  */
      if (offs != 0)
	{
	  if (aarch64_uimm12_shift (high + offs))
	    {
	      /* Align down.  */
	      low = low - offs;
	      high = high + offs;
	    }
	  else
	    {
	      /* Align up.  */
	      offs = GET_MODE_SIZE (mode) - offs;
	      low = low + offs;
	      high = high + (low & 0x1000) - offs;
	      low &= 0xfff;
	    }
	}

      /* Check for overflow.  */
      if (high + low != val)
	return NULL_RTX;

      cst = GEN_INT (high);
      if (!aarch64_uimm12_shift (high))
	cst = force_const_mem (xmode, cst);

      /* Reload high part into base reg, leaving the low part
	 in the mem instruction.
	 Note that replacing this gen_rtx_PLUS with plus_constant is
	 wrong in this case because we rely on the
	 (plus (plus reg c1) c2) structure being preserved so that
	 XEXP (*p, 0) in push_reload below uses the correct term.  */
      x = gen_rtx_PLUS (xmode,
			gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
			GEN_INT (low));

      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return x;
    }

  return NULL_RTX;
}


static reg_class_t
aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
			  reg_class_t rclass,
			  enum machine_mode mode,
			  secondary_reload_info *sri)
{
  /* Without the TARGET_SIMD instructions we cannot move a Q register
     to a Q register directly.  We need a scratch.  */
  if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
      && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
      && reg_class_subset_p (rclass, FP_REGS))
    {
      if (mode == TFmode)
	sri->icode = CODE_FOR_aarch64_reload_movtf;
      else if (mode == TImode)
	sri->icode = CODE_FOR_aarch64_reload_movti;
      return NO_REGS;
    }

  /* A TFmode or TImode memory access should be handled via FP_REGS
     because AArch64 has richer addressing modes for LDR/STR instructions
     than LDP/STP instructions.  */
  if (!TARGET_GENERAL_REGS_ONLY && rclass == GENERAL_REGS
      && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
    return FP_REGS;

  if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P (x))
    return GENERAL_REGS;

  return NO_REGS;
}

static bool
aarch64_can_eliminate (const int from, const int to)
{
  /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
     HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */

  if (frame_pointer_needed)
    {
      if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
	return true;
      if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
	return false;
      if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
	  && !cfun->calls_alloca)
	return true;
      if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
	return true;

      return false;
    }

  return true;
}

HOST_WIDE_INT
aarch64_initial_elimination_offset (unsigned from, unsigned to)
{
  aarch64_layout_frame ();

  if (to == HARD_FRAME_POINTER_REGNUM)
    {
      if (from == ARG_POINTER_REGNUM)
	return cfun->machine->frame.frame_size - crtl->outgoing_args_size;

      if (from == FRAME_POINTER_REGNUM)
	return (cfun->machine->frame.hard_fp_offset
		- cfun->machine->frame.saved_varargs_size);
    }

  if (to == STACK_POINTER_REGNUM)
    {
      if (from == FRAME_POINTER_REGNUM)
	return (cfun->machine->frame.frame_size
		- cfun->machine->frame.saved_varargs_size);
    }

  return cfun->machine->frame.frame_size;
}

/* Implement RETURN_ADDR_RTX.  We do not support moving back to a
   previous frame.  */

rtx
aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return const0_rtx;
  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}


static void
aarch64_asm_trampoline_template (FILE *f)
{
  if (TARGET_ILP32)
    {
      asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
      asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
    }
  else
    {
      asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
      asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
    }
  asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
  assemble_aligned_integer (4, const0_rtx);
  assemble_aligned_integer (POINTER_BYTES, const0_rtx);
  assemble_aligned_integer (POINTER_BYTES, const0_rtx);
}
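
/* The template above is 16 bytes of code: two PC-relative loads and a
   branch, padded out with one zero word.  The loads pull in the two
   POINTER_BYTES-wide data slots that follow at offsets 16 and
   16 + POINTER_BYTES, which aarch64_trampoline_init below fills with
   the target function's address and the static chain value.  */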

static void
aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;
  const int tramp_code_sz = 16;

  /* Don't need to copy the trailing D-words, we fill those in below.  */
  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
  mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  if (GET_MODE (fnaddr) != ptr_mode)
    fnaddr = convert_memory_address (ptr_mode, fnaddr);
  emit_move_insn (mem, fnaddr);

  mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
  emit_move_insn (mem, chain_value);

  /* XXX We should really define a "clear_cache" pattern and use
     gen_clear_cache().  */
  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
		     plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
		     ptr_mode);
}

static unsigned char
aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
{
  switch (regclass)
    {
    case CALLER_SAVE_REGS:
    case POINTER_REGS:
    case GENERAL_REGS:
    case ALL_REGS:
    case FP_REGS:
    case FP_LO_REGS:
      return
	aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
				       (GET_MODE_SIZE (mode) + 7) / 8;
    case STACK_REG:
      return 1;

    case NO_REGS:
      return 0;

    default:
      break;
    }
  gcc_unreachable ();
}

static reg_class_t
aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
{
  if (regclass == POINTER_REGS)
    return GENERAL_REGS;

  if (regclass == STACK_REG)
    {
      if (REG_P (x)
	  && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
	return regclass;

      return NO_REGS;
    }

  /* If it's an integer immediate that MOVI can't handle, then
     FP_REGS is not an option, so we return NO_REGS instead.  */
  if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
      && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
    return NO_REGS;

  /* Register elimination can result in a request for
     SP+constant->FP_REGS.  We cannot support such operations, which
     use SP as source and an FP_REG as destination, so reject them
     outright.  */
  if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
    {
      rtx lhs = XEXP (x, 0);

      /* Look through a possible SUBREG introduced by ILP32.  */
      if (GET_CODE (lhs) == SUBREG)
	lhs = SUBREG_REG (lhs);

      gcc_assert (REG_P (lhs));
      gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
				      POINTER_REGS));
      return NO_REGS;
    }

  return regclass;
}

void
aarch64_asm_output_labelref (FILE* f, const char *name)
{
  asm_fprintf (f, "%U%s", name);
}

static void
aarch64_elf_asm_constructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_ctor_section_asm_out_constructor (symbol, priority);
  else
    {
      section *s;
      char buf[18];
      snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      assemble_aligned_integer (POINTER_BYTES, symbol);
    }
}

static void
aarch64_elf_asm_destructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_dtor_section_asm_out_destructor (symbol, priority);
  else
    {
      section *s;
      char buf[18];
      snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      assemble_aligned_integer (POINTER_BYTES, symbol);
    }
}

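/* Emit the dispatch sequence for a casesi: load the scaled table entry,
   materialize the table base with ADR, add the sign-extended entry
   (scaled to instruction words), and branch.  With illustrative
   registers, a byte-sized table expands to roughly:
	ldrb	w3, [x0, w1, uxtw]
	adr	x4, .Lrtx<N>
	add	x3, x4, w3, sxtb #2
	br	x3  */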
const char*
aarch64_output_casesi (rtx *operands)
{
  char buf[100];
  char label[100];
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
  int index;
  static const char *const patterns[4][2] =
  {
    {
      "ldrb\t%w3, [%0,%w1,uxtw]",
      "add\t%3, %4, %w3, sxtb #2"
    },
    {
      "ldrh\t%w3, [%0,%w1,uxtw #1]",
      "add\t%3, %4, %w3, sxth #2"
    },
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    },
    /* We assume that DImode is only generated when not optimizing and
       that we don't really need 64-bit address offsets.  That would
       imply an object file with 8GB of code in a single function!  */
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    }
  };

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));

  gcc_assert (index >= 0 && index <= 3);

  /* Need to implement table size reduction, by changing the code below.  */
  output_asm_insn (patterns[index][0], operands);
  ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
  snprintf (buf, sizeof (buf),
	    "adr\t%%4, %s", targetm.strip_name_encoding (label));
  output_asm_insn (buf, operands);
  output_asm_insn (patterns[index][1], operands);
  output_asm_insn ("br\t%3", operands);
  assemble_label (asm_out_file, label);
  return "";
}


/* Return size in bits of an arithmetic operand which is shifted/scaled and
   masked such that it is suitable for a UXTB, UXTH, or UXTW extend
   operator.  */
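/* For example, a mask of 0x1fe with shift 1 is 0xff << 1, so the
   operand is a byte value scaled by 2 and the function returns 8;
   anything that is not an 8/16/32-bit mask shifted by 0..3 yields 0.  */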

int
aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
{
  if (shift >= 0 && shift <= 3)
    {
      int size;
      for (size = 8; size <= 32; size *= 2)
	{
	  HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
	  if (mask == bits << shift)
	    return size;
	}
    }
  return 0;
}

static bool
aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
				   const_rtx x ATTRIBUTE_UNUSED)
{
  /* We can't use blocks for constants when we're using a per-function
     constant pool.  */
  return false;
}

static section *
aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
			    rtx x ATTRIBUTE_UNUSED,
			    unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  /* Force all constant pool entries into the current function section.  */
  return function_section (current_function_decl);
}


/* Costs.  */

/* Helper function for rtx cost calculation.  Strip a shift expression
   from X.  Returns the inner operand if successful, or the original
   expression on failure.  */
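/* For example, (ashift (reg) (const_int 3)) and its multiply form
   (mult (reg) (const_int 8)) both strip to (reg).  */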
static rtx
aarch64_strip_shift (rtx x)
{
  rtx op = x;

  /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
     we can convert both to ROR during final output.  */
  if ((GET_CODE (op) == ASHIFT
       || GET_CODE (op) == ASHIFTRT
       || GET_CODE (op) == LSHIFTRT
       || GET_CODE (op) == ROTATERT
       || GET_CODE (op) == ROTATE)
      && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);

  if (GET_CODE (op) == MULT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
    return XEXP (op, 0);

  return x;
}

/* Helper function for rtx cost calculation.  Strip an extend
   expression from X.  Returns the inner operand if successful, or the
   original expression on failure.  We deal with a number of possible
   canonicalization variations here.  */
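/* For example, (zero_extend:DI (reg:SI)) strips to (reg:SI); the same
   extension written as (and (mult (reg) (const_int 4)) (const_int 0x3fc))
   (a byte value scaled by four) strips to (reg) as well.  */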
static rtx
aarch64_strip_extend (rtx x)
{
  rtx op = x;

  /* Zero and sign extraction of a widened value.  */
  if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
      && XEXP (op, 2) == const0_rtx
      && GET_CODE (XEXP (op, 0)) == MULT
      && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
					 XEXP (op, 1)))
    return XEXP (XEXP (op, 0), 0);

  /* It can also be represented (for zero-extend) as an AND with an
     immediate.  */
  if (GET_CODE (op) == AND
      && GET_CODE (XEXP (op, 0)) == MULT
      && CONST_INT_P (XEXP (XEXP (op, 0), 1))
      && CONST_INT_P (XEXP (op, 1))
      && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
			   INTVAL (XEXP (op, 1))) != 0)
    return XEXP (XEXP (op, 0), 0);

  /* Now handle extended register, as this may also have an optional
     left shift by 1..4.  */
  if (GET_CODE (op) == ASHIFT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
    op = XEXP (op, 0);

  if (GET_CODE (op) == ZERO_EXTEND
      || GET_CODE (op) == SIGN_EXTEND)
    op = XEXP (op, 0);

  if (op != x)
    return op;

  return x;
}

/* Helper function for rtx cost calculation.  Calculate the cost of
   a MULT, which may be part of a multiply-accumulate rtx.  Return
   the calculated cost of the expression, recursing manually into
   operands where needed.  */

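/* For example, (mult (reg) (const_int 8)) inside a PLUS is really an
   "add x0, x1, x2, lsl #3" and is costed as an arith_shift rather
   than as a multiply.  */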
static int
aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
{
  rtx op0, op1;
  const struct cpu_cost_table *extra_cost
    = aarch64_tune_params->insn_extra_cost;
  int cost = 0;
  bool maybe_fma = (outer == PLUS || outer == MINUS);
  enum machine_mode mode = GET_MODE (x);

  gcc_checking_assert (code == MULT);

  op0 = XEXP (x, 0);
  op1 = XEXP (x, 1);

  if (VECTOR_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  /* Integer multiply/fma.  */
  if (GET_MODE_CLASS (mode) == MODE_INT)
    {
      /* The multiply will be canonicalized as a shift, cost it as such.  */
      if (CONST_INT_P (op1)
	  && exact_log2 (INTVAL (op1)) > 0)
	{
	  if (speed)
	    {
	      if (maybe_fma)
		/* ADD (shifted register).  */
		cost += extra_cost->alu.arith_shift;
	      else
		/* LSL (immediate).  */
		cost += extra_cost->alu.shift;
	    }

	  cost += rtx_cost (op0, GET_CODE (op0), 0, speed);

	  return cost;
	}

      /* Integer multiplies or FMAs have zero/sign extending variants.  */
      if ((GET_CODE (op0) == ZERO_EXTEND
	   && GET_CODE (op1) == ZERO_EXTEND)
	  || (GET_CODE (op0) == SIGN_EXTEND
	      && GET_CODE (op1) == SIGN_EXTEND))
	{
	  cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
		  + rtx_cost (XEXP (op1, 0), MULT, 1, speed);

	  if (speed)
	    {
	      if (maybe_fma)
		/* MADD/SMADDL/UMADDL.  */
		cost += extra_cost->mult[0].extend_add;
	      else
		/* MUL/SMULL/UMULL.  */
		cost += extra_cost->mult[0].extend;
	    }

	  return cost;
	}

      /* This is either an integer multiply or an FMA.  In both cases
	 we want to recurse and cost the operands.  */
      cost += rtx_cost (op0, MULT, 0, speed)
	      + rtx_cost (op1, MULT, 1, speed);

      if (speed)
	{
	  if (maybe_fma)
	    /* MADD.  */
	    cost += extra_cost->mult[mode == DImode].add;
	  else
	    /* MUL.  */
	    cost += extra_cost->mult[mode == DImode].simple;
	}

      return cost;
    }
  else
    {
      if (speed)
	{
	  /* Floating-point FMA/FMUL can also support negations of the
	     operands.  */
	  if (GET_CODE (op0) == NEG)
	    op0 = XEXP (op0, 0);
	  if (GET_CODE (op1) == NEG)
	    op1 = XEXP (op1, 0);

	  if (maybe_fma)
	    /* FMADD/FNMADD/FNMSUB/FMSUB.  */
	    cost += extra_cost->fp[mode == DFmode].fma;
	  else
	    /* FMUL/FNMUL.  */
	    cost += extra_cost->fp[mode == DFmode].mult;
	}

      cost += rtx_cost (op0, MULT, 0, speed)
	      + rtx_cost (op1, MULT, 1, speed);
      return cost;
    }
}

static int
aarch64_address_cost (rtx x,
		      enum machine_mode mode,
		      addr_space_t as ATTRIBUTE_UNUSED,
		      bool speed)
{
  enum rtx_code c = GET_CODE (x);
  const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
  struct aarch64_address_info info;
  int cost = 0;
  info.shift = 0;

  if (!aarch64_classify_address (&info, x, mode, c, false))
    {
      if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
	{
	  /* This is a CONST or SYMBOL ref which will be split
	     in a different way depending on the code model in use.
	     Cost it through the generic infrastructure.  */
	  int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
	  /* Divide through by the cost of one instruction to
	     bring it to the same units as the address costs.  */
	  cost_symbol_ref /= COSTS_N_INSNS (1);
	  /* The cost is then the cost of preparing the address,
	     followed by an immediate (possibly 0) offset.  */
	  return cost_symbol_ref + addr_cost->imm_offset;
	}
      else
	{
	  /* This is most likely a jump table from a case
	     statement.  */
	  return addr_cost->register_offset;
	}
    }

  switch (info.type)
    {
    case ADDRESS_LO_SUM:
    case ADDRESS_SYMBOLIC:
    case ADDRESS_REG_IMM:
      cost += addr_cost->imm_offset;
      break;

    case ADDRESS_REG_WB:
      if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
	cost += addr_cost->pre_modify;
      else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
	cost += addr_cost->post_modify;
      else
	gcc_unreachable ();

      break;

    case ADDRESS_REG_REG:
      cost += addr_cost->register_offset;
      break;

    case ADDRESS_REG_UXTW:
    case ADDRESS_REG_SXTW:
      cost += addr_cost->register_extend;
      break;

    default:
      gcc_unreachable ();
    }


  if (info.shift > 0)
    {
      /* For the sake of calculating the cost of the shifted register
	 component, we can treat same sized modes in the same way.  */
      switch (GET_MODE_BITSIZE (mode))
	{
	case 16:
	  cost += addr_cost->addr_scale_costs.hi;
	  break;

	case 32:
	  cost += addr_cost->addr_scale_costs.si;
	  break;

	case 64:
	  cost += addr_cost->addr_scale_costs.di;
	  break;

	/* We can't tell, or this is a 128-bit vector.  */
	default:
	  cost += addr_cost->addr_scale_costs.ti;
	  break;
	}
    }

  return cost;
}

/* Return true if the RTX X in mode MODE is a zero or sign extract
   usable in an ADD or SUB (extended register) instruction.  */
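/* Such an extract has a shape along the lines of
     (zero_extract (mult (reg) (const_int 2^shift)) (width) (const_int 0))
   i.e. a register scaled by a small power of two and then zero- or
   sign-extended, which maps onto something like
   "add x0, x1, w2, uxtb #2".  */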
static bool
aarch64_rtx_arith_op_extract_p (rtx x, enum machine_mode mode)
{
  /* Catch add with a sign extract.
     This is add_<optab><mode>_multp2.  */
  if (GET_CODE (x) == SIGN_EXTRACT
      || GET_CODE (x) == ZERO_EXTRACT)
    {
      rtx op0 = XEXP (x, 0);
      rtx op1 = XEXP (x, 1);
      rtx op2 = XEXP (x, 2);

      if (GET_CODE (op0) == MULT
	  && CONST_INT_P (op1)
	  && op2 == const0_rtx
	  && CONST_INT_P (XEXP (op0, 1))
	  && aarch64_is_extend_from_extract (mode,
					     XEXP (op0, 1),
					     op1))
	{
	  return true;
	}
    }

  return false;
}

static bool
aarch64_frint_unspec_p (unsigned int u)
{
  switch (u)
    {
    case UNSPEC_FRINTZ:
    case UNSPEC_FRINTP:
    case UNSPEC_FRINTM:
    case UNSPEC_FRINTA:
    case UNSPEC_FRINTN:
    case UNSPEC_FRINTX:
    case UNSPEC_FRINTI:
      return true;

    default:
      return false;
    }
}

/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
   storing it in *COST.  Result is true if the total cost of the operation
   has now been calculated.  */
static bool
aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
{
  rtx inner;
  rtx comparator;
  enum rtx_code cmpcode;

  if (COMPARISON_P (op0))
    {
      inner = XEXP (op0, 0);
      comparator = XEXP (op0, 1);
      cmpcode = GET_CODE (op0);
    }
  else
    {
      inner = op0;
      comparator = const0_rtx;
      cmpcode = NE;
    }

  if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
    {
      /* Conditional branch.  */
      if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
	return true;
      else
	{
	  if (cmpcode == NE || cmpcode == EQ)
	    {
	      if (comparator == const0_rtx)
		{
		  /* TBZ/TBNZ/CBZ/CBNZ.  */
		  if (GET_CODE (inner) == ZERO_EXTRACT)
		    /* TBZ/TBNZ.  */
		    *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
				       0, speed);
		  else
		    /* CBZ/CBNZ.  */
		    *cost += rtx_cost (inner, cmpcode, 0, speed);

		  return true;
		}
	    }
	  else if (cmpcode == LT || cmpcode == GE)
	    {
	      /* TBZ/TBNZ.  */
	      if (comparator == const0_rtx)
		return true;
	    }
	}
    }
  else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
    {
      /* It's a conditional operation based on the status flags,
	 so it must be some flavor of CSEL.  */

      /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL.  */
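      /* E.g. (if_then_else (cond) (neg (reg)) (reg)) is a single CSNEG,
	 so strip the NEG/NOT/PLUS-one wrapper and cost only its operand.  */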
      if (GET_CODE (op1) == NEG
	  || GET_CODE (op1) == NOT
	  || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
	op1 = XEXP (op1, 0);

      *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
      *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
      return true;
    }

  /* We don't know what this is; cost all operands.  */
  return false;
}

/* Calculate the cost of calculating X, storing it in *COST.  Result
   is true if the total cost of the operation has now been calculated.  */
static bool
aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
		   int param ATTRIBUTE_UNUSED, int *cost, bool speed)
{
  rtx op0, op1, op2;
  const struct cpu_cost_table *extra_cost
    = aarch64_tune_params->insn_extra_cost;
  enum machine_mode mode = GET_MODE (x);

  /* By default, assume that everything has equivalent cost to the
     cheapest instruction.  Any additional costs are applied as a delta
     above this default.  */
  *cost = COSTS_N_INSNS (1);

  /* TODO: The cost infrastructure currently does not handle
     vector operations.  Assume that all vector operations
     are equally expensive.  */
  if (VECTOR_MODE_P (mode))
    {
      if (speed)
	*cost += extra_cost->vect.alu;
      return true;
    }

  switch (code)
    {
    case SET:
      /* The cost depends entirely on the operands to SET.  */
      *cost = 0;
      op0 = SET_DEST (x);
      op1 = SET_SRC (x);

      switch (GET_CODE (op0))
	{
	case MEM:
	  if (speed)
	    {
	      rtx address = XEXP (op0, 0);
	      if (GET_MODE_CLASS (mode) == MODE_INT)
		*cost += extra_cost->ldst.store;
	      else if (mode == SFmode)
		*cost += extra_cost->ldst.storef;
	      else if (mode == DFmode)
		*cost += extra_cost->ldst.stored;

	      *cost +=
		COSTS_N_INSNS (aarch64_address_cost (address, mode,
						     0, speed));
	    }

	  *cost += rtx_cost (op1, SET, 1, speed);
	  return true;

	case SUBREG:
	  if (! REG_P (SUBREG_REG (op0)))
	    *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);

	  /* Fall through.  */
	case REG:
	  /* const0_rtx is in general free, but we will use an
	     instruction to set a register to 0.  */
	  if (REG_P (op1) || op1 == const0_rtx)
	    {
	      /* The cost is 1 per register copied.  */
	      int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
			      / UNITS_PER_WORD;
	      *cost = COSTS_N_INSNS (n_minus_1 + 1);
	    }
	  else
	    /* Cost is just the cost of the RHS of the set.  */
	    *cost += rtx_cost (op1, SET, 1, speed);
	  return true;

	case ZERO_EXTRACT:
	case SIGN_EXTRACT:
	  /* Bit-field insertion.  Strip any redundant widening of
	     the RHS to meet the width of the target.  */
	  if (GET_CODE (op1) == SUBREG)
	    op1 = SUBREG_REG (op1);
	  if ((GET_CODE (op1) == ZERO_EXTEND
	       || GET_CODE (op1) == SIGN_EXTEND)
	      && CONST_INT_P (XEXP (op0, 1))
	      && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
		  >= INTVAL (XEXP (op0, 1))))
	    op1 = XEXP (op1, 0);

	  if (CONST_INT_P (op1))
	    {
	      /* MOV immediate is assumed to always be cheap.  */
	      *cost = COSTS_N_INSNS (1);
	    }
	  else
	    {
	      /* BFM.  */
	      if (speed)
		*cost += extra_cost->alu.bfi;
	      *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
	    }

	  return true;

	default:
	  /* We can't make sense of this, assume default cost.  */
	  *cost = COSTS_N_INSNS (1);
	  return false;
	}
      return false;

    case CONST_INT:
      /* If an instruction can incorporate a constant within the
	 instruction, the instruction's expression avoids calling
	 rtx_cost() on the constant.  If rtx_cost() is called on a
	 constant, then it is usually because the constant must be
	 moved into a register by one or more instructions.

	 The exception is constant 0, which can be expressed
	 as XZR/WZR and is therefore free.  The exception to this is
	 if we have (set (reg) (const0_rtx)) in which case we must cost
	 the move.  However, we can catch that when we cost the SET, so
	 we don't need to consider that here.  */
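      /* For instance, a 64-bit constant such as 0x123456789abcdef0
	 typically needs a MOVZ plus three MOVKs, so it is costed as
	 four instructions, whereas 0 is simply XZR.  */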
      if (x == const0_rtx)
	*cost = 0;
      else
	{
	  /* To an approximation, building any other constant is
	     proportionally expensive to the number of instructions
	     required to build that constant.  This is true whether we
	     are compiling for SPEED or otherwise.  */
	  *cost = COSTS_N_INSNS (aarch64_build_constant (0,
							 INTVAL (x),
							 false));
	}
      return true;

    case CONST_DOUBLE:
      if (speed)
	{
	  /* mov[df,sf]_aarch64.  */
	  if (aarch64_float_const_representable_p (x))
	    /* FMOV (scalar immediate).  */
	    *cost += extra_cost->fp[mode == DFmode].fpconst;
	  else if (!aarch64_float_const_zero_rtx_p (x))
	    {
	      /* This will be a load from memory.  */
	      if (mode == DFmode)
		*cost += extra_cost->ldst.loadd;
	      else
		*cost += extra_cost->ldst.loadf;
	    }
	  else
	    /* Otherwise this is +0.0.  We get this using MOVI d0, #0
	       or MOV v0.s[0], wzr - neither of which is modeled by the
	       cost tables.  Just use the default cost.  */
	    {
	    }
	}

      return true;

    case MEM:
      if (speed)
	{
	  /* For loads we want the base cost of a load, plus an
	     approximation for the additional cost of the addressing
	     mode.  */
	  rtx address = XEXP (x, 0);
	  if (GET_MODE_CLASS (mode) == MODE_INT)
	    *cost += extra_cost->ldst.load;
	  else if (mode == SFmode)
	    *cost += extra_cost->ldst.loadf;
	  else if (mode == DFmode)
	    *cost += extra_cost->ldst.loadd;

	  *cost +=
	    COSTS_N_INSNS (aarch64_address_cost (address, mode,
						 0, speed));
	}

      return true;

    case NEG:
      op0 = XEXP (x, 0);

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	{
	  if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
	      || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
	    {
	      /* CSETM.  */
	      *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
	      return true;
	    }

	  /* Cost this as SUB wzr, X.  */
	  op0 = CONST0_RTX (GET_MODE (x));
	  op1 = XEXP (x, 0);
	  goto cost_minus;
	}

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  /* Support (neg(fma...)) as a single instruction only if
	     sign of zeros is unimportant.  This matches the decision
	     making in aarch64.md.  */
	  if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
	    {
	      /* FNMADD.  */
	      *cost = rtx_cost (op0, NEG, 0, speed);
	      return true;
	    }
	  if (speed)
	    /* FNEG.  */
	    *cost += extra_cost->fp[mode == DFmode].neg;
	  return false;
	}

      return false;

    case CLRSB:
    case CLZ:
      if (speed)
	*cost += extra_cost->alu.clz;

      return false;

    case COMPARE:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (op1 == const0_rtx
	  && GET_CODE (op0) == AND)
	{
	  x = op0;
	  goto cost_logic;
	}

      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	{
	  /* TODO: A write to the CC flags possibly costs extra, this
	     needs encoding in the cost tables.  */

	  /* CC_ZESWPmode supports zero extend for free.  */
	  if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
	    op0 = XEXP (op0, 0);

	  /* ANDS.  */
	  if (GET_CODE (op0) == AND)
	    {
	      x = op0;
	      goto cost_logic;
	    }

	  if (GET_CODE (op0) == PLUS)
	    {
	      /* ADDS (and CMN alias).  */
	      x = op0;
	      goto cost_plus;
	    }

	  if (GET_CODE (op0) == MINUS)
	    {
	      /* SUBS.  */
	      x = op0;
	      goto cost_minus;
	    }

	  if (GET_CODE (op1) == NEG)
	    {
	      /* CMN.  */
	      if (speed)
		*cost += extra_cost->alu.arith;

	      *cost += rtx_cost (op0, COMPARE, 0, speed);
	      *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
	      return true;
	    }

	  /* CMP.

	     Compare can freely swap the order of operands, and
	     canonicalization puts the more complex operation first.
	     But the integer MINUS logic expects the shift/extend
	     operation in op1.  */
	  if (! (REG_P (op0)
		 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
	    {
	      op0 = XEXP (x, 1);
	      op1 = XEXP (x, 0);
	    }
	  goto cost_minus;
	}

      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
	{
	  /* FCMP.  */
	  if (speed)
	    *cost += extra_cost->fp[mode == DFmode].compare;

	  if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
	    {
	      /* FCMP supports constant 0.0 for no extra cost.  */
	      return true;
	    }
	  return false;
	}

      return false;

    case MINUS:
      {
	op0 = XEXP (x, 0);
	op1 = XEXP (x, 1);

cost_minus:
	/* Detect valid immediates.  */
	if ((GET_MODE_CLASS (mode) == MODE_INT
	     || (GET_MODE_CLASS (mode) == MODE_CC
		 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
	    && CONST_INT_P (op1)
	    && aarch64_uimm12_shift (INTVAL (op1)))
	  {
	    *cost += rtx_cost (op0, MINUS, 0, speed);

	    if (speed)
	      /* SUB(S) (immediate).  */
	      *cost += extra_cost->alu.arith;
	    return true;

	  }

	/* Look for SUB (extended register).  */
	if (aarch64_rtx_arith_op_extract_p (op1, mode))
	  {
	    if (speed)
	      *cost += extra_cost->alu.arith_shift;

	    *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
			       (enum rtx_code) GET_CODE (op1),
			       0, speed);
	    return true;
	  }

	rtx new_op1 = aarch64_strip_extend (op1);

	/* Cost this as an FMA-alike operation.  */
	if ((GET_CODE (new_op1) == MULT
	     || GET_CODE (new_op1) == ASHIFT)
	    && code != COMPARE)
	  {
	    *cost += aarch64_rtx_mult_cost (new_op1, MULT,
					    (enum rtx_code) code,
					    speed);
	    *cost += rtx_cost (op0, MINUS, 0, speed);
	    return true;
	  }

	*cost += rtx_cost (new_op1, MINUS, 1, speed);

	if (speed)
	  {
	    if (GET_MODE_CLASS (mode) == MODE_INT)
	      /* SUB(S).  */
	      *cost += extra_cost->alu.arith;
	    else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	      /* FSUB.  */
	      *cost += extra_cost->fp[mode == DFmode].addsub;
	  }
	return true;
      }

    case PLUS:
      {
	rtx new_op0;

	op0 = XEXP (x, 0);
	op1 = XEXP (x, 1);

cost_plus:
	if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
	    || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
	  {
	    /* CSINC.  */
	    *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
	    *cost += rtx_cost (op1, PLUS, 1, speed);
	    return true;
	  }

	if (GET_MODE_CLASS (mode) == MODE_INT
	    && CONST_INT_P (op1)
	    && aarch64_uimm12_shift (INTVAL (op1)))
	  {
	    *cost += rtx_cost (op0, PLUS, 0, speed);

	    if (speed)
	      /* ADD (immediate).  */
	      *cost += extra_cost->alu.arith;
	    return true;
	  }

	/* Look for ADD (extended register).  */
	if (aarch64_rtx_arith_op_extract_p (op0, mode))
	  {
	    if (speed)
	      *cost += extra_cost->alu.arith_shift;

	    *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
			       (enum rtx_code) GET_CODE (op0),
			       0, speed);
	    return true;
	  }

	/* Strip any extend, leave shifts behind as we will
	   cost them through mult_cost.  */
	new_op0 = aarch64_strip_extend (op0);

	if (GET_CODE (new_op0) == MULT
	    || GET_CODE (new_op0) == ASHIFT)
	  {
	    *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
					    speed);
	    *cost += rtx_cost (op1, PLUS, 1, speed);
	    return true;
	  }

	*cost += (rtx_cost (new_op0, PLUS, 0, speed)
		  + rtx_cost (op1, PLUS, 1, speed));

	if (speed)
	  {
	    if (GET_MODE_CLASS (mode) == MODE_INT)
	      /* ADD.  */
	      *cost += extra_cost->alu.arith;
	    else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	      /* FADD.  */
	      *cost += extra_cost->fp[mode == DFmode].addsub;
	  }
	return true;
      }

    case BSWAP:
      *cost = COSTS_N_INSNS (1);

      if (speed)
	*cost += extra_cost->alu.rev;

      return false;

    case IOR:
      if (aarch_rev16_p (x))
	{
	  *cost = COSTS_N_INSNS (1);

	  if (speed)
	    *cost += extra_cost->alu.rev;

	  return true;
	}
      /* Fall through.  */
    case XOR:
    case AND:
    cost_logic:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (code == AND
	  && GET_CODE (op0) == MULT
	  && CONST_INT_P (XEXP (op0, 1))
	  && CONST_INT_P (op1)
	  && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
			       INTVAL (op1)) != 0)
	{
	  /* This is a UBFM/SBFM.  */
	  *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
	  if (speed)
	    *cost += extra_cost->alu.bfx;
	  return true;
	}

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	{
	  /* We possibly get the immediate for free, this is not
	     modelled.  */
	  if (CONST_INT_P (op1)
	      && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
	    {
	      *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);

	      if (speed)
		*cost += extra_cost->alu.logical;

	      return true;
	    }
	  else
	    {
	      rtx new_op0 = op0;

	      /* Handle ORN, EON, or BIC.  */
	      if (GET_CODE (op0) == NOT)
		op0 = XEXP (op0, 0);

	      new_op0 = aarch64_strip_shift (op0);

	      /* If we had a shift on op0 then this is a logical-shift-
		 by-register/immediate operation.  Otherwise, this is just
		 a logical operation.  */
	      if (speed)
		{
		  if (new_op0 != op0)
		    {
		      /* Shift by immediate.  */
		      if (CONST_INT_P (XEXP (op0, 1)))
			*cost += extra_cost->alu.log_shift;
		      else
			*cost += extra_cost->alu.log_shift_reg;
		    }
		  else
		    *cost += extra_cost->alu.logical;
		}

	      /* In both cases we want to cost both operands.  */
	      *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
		       + rtx_cost (op1, (enum rtx_code) code, 1, speed);

	      return true;
	    }
	}
      return false;

    case NOT:
      /* MVN.  */
      if (speed)
	*cost += extra_cost->alu.logical;

      /* The logical instruction could have the shifted register form,
	 but the cost is the same if the shift is processed as a separate
	 instruction, so we don't bother with it here.  */
      return false;

    case ZERO_EXTEND:

      op0 = XEXP (x, 0);
      /* If a value is written in SI mode, then zero extended to DI
	 mode, the operation will in general be free as a write to
	 a 'w' register implicitly zeroes the upper bits of an 'x'
	 register.  However, if this is

	   (set (reg) (zero_extend (reg)))

	 we must cost the explicit register move.  */
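      /* For example, "add w0, w1, w2" already zeroes bits 63:32 of x0,
	 so a following zero_extend to DImode costs nothing; only a
	 standalone (set (reg) (zero_extend (reg))) needs a real MOV.  */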
5568 if (mode == DImode
5569 && GET_MODE (op0) == SImode
5570 && outer == SET)
5571 {
5572 int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5573
5574 if (!op_cost && speed)
5575 /* MOV. */
5576 *cost += extra_cost->alu.extend;
5577 else
5578 /* Free, the cost is that of the SI mode operation. */
5579 *cost = op_cost;
5580
5581 return true;
5582 }
5583 else if (MEM_P (XEXP (x, 0)))
43e9d192 5584 {
b1685e62
JG
5585 /* All loads can zero extend to any size for free. */
5586 *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
43e9d192
IB
5587 return true;
5588 }
b1685e62
JG
5589
5590 /* UXTB/UXTH. */
5591 if (speed)
5592 *cost += extra_cost->alu.extend;
5593
43e9d192
IB
5594 return false;
5595
5596 case SIGN_EXTEND:
b1685e62 5597 if (MEM_P (XEXP (x, 0)))
43e9d192 5598 {
b1685e62
JG
5599 /* LDRSH. */
5600 if (speed)
5601 {
5602 rtx address = XEXP (XEXP (x, 0), 0);
5603 *cost += extra_cost->ldst.load_sign_extend;
5604
5605 *cost +=
5606 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5607 0, speed));
5608 }
43e9d192
IB
5609 return true;
5610 }
b1685e62
JG
5611
5612 if (speed)
5613 *cost += extra_cost->alu.extend;
43e9d192
IB
5614 return false;
5615
ba0cfa17
JG
5616 case ASHIFT:
5617 op0 = XEXP (x, 0);
5618 op1 = XEXP (x, 1);
5619
5620 if (CONST_INT_P (op1))
5621 {
5622 /* LSL (immediate), UBMF, UBFIZ and friends. These are all
5623 aliases. */
5624 if (speed)
5625 *cost += extra_cost->alu.shift;
5626
5627 /* We can incorporate zero/sign extend for free. */
5628 if (GET_CODE (op0) == ZERO_EXTEND
5629 || GET_CODE (op0) == SIGN_EXTEND)
5630 op0 = XEXP (op0, 0);
5631
5632 *cost += rtx_cost (op0, ASHIFT, 0, speed);
5633 return true;
5634 }
5635 else
5636 {
5637 /* LSLV. */
5638 if (speed)
5639 *cost += extra_cost->alu.shift_reg;
5640
5641 return false; /* All arguments need to be in registers. */
5642 }
5643
43e9d192 5644 case ROTATE:
43e9d192
IB
5645 case ROTATERT:
5646 case LSHIFTRT:
43e9d192 5647 case ASHIFTRT:
ba0cfa17
JG
5648 op0 = XEXP (x, 0);
5649 op1 = XEXP (x, 1);
43e9d192 5650
ba0cfa17
JG
5651 if (CONST_INT_P (op1))
5652 {
5653 /* ASR (immediate) and friends. */
5654 if (speed)
5655 *cost += extra_cost->alu.shift;
43e9d192 5656
ba0cfa17
JG
5657 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5658 return true;
5659 }
5660 else
5661 {
5662
5663 /* ASR (register) and friends. */
5664 if (speed)
5665 *cost += extra_cost->alu.shift_reg;
5666
5667 return false; /* All arguments need to be in registers. */
5668 }
43e9d192 5669
909734be
JG
5670 case SYMBOL_REF:
5671
5672 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5673 {
5674 /* LDR. */
5675 if (speed)
5676 *cost += extra_cost->ldst.load;
5677 }
5678 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
5679 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
5680 {
5681 /* ADRP, followed by ADD. */
5682 *cost += COSTS_N_INSNS (1);
5683 if (speed)
5684 *cost += 2 * extra_cost->alu.arith;
5685 }
5686 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
5687 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
5688 {
5689 /* ADR. */
5690 if (speed)
5691 *cost += extra_cost->alu.arith;
5692 }
5693
5694 if (flag_pic)
5695 {
5696 /* One extra load instruction, after accessing the GOT. */
5697 *cost += COSTS_N_INSNS (1);
5698 if (speed)
5699 *cost += extra_cost->ldst.load;
5700 }
5701 return true;
5702
909734be 5703 case HIGH:
43e9d192 5704 case LO_SUM:
5705 /* ADRP/ADD (immediate). */
5706 if (speed)
5707 *cost += extra_cost->alu.arith;
5708 return true;
5709
5710 case ZERO_EXTRACT:
5711 case SIGN_EXTRACT:
5712 /* UBFX/SBFX. */
5713 if (speed)
5714 *cost += extra_cost->alu.bfx;
5715
5716 /* We can trust that the immediates used will be correct (there
5717 are no by-register forms), so we need only cost op0. */
5718 *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
5719 return true;
5720
5721 case MULT:
5722 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
5723 /* aarch64_rtx_mult_cost always handles recursion to its
5724 operands. */
5725 return true;
5726
5727 case MOD:
5728 case UMOD:
5729 if (speed)
5730 {
5731 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5732 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
5733 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
43e9d192 5734 else if (GET_MODE (x) == DFmode)
5735 *cost += (extra_cost->fp[1].mult
5736 + extra_cost->fp[1].div);
43e9d192 5737 else if (GET_MODE (x) == SFmode)
5738 *cost += (extra_cost->fp[0].mult
5739 + extra_cost->fp[0].div);
5740 }
5741 return false; /* All arguments need to be in registers. */
5742
5743 case DIV:
5744 case UDIV:
4105fe38 5745 case SQRT:
5746 if (speed)
5747 {
5748 if (GET_MODE_CLASS (mode) == MODE_INT)
5749 /* There is no integer SQRT, so only DIV and UDIV can get
5750 here. */
5751 *cost += extra_cost->mult[mode == DImode].idiv;
5752 else
5753 *cost += extra_cost->fp[mode == DFmode].div;
5754 }
5755 return false; /* All arguments need to be in registers. */
5756
a8eecd00 5757 case IF_THEN_ELSE:
5758 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
5759 XEXP (x, 2), cost, speed);
5760
5761 case EQ:
5762 case NE:
5763 case GT:
5764 case GTU:
5765 case LT:
5766 case LTU:
5767 case GE:
5768 case GEU:
5769 case LE:
5770 case LEU:
5771
5772 return false; /* All arguments must be in registers. */
5773
5774 case FMA:
5775 op0 = XEXP (x, 0);
5776 op1 = XEXP (x, 1);
5777 op2 = XEXP (x, 2);
5778
5779 if (speed)
5780 *cost += extra_cost->fp[mode == DFmode].fma;
5781
5782 /* FMSUB, FNMADD, and FNMSUB are free. */
5783 if (GET_CODE (op0) == NEG)
5784 op0 = XEXP (op0, 0);
5785
5786 if (GET_CODE (op2) == NEG)
5787 op2 = XEXP (op2, 0);
5788
5789 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
5790 and the by-element operand as operand 0. */
5791 if (GET_CODE (op1) == NEG)
5792 op1 = XEXP (op1, 0);
5793
5794 /* Catch vector-by-element operations. The by-element operand can
5795 either be (vec_duplicate (vec_select (x))) or just
5796 (vec_select (x)), depending on whether we are multiplying by
5797 a vector or a scalar.
5798
5799 Canonicalization is not very good in these cases: FMA4 will put the
5800 by-element operand as operand 0, while FNMA4 will have it as operand 1. */
5801 if (GET_CODE (op0) == VEC_DUPLICATE)
5802 op0 = XEXP (op0, 0);
5803 else if (GET_CODE (op1) == VEC_DUPLICATE)
5804 op1 = XEXP (op1, 0);
5805
5806 if (GET_CODE (op0) == VEC_SELECT)
5807 op0 = XEXP (op0, 0);
5808 else if (GET_CODE (op1) == VEC_SELECT)
5809 op1 = XEXP (op1, 0);
5810
5811 /* If the remaining parameters are not registers,
5812 get the cost to put them into registers. */
5813 *cost += rtx_cost (op0, FMA, 0, speed);
5814 *cost += rtx_cost (op1, FMA, 1, speed);
5815 *cost += rtx_cost (op2, FMA, 2, speed);
5816 return true;
5817
5818 case FLOAT_EXTEND:
5819 if (speed)
5820 *cost += extra_cost->fp[mode == DFmode].widen;
5821 return false;
5822
5823 case FLOAT_TRUNCATE:
5824 if (speed)
5825 *cost += extra_cost->fp[mode == DFmode].narrow;
5826 return false;
5827
5828 case FIX:
5829 case UNSIGNED_FIX:
5830 x = XEXP (x, 0);
5831 /* Strip the rounding part. They will all be implemented
5832 by the fcvt* family of instructions anyway. */
5833 if (GET_CODE (x) == UNSPEC)
5834 {
5835 unsigned int uns_code = XINT (x, 1);
5836
5837 if (uns_code == UNSPEC_FRINTA
5838 || uns_code == UNSPEC_FRINTM
5839 || uns_code == UNSPEC_FRINTN
5840 || uns_code == UNSPEC_FRINTP
5841 || uns_code == UNSPEC_FRINTZ)
5842 x = XVECEXP (x, 0, 0);
5843 }
5844
5845 if (speed)
5846 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
5847
5848 *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
5849 return true;
5850
5851 case ABS:
5852 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5853 {
5854 /* FABS and FNEG are analogous. */
5855 if (speed)
5856 *cost += extra_cost->fp[mode == DFmode].neg;
5857 }
5858 else
5859 {
5860 /* Integer ABS will either be split into
5861 two arithmetic instructions, or will be an ABS
5862 (scalar), which we don't model. */
5863 *cost = COSTS_N_INSNS (2);
5864 if (speed)
5865 *cost += 2 * extra_cost->alu.arith;
5866 }
5867 return false;
5868
5869 case SMAX:
5870 case SMIN:
5871 if (speed)
5872 {
5873 /* FMAXNM/FMINNM/FMAX/FMIN.
5874 TODO: This may not be accurate for all implementations, but
5875 we do not model this in the cost tables. */
5876 *cost += extra_cost->fp[mode == DFmode].addsub;
5877 }
5878 return false;
5879
5880 case UNSPEC:
5881 /* The floating point round to integer frint* instructions. */
5882 if (aarch64_frint_unspec_p (XINT (x, 1)))
5883 {
5884 if (speed)
5885 *cost += extra_cost->fp[mode == DFmode].roundint;
5886
5887 return false;
5888 }
5889
5890 if (XINT (x, 1) == UNSPEC_RBIT)
5891 {
5892 if (speed)
5893 *cost += extra_cost->alu.rev;
5894
5895 return false;
5896 }
5897 break;
5898
5899 case TRUNCATE:
5900
5901 /* Decompose <su>muldi3_highpart. */
5902 if (/* (truncate:DI */
5903 mode == DImode
5904 /* (lshiftrt:TI */
5905 && GET_MODE (XEXP (x, 0)) == TImode
5906 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
5907 /* (mult:TI */
5908 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5909 /* (ANY_EXTEND:TI (reg:DI))
5910 (ANY_EXTEND:TI (reg:DI))) */
5911 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
5912 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
5913 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
5914 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
5915 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
5916 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
5917 /* (const_int 64) */
5918 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
5919 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
5920 {
5921 /* UMULH/SMULH. */
5922 if (speed)
5923 *cost += extra_cost->mult[mode == DImode].extend;
5924 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
5925 MULT, 0, speed);
5926 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
5927 MULT, 1, speed);
5928 return true;
5929 }
5930
5931 /* Fall through. */
43e9d192 5932 default:
61263118 5933 break;
43e9d192 5934 }
5935
5936 if (dump_file && (dump_flags & TDF_DETAILS))
5937 fprintf (dump_file,
5938 "\nFailed to cost RTX. Assuming default cost.\n");
5939
5940 return true;
5941}
5942
5943/* Wrapper around aarch64_rtx_costs; dumps the partial or total cost
5944 calculated for X. This cost is stored in *COST. Returns true
5945 if the total cost of X was calculated. */
5946static bool
5947aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
5948 int param, int *cost, bool speed)
5949{
5950 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
5951
5952 if (dump_file && (dump_flags & TDF_DETAILS))
5953 {
5954 print_rtl_single (dump_file, x);
5955 fprintf (dump_file, "\n%s cost: %d (%s)\n",
5956 speed ? "Hot" : "Cold",
5957 *cost, result ? "final" : "partial");
5958 }
5959
5960 return result;
5961}
5962
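/* Implement TARGET_REGISTER_MOVE_COST: the cost of moving a value of
   MODE from register class FROM_I to register class TO_I.  */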
43e9d192 5963static int
5964aarch64_register_move_cost (enum machine_mode mode,
5965 reg_class_t from_i, reg_class_t to_i)
43e9d192 5966{
5967 enum reg_class from = (enum reg_class) from_i;
5968 enum reg_class to = (enum reg_class) to_i;
5969 const struct cpu_regmove_cost *regmove_cost
5970 = aarch64_tune_params->regmove_cost;
5971
5972 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
5973 if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
5974 to = GENERAL_REGS;
5975
5976 if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
5977 from = GENERAL_REGS;
5978
5979 /* Moving between a GPR and the stack costs the same as GP2GP. */
5980 if ((from == GENERAL_REGS && to == STACK_REG)
5981 || (to == GENERAL_REGS && from == STACK_REG))
5982 return regmove_cost->GP2GP;
5983
5984 /* To/From the stack register, we move via the gprs. */
5985 if (to == STACK_REG || from == STACK_REG)
5986 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
5987 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
5988
5989 if (GET_MODE_SIZE (mode) == 16)
5990 {
5991 /* 128-bit operations on general registers require 2 instructions. */
5992 if (from == GENERAL_REGS && to == GENERAL_REGS)
5993 return regmove_cost->GP2GP * 2;
5994 else if (from == GENERAL_REGS)
5995 return regmove_cost->GP2FP * 2;
5996 else if (to == GENERAL_REGS)
5997 return regmove_cost->FP2GP * 2;
5998
5999 /* When AdvSIMD instructions are disabled it is not possible to move
6000 a 128-bit value directly between Q registers. This is handled in
6001 secondary reload. A general register is used as a scratch to move
6002 the upper DI value and the lower DI value is moved directly,
6003 hence the cost is the sum of three moves. */
6004 if (! TARGET_SIMD)
6005 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
6006
6007 return regmove_cost->FP2FP;
6008 }
6009
6010 if (from == GENERAL_REGS && to == GENERAL_REGS)
6011 return regmove_cost->GP2GP;
6012 else if (from == GENERAL_REGS)
6013 return regmove_cost->GP2FP;
6014 else if (to == GENERAL_REGS)
6015 return regmove_cost->FP2GP;
6016
6017 return regmove_cost->FP2FP;
6018}
6019
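/* Implement TARGET_MEMORY_MOVE_COST.  All memory moves are costed
   equally, using the tuning-specific memmov_cost.  */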
6020static int
6021aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
6022 reg_class_t rclass ATTRIBUTE_UNUSED,
6023 bool in ATTRIBUTE_UNUSED)
6024{
6025 return aarch64_tune_params->memmov_cost;
6026}
6027
6028/* Return the number of instructions that can be issued per cycle. */
6029static int
6030aarch64_sched_issue_rate (void)
6031{
6032 return aarch64_tune_params->issue_rate;
6033}
6034
6035/* Vectorizer cost model target hooks. */
6036
6037/* Implement targetm.vectorize.builtin_vectorization_cost. */
6038static int
6039aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6040 tree vectype,
6041 int misalign ATTRIBUTE_UNUSED)
6042{
6043 unsigned elements;
6044
6045 switch (type_of_cost)
6046 {
6047 case scalar_stmt:
6048 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
6049
6050 case scalar_load:
6051 return aarch64_tune_params->vec_costs->scalar_load_cost;
6052
6053 case scalar_store:
6054 return aarch64_tune_params->vec_costs->scalar_store_cost;
6055
6056 case vector_stmt:
6057 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6058
6059 case vector_load:
6060 return aarch64_tune_params->vec_costs->vec_align_load_cost;
6061
6062 case vector_store:
6063 return aarch64_tune_params->vec_costs->vec_store_cost;
6064
6065 case vec_to_scalar:
6066 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
6067
6068 case scalar_to_vec:
6069 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
6070
6071 case unaligned_load:
6072 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
6073
6074 case unaligned_store:
6075 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
6076
6077 case cond_branch_taken:
6078 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
6079
6080 case cond_branch_not_taken:
6081 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
6082
6083 case vec_perm:
6084 case vec_promote_demote:
6085 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6086
6087 case vec_construct:
6088 elements = TYPE_VECTOR_SUBPARTS (vectype);
6089 return elements / 2 + 1;
6090
6091 default:
6092 gcc_unreachable ();
6093 }
6094}
6095
6096/* Implement targetm.vectorize.add_stmt_cost. */
6097static unsigned
6098aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6099 struct _stmt_vec_info *stmt_info, int misalign,
6100 enum vect_cost_model_location where)
6101{
6102 unsigned *cost = (unsigned *) data;
6103 unsigned retval = 0;
6104
6105 if (flag_vect_cost_model)
6106 {
6107 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6108 int stmt_cost =
6109 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
6110
6111 /* Statements in an inner loop relative to the loop being
6112 vectorized are weighted more heavily. The value here is
6113 a function (linear for now) of the loop nest level. */
6114 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6115 {
6116 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6117 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
6118 unsigned nest_level = loop_depth (loop);
6119
6120 count *= nest_level;
6121 }
6122
6123 retval = (unsigned) (count * stmt_cost);
6124 cost[where] += retval;
6125 }
6126
6127 return retval;
6128}
6129
6130static void initialize_aarch64_code_model (void);
6131
6132/* Parse the architecture extension string. */
6133
6134static void
6135aarch64_parse_extension (char *str)
6136{
6137 /* The extension string is parsed left to right. */
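/* For example (illustrative only): "+crypto+nofp" would first set the
   feature flags for the "crypto" extension and then clear those for
   "fp".  */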
6138 const struct aarch64_option_extension *opt = NULL;
6139
6140 /* Flag to say whether we are adding or removing an extension. */
6141 int adding_ext = -1;
6142
6143 while (str != NULL && *str != 0)
6144 {
6145 char *ext;
6146 size_t len;
6147
6148 str++;
6149 ext = strchr (str, '+');
6150
6151 if (ext != NULL)
6152 len = ext - str;
6153 else
6154 len = strlen (str);
6155
6156 if (len >= 2 && strncmp (str, "no", 2) == 0)
6157 {
6158 adding_ext = 0;
6159 len -= 2;
6160 str += 2;
6161 }
6162 else if (len > 0)
6163 adding_ext = 1;
6164
6165 if (len == 0)
6166 {
6167 error ("missing feature modifier after %qs", "+no");
6168 return;
6169 }
6170
6171 /* Scan over the extensions table trying to find an exact match. */
6172 for (opt = all_extensions; opt->name != NULL; opt++)
6173 {
6174 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
6175 {
6176 /* Add or remove the extension. */
6177 if (adding_ext)
6178 aarch64_isa_flags |= opt->flags_on;
6179 else
6180 aarch64_isa_flags &= ~(opt->flags_off);
6181 break;
6182 }
6183 }
6184
6185 if (opt->name == NULL)
6186 {
6187 /* Extension not found in list. */
6188 error ("unknown feature modifier %qs", str);
6189 return;
6190 }
6191
6192 str = ext;
6193 }
6194
6195 return;
6196}
6197
6198/* Parse the ARCH string. */
6199
6200static void
6201aarch64_parse_arch (void)
6202{
6203 char *ext;
6204 const struct processor *arch;
6205 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
6206 size_t len;
6207
6208 strcpy (str, aarch64_arch_string);
6209
6210 ext = strchr (str, '+');
6211
6212 if (ext != NULL)
6213 len = ext - str;
6214 else
6215 len = strlen (str);
6216
6217 if (len == 0)
6218 {
6219 error ("missing arch name in -march=%qs", str);
6220 return;
6221 }
6222
6223 /* Loop through the list of supported ARCHs to find a match. */
6224 for (arch = all_architectures; arch->name != NULL; arch++)
6225 {
6226 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
6227 {
6228 selected_arch = arch;
6229 aarch64_isa_flags = selected_arch->flags;
6230
6231 if (!selected_cpu)
6232 selected_cpu = &all_cores[selected_arch->core];
6233
6234 if (ext != NULL)
6235 {
6236 /* ARCH string contains at least one extension. */
6237 aarch64_parse_extension (ext);
6238 }
6239
6240 if (strcmp (selected_arch->arch, selected_cpu->arch))
6241 {
6242 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6243 selected_cpu->name, selected_arch->name);
6244 }
6245
6246 return;
6247 }
6248 }
6249
6250 /* ARCH name not found in list. */
6251 error ("unknown value %qs for -march", str);
6252 return;
6253}
6254
6255/* Parse the CPU string. */
6256
6257static void
6258aarch64_parse_cpu (void)
6259{
6260 char *ext;
6261 const struct processor *cpu;
6262 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
6263 size_t len;
6264
6265 strcpy (str, aarch64_cpu_string);
6266
6267 ext = strchr (str, '+');
6268
6269 if (ext != NULL)
6270 len = ext - str;
6271 else
6272 len = strlen (str);
6273
6274 if (len == 0)
6275 {
6276 error ("missing cpu name in -mcpu=%qs", str);
6277 return;
6278 }
6279
6280 /* Loop through the list of supported CPUs to find a match. */
6281 for (cpu = all_cores; cpu->name != NULL; cpu++)
6282 {
6283 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
6284 {
6285 selected_cpu = cpu;
192ed1dd 6286 selected_tune = cpu;
6287 aarch64_isa_flags = selected_cpu->flags;
6288
6289 if (ext != NULL)
6290 {
6291 /* CPU string contains at least one extension. */
6292 aarch64_parse_extension (ext);
6293 }
6294
6295 return;
6296 }
6297 }
6298
6299 /* CPU name not found in list. */
6300 error ("unknown value %qs for -mcpu", str);
6301 return;
6302}
6303
6304/* Parse the TUNE string. */
6305
6306static void
6307aarch64_parse_tune (void)
6308{
6309 const struct processor *cpu;
6310 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
6311 strcpy (str, aarch64_tune_string);
6312
6313 /* Loop through the list of supported CPUs to find a match. */
6314 for (cpu = all_cores; cpu->name != NULL; cpu++)
6315 {
6316 if (strcmp (cpu->name, str) == 0)
6317 {
6318 selected_tune = cpu;
6319 return;
6320 }
6321 }
6322
6323 /* CPU name not found in list. */
6324 error ("unknown value %qs for -mtune", str);
6325 return;
6326}
6327
6328
6329/* Implement TARGET_OPTION_OVERRIDE. */
6330
6331static void
6332aarch64_override_options (void)
6333{
6334 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
6335 If either of -march or -mtune is given, they override their
6336 respective component of -mcpu.
43e9d192 6337
6338 So, first parse AARCH64_CPU_STRING, then the others; be careful
6339 with -march because, if -mcpu is not present on the command line,
6340 -march must set a sensible default CPU. */
6341 if (aarch64_cpu_string)
43e9d192 6342 {
ffee7aa9 6343 aarch64_parse_cpu ();
6344 }
6345
ffee7aa9 6346 if (aarch64_arch_string)
43e9d192 6347 {
ffee7aa9 6348 aarch64_parse_arch ();
6349 }
6350
6351 if (aarch64_tune_string)
6352 {
6353 aarch64_parse_tune ();
6354 }
6355
6356#ifndef HAVE_AS_MABI_OPTION
6357 /* The compiler may have been configured with 2.23.* binutils, which does
6358 not have support for ILP32. */
6359 if (TARGET_ILP32)
6360 error ("Assembler does not support -mabi=ilp32");
6361#endif
6362
6363 initialize_aarch64_code_model ();
6364
6365 aarch64_build_bitmask_table ();
6366
6367 /* This target defaults to strict volatile bitfields. */
6368 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
6369 flag_strict_volatile_bitfields = 1;
6370
6371 /* If the user did not specify a processor, choose the default
6372 one for them. This will be the CPU set during configuration using
a3cd0246 6373 --with-cpu, otherwise it is "generic". */
6374 if (!selected_cpu)
6375 {
6376 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
6377 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
6378 }
6379
6380 gcc_assert (selected_cpu);
6381
6382 /* The selected cpu may be an architecture, so look up tuning by core ID. */
6383 if (!selected_tune)
6384 selected_tune = &all_cores[selected_cpu->core];
6385
6386 aarch64_tune_flags = selected_tune->flags;
6387 aarch64_tune = selected_tune->core;
6388 aarch64_tune_params = selected_tune->tune;
6389
6390 aarch64_override_options_after_change ();
6391}
6392
6393/* Implement targetm.override_options_after_change. */
6394
6395static void
6396aarch64_override_options_after_change (void)
6397{
6398 if (flag_omit_frame_pointer)
6399 flag_omit_leaf_frame_pointer = false;
6400 else if (flag_omit_leaf_frame_pointer)
6401 flag_omit_frame_pointer = true;
6402}
6403
6404static struct machine_function *
6405aarch64_init_machine_status (void)
6406{
6407 struct machine_function *machine;
766090c2 6408 machine = ggc_cleared_alloc<machine_function> ();
6409 return machine;
6410}
6411
6412void
6413aarch64_init_expanders (void)
6414{
6415 init_machine_status = aarch64_init_machine_status;
6416}
6417
6418/* Validate the requested code model and initialize aarch64_cmodel, selecting the PIC variant of the model when compiling with -fpic/-fPIC. */
6419static void
6420initialize_aarch64_code_model (void)
6421{
6422 if (flag_pic)
6423 {
6424 switch (aarch64_cmodel_var)
6425 {
6426 case AARCH64_CMODEL_TINY:
6427 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
6428 break;
6429 case AARCH64_CMODEL_SMALL:
6430 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
6431 break;
6432 case AARCH64_CMODEL_LARGE:
6433 sorry ("code model %qs with -f%s", "large",
6434 flag_pic > 1 ? "PIC" : "pic");
6435 default:
6436 gcc_unreachable ();
6437 }
6438 }
6439 else
6440 aarch64_cmodel = aarch64_cmodel_var;
6441}
6442
6443/* Return true if SYMBOL_REF X binds locally. */
6444
6445static bool
6446aarch64_symbol_binds_local_p (const_rtx x)
6447{
6448 return (SYMBOL_REF_DECL (x)
6449 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
6450 : SYMBOL_REF_LOCAL_P (x));
6451}
6452
6453/* Return true if SYMBOL_REF X is thread-local. */
6454static bool
6455aarch64_tls_symbol_p (rtx x)
6456{
6457 if (! TARGET_HAVE_TLS)
6458 return false;
6459
6460 if (GET_CODE (x) != SYMBOL_REF)
6461 return false;
6462
6463 return SYMBOL_REF_TLS_MODEL (x) != 0;
6464}
6465
6466/* Classify a TLS symbol into one of the TLS kinds. */
6467enum aarch64_symbol_type
6468aarch64_classify_tls_symbol (rtx x)
6469{
6470 enum tls_model tls_kind = tls_symbolic_operand_type (x);
6471
6472 switch (tls_kind)
6473 {
6474 case TLS_MODEL_GLOBAL_DYNAMIC:
6475 case TLS_MODEL_LOCAL_DYNAMIC:
6476 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
6477
6478 case TLS_MODEL_INITIAL_EXEC:
6479 return SYMBOL_SMALL_GOTTPREL;
6480
6481 case TLS_MODEL_LOCAL_EXEC:
6482 return SYMBOL_SMALL_TPREL;
6483
6484 case TLS_MODEL_EMULATED:
6485 case TLS_MODEL_NONE:
6486 return SYMBOL_FORCE_TO_MEM;
6487
6488 default:
6489 gcc_unreachable ();
6490 }
6491}
6492
6493/* Return the method that should be used to access SYMBOL_REF or
6494 LABEL_REF X in context CONTEXT. */
17f4d4bf 6495
6496enum aarch64_symbol_type
6497aarch64_classify_symbol (rtx x,
6498 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
6499{
6500 if (GET_CODE (x) == LABEL_REF)
6501 {
6502 switch (aarch64_cmodel)
6503 {
6504 case AARCH64_CMODEL_LARGE:
6505 return SYMBOL_FORCE_TO_MEM;
6506
6507 case AARCH64_CMODEL_TINY_PIC:
6508 case AARCH64_CMODEL_TINY:
6509 return SYMBOL_TINY_ABSOLUTE;
6510
6511 case AARCH64_CMODEL_SMALL_PIC:
6512 case AARCH64_CMODEL_SMALL:
6513 return SYMBOL_SMALL_ABSOLUTE;
6514
6515 default:
6516 gcc_unreachable ();
6517 }
6518 }
6519
17f4d4bf 6520 if (GET_CODE (x) == SYMBOL_REF)
43e9d192 6521 {
6522 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
6523 return SYMBOL_FORCE_TO_MEM;
6524
6525 if (aarch64_tls_symbol_p (x))
6526 return aarch64_classify_tls_symbol (x);
6527
6528 switch (aarch64_cmodel)
6529 {
6530 case AARCH64_CMODEL_TINY:
6531 if (SYMBOL_REF_WEAK (x))
6532 return SYMBOL_FORCE_TO_MEM;
6533 return SYMBOL_TINY_ABSOLUTE;
6534
6535 case AARCH64_CMODEL_SMALL:
6536 if (SYMBOL_REF_WEAK (x))
6537 return SYMBOL_FORCE_TO_MEM;
6538 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 6539
17f4d4bf 6540 case AARCH64_CMODEL_TINY_PIC:
38e6c9a6 6541 if (!aarch64_symbol_binds_local_p (x))
87dd8ab0 6542 return SYMBOL_TINY_GOT;
6543 return SYMBOL_TINY_ABSOLUTE;
6544
6545 case AARCH64_CMODEL_SMALL_PIC:
6546 if (!aarch64_symbol_binds_local_p (x))
6547 return SYMBOL_SMALL_GOT;
6548 return SYMBOL_SMALL_ABSOLUTE;
43e9d192 6549
6550 default:
6551 gcc_unreachable ();
6552 }
43e9d192 6553 }
17f4d4bf 6554
6555 /* By default push everything into the constant pool. */
6556 return SYMBOL_FORCE_TO_MEM;
6557}
6558
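/* Return true if X is a constant that is also a valid DImode memory
   address.  */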
6559bool
6560aarch64_constant_address_p (rtx x)
6561{
6562 return (CONSTANT_P (x) && memory_address_p (DImode, x));
6563}
6564
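/* Return true if X is a legitimate operand when generating PIC; bare
   symbolic references are rejected here so that they can be
   legitimized first.  */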
6565bool
6566aarch64_legitimate_pic_operand_p (rtx x)
6567{
6568 if (GET_CODE (x) == SYMBOL_REF
6569 || (GET_CODE (x) == CONST
6570 && GET_CODE (XEXP (x, 0)) == PLUS
6571 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6572 return false;
6573
6574 return true;
6575}
6576
6577/* Return true if X holds either a quarter-precision or
6578 floating-point +0.0 constant. */
6579static bool
6580aarch64_valid_floating_const (enum machine_mode mode, rtx x)
6581{
6582 if (!CONST_DOUBLE_P (x))
6583 return false;
6584
6585 /* TODO: We could handle moving 0.0 to a TFmode register,
6586 but first we would like to refactor the movtf_aarch64
6587 to be more amenable to splitting moves properly and
6588 correctly gating on TARGET_SIMD. For now, reject all
6589 constants which are not destined for SFmode or DFmode registers. */
6590 if (!(mode == SFmode || mode == DFmode))
6591 return false;
6592
6593 if (aarch64_float_const_zero_rtx_p (x))
6594 return true;
6595 return aarch64_float_const_representable_p (x);
6596}
6597
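/* Implement TARGET_LEGITIMATE_CONSTANT_P.  */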
6598static bool
6599aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
6600{
6601 /* Do not allow vector struct mode constants. We could support
6602 0 and -1 easily, but they need support in aarch64-simd.md. */
6603 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
6604 return false;
6605
6606 /* This could probably go away because
6607 we now decompose CONST_INTs according to expand_mov_immediate. */
6608 if ((GET_CODE (x) == CONST_VECTOR
48063b9d 6609 && aarch64_simd_valid_immediate (x, mode, false, NULL))
6610 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
6611 return !targetm.cannot_force_const_mem (mode, x);
6612
6613 if (GET_CODE (x) == HIGH
6614 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6615 return true;
6616
6617 return aarch64_constant_address_p (x);
6618}
6619
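/* Load the thread pointer into TARGET, or into a fresh register if
   TARGET is not a suitable Pmode register, and return the register
   used.  */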
a5bc806c 6620rtx
6621aarch64_load_tp (rtx target)
6622{
6623 if (!target
6624 || GET_MODE (target) != Pmode
6625 || !register_operand (target, Pmode))
6626 target = gen_reg_rtx (Pmode);
6627
6628 /* Can return in any reg. */
6629 emit_insn (gen_aarch64_load_tp_hard (target));
6630 return target;
6631}
6632
6633/* On AAPCS systems, this is the "struct __va_list". */
6634static GTY(()) tree va_list_type;
6635
6636/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
6637 Return the type to use as __builtin_va_list.
6638
6639 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
6640
6641 struct __va_list
6642 {
6643 void *__stack;
6644 void *__gr_top;
6645 void *__vr_top;
6646 int __gr_offs;
6647 int __vr_offs;
6648 }; */
6649
6650static tree
6651aarch64_build_builtin_va_list (void)
6652{
6653 tree va_list_name;
6654 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6655
6656 /* Create the type. */
6657 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
6658 /* Give it the required name. */
6659 va_list_name = build_decl (BUILTINS_LOCATION,
6660 TYPE_DECL,
6661 get_identifier ("__va_list"),
6662 va_list_type);
6663 DECL_ARTIFICIAL (va_list_name) = 1;
6664 TYPE_NAME (va_list_type) = va_list_name;
665c56c6 6665 TYPE_STUB_DECL (va_list_type) = va_list_name;
6666
6667 /* Create the fields. */
6668 f_stack = build_decl (BUILTINS_LOCATION,
6669 FIELD_DECL, get_identifier ("__stack"),
6670 ptr_type_node);
6671 f_grtop = build_decl (BUILTINS_LOCATION,
6672 FIELD_DECL, get_identifier ("__gr_top"),
6673 ptr_type_node);
6674 f_vrtop = build_decl (BUILTINS_LOCATION,
6675 FIELD_DECL, get_identifier ("__vr_top"),
6676 ptr_type_node);
6677 f_groff = build_decl (BUILTINS_LOCATION,
6678 FIELD_DECL, get_identifier ("__gr_offs"),
6679 integer_type_node);
6680 f_vroff = build_decl (BUILTINS_LOCATION,
6681 FIELD_DECL, get_identifier ("__vr_offs"),
6682 integer_type_node);
6683
6684 DECL_ARTIFICIAL (f_stack) = 1;
6685 DECL_ARTIFICIAL (f_grtop) = 1;
6686 DECL_ARTIFICIAL (f_vrtop) = 1;
6687 DECL_ARTIFICIAL (f_groff) = 1;
6688 DECL_ARTIFICIAL (f_vroff) = 1;
6689
6690 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
6691 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
6692 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
6693 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
6694 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
6695
6696 TYPE_FIELDS (va_list_type) = f_stack;
6697 DECL_CHAIN (f_stack) = f_grtop;
6698 DECL_CHAIN (f_grtop) = f_vrtop;
6699 DECL_CHAIN (f_vrtop) = f_groff;
6700 DECL_CHAIN (f_groff) = f_vroff;
6701
6702 /* Compute its layout. */
6703 layout_type (va_list_type);
6704
6705 return va_list_type;
6706}
6707
6708/* Implement TARGET_EXPAND_BUILTIN_VA_START. */
6709static void
6710aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6711{
6712 const CUMULATIVE_ARGS *cum;
6713 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6714 tree stack, grtop, vrtop, groff, vroff;
6715 tree t;
6716 int gr_save_area_size;
6717 int vr_save_area_size;
6718 int vr_offset;
6719
6720 cum = &crtl->args.info;
6721 gr_save_area_size
6722 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
6723 vr_save_area_size
6724 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
6725
6726 if (TARGET_GENERAL_REGS_ONLY)
6727 {
6728 if (cum->aapcs_nvrn > 0)
6729 sorry ("%qs and floating point or vector arguments",
6730 "-mgeneral-regs-only");
6731 vr_save_area_size = 0;
6732 }
6733
6734 f_stack = TYPE_FIELDS (va_list_type_node);
6735 f_grtop = DECL_CHAIN (f_stack);
6736 f_vrtop = DECL_CHAIN (f_grtop);
6737 f_groff = DECL_CHAIN (f_vrtop);
6738 f_vroff = DECL_CHAIN (f_groff);
6739
6740 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
6741 NULL_TREE);
6742 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
6743 NULL_TREE);
6744 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
6745 NULL_TREE);
6746 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
6747 NULL_TREE);
6748 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
6749 NULL_TREE);
6750
6751 /* Emit code to initialize STACK, which points to the next varargs stack
6752 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
6753 by named arguments. STACK is 8-byte aligned. */
6754 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
6755 if (cum->aapcs_stack_size > 0)
6756 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
6757 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
6758 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6759
6760 /* Emit code to initialize GRTOP, the top of the GR save area.
6761 virtual_incoming_args_rtx should have been 16 byte aligned. */
6762 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
6763 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
6764 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6765
6766 /* Emit code to initialize VRTOP, the top of the VR save area.
6767 This address is gr_save_area_bytes below GRTOP, rounded
6768 down to the next 16-byte boundary. */
6769 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
6770 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
6771 STACK_BOUNDARY / BITS_PER_UNIT);
6772
6773 if (vr_offset)
6774 t = fold_build_pointer_plus_hwi (t, -vr_offset);
6775 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
6776 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6777
6778 /* Emit code to initialize GROFF, the offset from GRTOP of the
6779 next GPR argument. */
6780 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
6781 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
6782 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6783
6784 /* Likewise emit code to initialize VROFF, the offset from FTOP
6785 of the next VR argument. */
6786 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
6787 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
6788 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6789}
6790
6791/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
6792
6793static tree
6794aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6795 gimple_seq *post_p ATTRIBUTE_UNUSED)
6796{
6797 tree addr;
6798 bool indirect_p;
6799 bool is_ha; /* is HFA or HVA. */
6800 bool dw_align; /* double-word align. */
6801 enum machine_mode ag_mode = VOIDmode;
6802 int nregs;
6803 enum machine_mode mode;
6804
6805 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6806 tree stack, f_top, f_off, off, arg, roundup, on_stack;
6807 HOST_WIDE_INT size, rsize, adjust, align;
6808 tree t, u, cond1, cond2;
6809
6810 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6811 if (indirect_p)
6812 type = build_pointer_type (type);
6813
6814 mode = TYPE_MODE (type);
6815
6816 f_stack = TYPE_FIELDS (va_list_type_node);
6817 f_grtop = DECL_CHAIN (f_stack);
6818 f_vrtop = DECL_CHAIN (f_grtop);
6819 f_groff = DECL_CHAIN (f_vrtop);
6820 f_vroff = DECL_CHAIN (f_groff);
6821
6822 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
6823 f_stack, NULL_TREE);
6824 size = int_size_in_bytes (type);
6825 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
6826
6827 dw_align = false;
6828 adjust = 0;
6829 if (aarch64_vfp_is_call_or_return_candidate (mode,
6830 type,
6831 &ag_mode,
6832 &nregs,
6833 &is_ha))
6834 {
6835 /* TYPE passed in fp/simd registers. */
6836 if (TARGET_GENERAL_REGS_ONLY)
6837 sorry ("%qs and floating point or vector arguments",
6838 "-mgeneral-regs-only");
6839
6840 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
6841 unshare_expr (valist), f_vrtop, NULL_TREE);
6842 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
6843 unshare_expr (valist), f_vroff, NULL_TREE);
6844
6845 rsize = nregs * UNITS_PER_VREG;
6846
6847 if (is_ha)
6848 {
6849 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
6850 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
6851 }
6852 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
6853 && size < UNITS_PER_VREG)
6854 {
6855 adjust = UNITS_PER_VREG - size;
6856 }
6857 }
6858 else
6859 {
6860 /* TYPE passed in general registers. */
6861 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
6862 unshare_expr (valist), f_grtop, NULL_TREE);
6863 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
6864 unshare_expr (valist), f_groff, NULL_TREE);
6865 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6866 nregs = rsize / UNITS_PER_WORD;
6867
6868 if (align > 8)
6869 dw_align = true;
6870
6871 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6872 && size < UNITS_PER_WORD)
6873 {
6874 adjust = UNITS_PER_WORD - size;
6875 }
6876 }
6877
6878 /* Get a local temporary for the field value. */
6879 off = get_initialized_tmp_var (f_off, pre_p, NULL);
6880
6881 /* Emit code to branch if off >= 0. */
6882 t = build2 (GE_EXPR, boolean_type_node, off,
6883 build_int_cst (TREE_TYPE (off), 0));
6884 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
6885
6886 if (dw_align)
6887 {
6888 /* Emit: offs = (offs + 15) & -16. */
6889 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6890 build_int_cst (TREE_TYPE (off), 15));
6891 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
6892 build_int_cst (TREE_TYPE (off), -16));
6893 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
6894 }
6895 else
6896 roundup = NULL;
6897
6898 /* Update ap.__[g|v]r_offs */
6899 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6900 build_int_cst (TREE_TYPE (off), rsize));
6901 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
6902
6903 /* String up. */
6904 if (roundup)
6905 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6906
6907 /* [cond2] if (ap.__[g|v]r_offs > 0) */
6908 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
6909 build_int_cst (TREE_TYPE (f_off), 0));
6910 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
6911
6912 /* String up: make sure the assignment happens before the use. */
6913 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
6914 COND_EXPR_ELSE (cond1) = t;
6915
6916 /* Prepare the trees handling the argument that is passed on the stack;
6917 the top level node will store in ON_STACK. */
6918 arg = get_initialized_tmp_var (stack, pre_p, NULL);
6919 if (align > 8)
6920 {
6921 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
6922 t = fold_convert (intDI_type_node, arg);
6923 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6924 build_int_cst (TREE_TYPE (t), 15));
6925 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6926 build_int_cst (TREE_TYPE (t), -16));
6927 t = fold_convert (TREE_TYPE (arg), t);
6928 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
6929 }
6930 else
6931 roundup = NULL;
6932 /* Advance ap.__stack */
6933 t = fold_convert (intDI_type_node, arg);
6934 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6935 build_int_cst (TREE_TYPE (t), size + 7));
6936 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6937 build_int_cst (TREE_TYPE (t), -8));
6938 t = fold_convert (TREE_TYPE (arg), t);
6939 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
6940 /* String up roundup and advance. */
6941 if (roundup)
6942 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6943 /* String up with arg */
6944 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
6945 /* Big-endianness related address adjustment. */
6946 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6947 && size < UNITS_PER_WORD)
6948 {
6949 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
6950 size_int (UNITS_PER_WORD - size));
6951 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
6952 }
6953
6954 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
6955 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
6956
6957 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
6958 t = off;
6959 if (adjust)
6960 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
6961 build_int_cst (TREE_TYPE (off), adjust));
6962
6963 t = fold_convert (sizetype, t);
6964 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
6965
6966 if (is_ha)
6967 {
6968 /* type ha; // treat as "struct {ftype field[n];}"
6969 ... [computing offs]
6970 for (i = 0; i < nregs; ++i, offs += 16)
6971 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
6972 return ha; */
6973 int i;
6974 tree tmp_ha, field_t, field_ptr_t;
6975
6976 /* Declare a local variable. */
6977 tmp_ha = create_tmp_var_raw (type, "ha");
6978 gimple_add_tmp_var (tmp_ha);
6979
6980 /* Establish the base type. */
6981 switch (ag_mode)
6982 {
6983 case SFmode:
6984 field_t = float_type_node;
6985 field_ptr_t = float_ptr_type_node;
6986 break;
6987 case DFmode:
6988 field_t = double_type_node;
6989 field_ptr_t = double_ptr_type_node;
6990 break;
6991 case TFmode:
6992 field_t = long_double_type_node;
6993 field_ptr_t = long_double_ptr_type_node;
6994 break;
6995/* The half precision and quad precision are not fully supported yet. Enable
6996 the following code after the support is complete. Need to find the correct
6997 type node for __fp16 *. */
6998#if 0
6999 case HFmode:
7000 field_t = float_type_node;
7001 field_ptr_t = float_ptr_type_node;
7002 break;
7003#endif
7004 case V2SImode:
7005 case V4SImode:
7006 {
7007 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
7008 field_t = build_vector_type_for_mode (innertype, ag_mode);
7009 field_ptr_t = build_pointer_type (field_t);
7010 }
7011 break;
7012 default:
7013 gcc_assert (0);
7014 }
7015
7016 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
7017 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
7018 addr = t;
7019 t = fold_convert (field_ptr_t, addr);
7020 t = build2 (MODIFY_EXPR, field_t,
7021 build1 (INDIRECT_REF, field_t, tmp_ha),
7022 build1 (INDIRECT_REF, field_t, t));
7023
7024 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
7025 for (i = 1; i < nregs; ++i)
7026 {
7027 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
7028 u = fold_convert (field_ptr_t, addr);
7029 u = build2 (MODIFY_EXPR, field_t,
7030 build2 (MEM_REF, field_t, tmp_ha,
7031 build_int_cst (field_ptr_t,
7032 (i *
7033 int_size_in_bytes (field_t)))),
7034 build1 (INDIRECT_REF, field_t, u));
7035 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
7036 }
7037
7038 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
7039 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
7040 }
7041
7042 COND_EXPR_ELSE (cond2) = t;
7043 addr = fold_convert (build_pointer_type (type), cond1);
7044 addr = build_va_arg_indirect_ref (addr);
7045
7046 if (indirect_p)
7047 addr = build_va_arg_indirect_ref (addr);
7048
7049 return addr;
7050}
7051
7052/* Implement TARGET_SETUP_INCOMING_VARARGS. */
7053
7054static void
7055aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
7056 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7057 int no_rtl)
7058{
7059 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7060 CUMULATIVE_ARGS local_cum;
7061 int gr_saved, vr_saved;
7062
7063 /* The caller has advanced CUM up to, but not beyond, the last named
7064 argument. Advance a local copy of CUM past the last "real" named
7065 argument, to find out how many registers are left over. */
7066 local_cum = *cum;
7067 aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
7068
7069 /* Find out how many registers we need to save. */
7070 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
7071 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
7072
7073 if (TARGET_GENERAL_REGS_ONLY)
7074 {
7075 if (local_cum.aapcs_nvrn > 0)
7076 sorry ("%qs and floating point or vector arguments",
7077 "-mgeneral-regs-only");
7078 vr_saved = 0;
7079 }
7080
7081 if (!no_rtl)
7082 {
7083 if (gr_saved > 0)
7084 {
7085 rtx ptr, mem;
7086
7087 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
7088 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
7089 - gr_saved * UNITS_PER_WORD);
7090 mem = gen_frame_mem (BLKmode, ptr);
7091 set_mem_alias_set (mem, get_varargs_alias_set ());
7092
7093 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
7094 mem, gr_saved);
7095 }
7096 if (vr_saved > 0)
7097 {
7098 /* We can't use move_block_from_reg, because it will use
7099 the wrong mode, storing D regs only. */
7100 enum machine_mode mode = TImode;
7101 int off, i;
7102
7103 /* Set OFF to the offset from virtual_incoming_args_rtx of
7104 the first vector register. The VR save area lies below
7105 the GR one, and is aligned to 16 bytes. */
7106 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7107 STACK_BOUNDARY / BITS_PER_UNIT);
7108 off -= vr_saved * UNITS_PER_VREG;
7109
7110 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
7111 {
7112 rtx ptr, mem;
7113
7114 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
7115 mem = gen_frame_mem (mode, ptr);
7116 set_mem_alias_set (mem, get_varargs_alias_set ());
7117 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
7118 off += UNITS_PER_VREG;
7119 }
7120 }
7121 }
7122
7123 /* We don't save the size into *PRETEND_SIZE because we want to avoid
7124 any complication of having crtl->args.pretend_args_size changed. */
8799637a 7125 cfun->machine->frame.saved_varargs_size
43e9d192
IB
7126 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7127 STACK_BOUNDARY / BITS_PER_UNIT)
7128 + vr_saved * UNITS_PER_VREG);
7129}
7130
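/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  Without TARGET_FLOAT
   the FP/SIMD registers are unavailable, so mark them fixed and
   call-used.  */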
7131static void
7132aarch64_conditional_register_usage (void)
7133{
7134 int i;
7135 if (!TARGET_FLOAT)
7136 {
7137 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
7138 {
7139 fixed_regs[i] = 1;
7140 call_used_regs[i] = 1;
7141 }
7142 }
7143}
7144
7145/* Walk down the type tree of TYPE counting consecutive base elements.
7146 If *MODEP is VOIDmode, then set it to the first valid floating point
7147 type. If a non-floating point type is found, or if a floating point
7148 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
7149 otherwise return the count in the sub-tree. */
7150static int
7151aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
7152{
7153 enum machine_mode mode;
7154 HOST_WIDE_INT size;
7155
7156 switch (TREE_CODE (type))
7157 {
7158 case REAL_TYPE:
7159 mode = TYPE_MODE (type);
7160 if (mode != DFmode && mode != SFmode && mode != TFmode)
7161 return -1;
7162
7163 if (*modep == VOIDmode)
7164 *modep = mode;
7165
7166 if (*modep == mode)
7167 return 1;
7168
7169 break;
7170
7171 case COMPLEX_TYPE:
7172 mode = TYPE_MODE (TREE_TYPE (type));
7173 if (mode != DFmode && mode != SFmode && mode != TFmode)
7174 return -1;
7175
7176 if (*modep == VOIDmode)
7177 *modep = mode;
7178
7179 if (*modep == mode)
7180 return 2;
7181
7182 break;
7183
7184 case VECTOR_TYPE:
7185 /* Use V2SImode and V4SImode as representatives of all 64-bit
7186 and 128-bit vector types. */
7187 size = int_size_in_bytes (type);
7188 switch (size)
7189 {
7190 case 8:
7191 mode = V2SImode;
7192 break;
7193 case 16:
7194 mode = V4SImode;
7195 break;
7196 default:
7197 return -1;
7198 }
7199
7200 if (*modep == VOIDmode)
7201 *modep = mode;
7202
7203 /* Vector modes are considered to be opaque: two vectors are
7204 equivalent for the purposes of being homogeneous aggregates
7205 if they are the same size. */
7206 if (*modep == mode)
7207 return 1;
7208
7209 break;
7210
7211 case ARRAY_TYPE:
7212 {
7213 int count;
7214 tree index = TYPE_DOMAIN (type);
7215
7216 /* Can't handle incomplete types nor sizes that are not
7217 fixed. */
7218 if (!COMPLETE_TYPE_P (type)
7219 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7220 return -1;
7221
7222 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
7223 if (count == -1
7224 || !index
7225 || !TYPE_MAX_VALUE (index)
cc269bb6 7226 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
43e9d192 7227 || !TYPE_MIN_VALUE (index)
cc269bb6 7228 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
7229 || count < 0)
7230 return -1;
7231
7232 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
7233 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
7234
7235 /* There must be no padding. */
807e902e 7236 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7237 return -1;
7238
7239 return count;
7240 }
7241
7242 case RECORD_TYPE:
7243 {
7244 int count = 0;
7245 int sub_count;
7246 tree field;
7247
7248 /* Can't handle incomplete types nor sizes that are not
7249 fixed. */
7250 if (!COMPLETE_TYPE_P (type)
7251 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7252 return -1;
7253
7254 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7255 {
7256 if (TREE_CODE (field) != FIELD_DECL)
7257 continue;
7258
7259 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7260 if (sub_count < 0)
7261 return -1;
7262 count += sub_count;
7263 }
7264
7265 /* There must be no padding. */
807e902e 7266 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7267 return -1;
7268
7269 return count;
7270 }
7271
7272 case UNION_TYPE:
7273 case QUAL_UNION_TYPE:
7274 {
7275 /* These aren't very interesting except in a degenerate case. */
7276 int count = 0;
7277 int sub_count;
7278 tree field;
7279
7280 /* Can't handle incomplete types nor sizes that are not
7281 fixed. */
7282 if (!COMPLETE_TYPE_P (type)
7283 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7284 return -1;
7285
7286 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7287 {
7288 if (TREE_CODE (field) != FIELD_DECL)
7289 continue;
7290
7291 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7292 if (sub_count < 0)
7293 return -1;
7294 count = count > sub_count ? count : sub_count;
7295 }
7296
7297 /* There must be no padding. */
807e902e 7298 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7299 return -1;
7300
7301 return count;
7302 }
7303
7304 default:
7305 break;
7306 }
7307
7308 return -1;
7309}
7310
7311/* Return true if we use LRA instead of the reload pass. */
7312static bool
7313aarch64_lra_p (void)
7314{
7315 return aarch64_lra_flag;
7316}
7317
7318/* Return TRUE if the type, as described by TYPE and MODE, is a composite
7319 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
7320 array types. The C99 floating-point complex types are also considered
7321 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
7322 types, which are GCC extensions and out of the scope of AAPCS64, are
7323 treated as composite types here as well.
7324
7325 Note that MODE itself is not sufficient in determining whether a type
7326 is such a composite type or not. This is because
7327 stor-layout.c:compute_record_mode may have already changed the MODE
7328 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
7329 structure with only one field may have its MODE set to the mode of the
7330 field. Also an integer mode whose size matches the size of the
7331 RECORD_TYPE type may be used to substitute the original mode
7332 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
7333 solely relied on. */
7334
7335static bool
7336aarch64_composite_type_p (const_tree type,
7337 enum machine_mode mode)
7338{
7339 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
7340 return true;
7341
7342 if (mode == BLKmode
7343 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7344 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
7345 return true;
7346
7347 return false;
7348}
7349
7350/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
7351 type as described in AAPCS64 \S 4.1.2.
7352
7353 See the comment above aarch64_composite_type_p for the notes on MODE. */
7354
7355static bool
7356aarch64_short_vector_p (const_tree type,
7357 enum machine_mode mode)
7358{
7359 HOST_WIDE_INT size = -1;
7360
7361 if (type && TREE_CODE (type) == VECTOR_TYPE)
7362 size = int_size_in_bytes (type);
7363 else if (!aarch64_composite_type_p (type, mode)
7364 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7365 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
7366 size = GET_MODE_SIZE (mode);
7367
7368 return size == 8 || size == 16;
7369}
7370
7371/* Return TRUE if an argument, whose type is described by TYPE and MODE,
7372 shall be passed or returned in simd/fp register(s) (providing these
7373 parameter passing registers are available).
7374
7375 Upon successful return, *COUNT returns the number of needed registers,
7376 *BASE_MODE returns the mode of the individual register, and when IS_HA
7377 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
7378 floating-point aggregate or a homogeneous short-vector aggregate. */
7379
7380static bool
7381aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
7382 const_tree type,
7383 enum machine_mode *base_mode,
7384 int *count,
7385 bool *is_ha)
7386{
7387 enum machine_mode new_mode = VOIDmode;
7388 bool composite_p = aarch64_composite_type_p (type, mode);
7389
7390 if (is_ha != NULL) *is_ha = false;
7391
7392 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
7393 || aarch64_short_vector_p (type, mode))
7394 {
7395 *count = 1;
7396 new_mode = mode;
7397 }
7398 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7399 {
7400 if (is_ha != NULL) *is_ha = true;
7401 *count = 2;
7402 new_mode = GET_MODE_INNER (mode);
7403 }
7404 else if (type && composite_p)
7405 {
7406 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
7407
7408 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
7409 {
7410 if (is_ha != NULL) *is_ha = true;
7411 *count = ag_count;
7412 }
7413 else
7414 return false;
7415 }
7416 else
7417 return false;
7418
7419 *base_mode = new_mode;
7420 return true;
7421}
7422
7423/* Implement TARGET_STRUCT_VALUE_RTX. */
7424
7425static rtx
7426aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
7427 int incoming ATTRIBUTE_UNUSED)
7428{
7429 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
7430}
7431
7432/* Implement TARGET_VECTOR_MODE_SUPPORTED_P. */
7433static bool
7434aarch64_vector_mode_supported_p (enum machine_mode mode)
7435{
7436 if (TARGET_SIMD
7437 && (mode == V4SImode || mode == V8HImode
7438 || mode == V16QImode || mode == V2DImode
7439 || mode == V2SImode || mode == V4HImode
7440 || mode == V8QImode || mode == V2SFmode
7441 || mode == V4SFmode || mode == V2DFmode
7442 || mode == V1DFmode))
7443 return true;
7444
7445 return false;
7446}
7447
7448/* Return appropriate SIMD container
7449 for MODE within a vector of WIDTH bits. */
43e9d192 7450static enum machine_mode
b7342d25 7451aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
43e9d192 7452{
b7342d25 7453 gcc_assert (width == 64 || width == 128);
43e9d192 7454 if (TARGET_SIMD)
7455 {
7456 if (width == 128)
7457 switch (mode)
7458 {
7459 case DFmode:
7460 return V2DFmode;
7461 case SFmode:
7462 return V4SFmode;
7463 case SImode:
7464 return V4SImode;
7465 case HImode:
7466 return V8HImode;
7467 case QImode:
7468 return V16QImode;
7469 case DImode:
7470 return V2DImode;
7471 default:
7472 break;
7473 }
7474 else
7475 switch (mode)
7476 {
7477 case SFmode:
7478 return V2SFmode;
7479 case SImode:
7480 return V2SImode;
7481 case HImode:
7482 return V4HImode;
7483 case QImode:
7484 return V8QImode;
7485 default:
7486 break;
7487 }
7488 }
7489 return word_mode;
7490}
7491
7492/* Return 128-bit container as the preferred SIMD mode for MODE. */
7493static enum machine_mode
7494aarch64_preferred_simd_mode (enum machine_mode mode)
7495{
7496 return aarch64_simd_container_mode (mode, 128);
7497}
7498
7499/* Return the bitmask of possible vector sizes for the vectorizer
7500 to iterate over. */
7501static unsigned int
7502aarch64_autovectorize_vector_sizes (void)
7503{
7504 return (16 | 8);
7505}
7506
7507/* A table to help perform AArch64-specific name mangling for AdvSIMD
7508 vector types in order to conform to the AAPCS64 (see "Procedure
7509 Call Standard for the ARM 64-bit Architecture", Appendix A). To
7510 qualify for emission with the mangled names defined in that document,
7511 a vector type must not only be of the correct mode but also be
7512 composed of AdvSIMD vector element types (e.g.
7513 __builtin_aarch64_simd_qi); these types are registered by
7514 aarch64_init_simd_builtins (). In other words, vector types defined
7515 in other ways e.g. via vector_size attribute will get default
7516 mangled names. */
7517typedef struct
7518{
7519 enum machine_mode mode;
7520 const char *element_type_name;
7521 const char *mangled_name;
7522} aarch64_simd_mangle_map_entry;
7523
7524static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
7525 /* 64-bit containerized types. */
7526 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
7527 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
7528 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
7529 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
7530 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
7531 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
7532 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
7533 { DImode, "__builtin_aarch64_simd_di", "11__Int64x1_t" },
7534 { DImode, "__builtin_aarch64_simd_udi", "12__Uint64x1_t" },
c6a29a09 7535 { V1DFmode, "__builtin_aarch64_simd_df", "13__Float64x1_t" },
7536 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
7537 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
7538 /* 128-bit containerized types. */
7539 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
7540 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
7541 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
7542 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
7543 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
7544 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
7545 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
7546 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
7547 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
7548 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
7549 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
7550 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
7baa225d 7551 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
c6fc9e43
YZ
7552 { VOIDmode, NULL, NULL }
7553};
7554
ac2b960f
YZ
7555/* Implement TARGET_MANGLE_TYPE. */
7556
6f549691 7557static const char *
ac2b960f
YZ
7558aarch64_mangle_type (const_tree type)
7559{
7560 /* The AArch64 ABI documents say that "__va_list" has to be
7561 mangled as if it is in the "std" namespace. */
7562 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
7563 return "St9__va_list";
7564
c6fc9e43
YZ
7565 /* Check the mode of the vector type, and the name of the vector
7566 element type, against the table. */
7567 if (TREE_CODE (type) == VECTOR_TYPE)
7568 {
7569 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
7570
7571 while (pos->mode != VOIDmode)
7572 {
7573 tree elt_type = TREE_TYPE (type);
7574
7575 if (pos->mode == TYPE_MODE (type)
7576 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
7577 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
7578 pos->element_type_name))
7579 return pos->mangled_name;
7580
7581 pos++;
7582 }
7583 }
7584
ac2b960f
YZ
7585 /* Use the default mangling. */
7586 return NULL;
7587}
7588
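/* As an illustration (function name hypothetical): with the table
   above, a parameter of type int32x4_t (mode V4SImode, element type
   __builtin_aarch64_simd_si) mangles as "11__Int32x4_t", so a
   declaration such as "void f (int32x4_t)" would mangle as
   "_Z1f11__Int32x4_t" under the Itanium C++ ABI. */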
75cf1494
KT
7589static int
7590is_mem_p (rtx *x, void *data ATTRIBUTE_UNUSED)
7591{
7592 return MEM_P (*x);
7593}
7594
7595static bool
7596is_memory_op (rtx_insn *mem_insn)
7597{
7598 rtx pattern = PATTERN (mem_insn);
7599 return for_each_rtx (&pattern, is_mem_p, NULL);
7600}
7601
7602/* Find the first rtx_insn before insn that will generate an assembly
7603 instruction. */
7604
7605static rtx_insn *
7606aarch64_prev_real_insn (rtx_insn *insn)
7607{
7608 if (!insn)
7609 return NULL;
7610
7611 do
7612 {
7613 insn = prev_real_insn (insn);
7614 }
7615 while (insn && recog_memoized (insn) < 0);
7616
7617 return insn;
7618}
7619
7620static bool
7621is_madd_op (enum attr_type t1)
7622{
7623 unsigned int i;
7624 /* A number of these may be AArch32 only. */
7625 enum attr_type mlatypes[] = {
7626 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
7627 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
7628 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
7629 };
7630
7631 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
7632 {
7633 if (t1 == mlatypes[i])
7634 return true;
7635 }
7636
7637 return false;
7638}
7639
7640/* Check if there is a register dependency between a load and the insn
7641 for which we hold recog_data. */
7642
7643static bool
7644dep_between_memop_and_curr (rtx memop)
7645{
7646 rtx load_reg;
7647 int opno;
7648
7649 if (!memop)
7650 return false;
7651
7652 if (!REG_P (SET_DEST (memop)))
7653 return false;
7654
7655 load_reg = SET_DEST (memop);
7656 for (opno = 0; opno < recog_data.n_operands; opno++)
7657 {
7658 rtx operand = recog_data.operand[opno];
7659 if (REG_P (operand)
7660 && reg_overlap_mentioned_p (load_reg, operand))
7661 return true;
7662
7663 }
7664 return false;
7665}
7666
7667bool
7668aarch64_madd_needs_nop (rtx_insn* insn)
7669{
7670 enum attr_type attr_type;
7671 rtx_insn *prev;
7672 rtx body;
7673
7674 if (!aarch64_fix_a53_err835769)
7675 return false;
7676
7677 if (recog_memoized (insn) < 0)
7678 return false;
7679
7680 attr_type = get_attr_type (insn);
7681 if (!is_madd_op (attr_type))
7682 return false;
7683
7684 prev = aarch64_prev_real_insn (insn);
7685 if (!prev)
7686 return false;
7687
7688 body = single_set (prev);
7689
7690 /* If the previous insn is a memory op and there is no dependency between
7691 it and the madd, emit a nop between them. If we know the previous insn is
7692 a memory op but body is NULL, emit the nop to be safe; it is probably a
7693 load/store pair insn. */
7694 if (is_memory_op (prev)
7695 && GET_MODE (recog_data.operand[0]) == DImode
7696 && (!dep_between_memop_and_curr (body)))
7697 return true;
7698
7699 return false;
7700
7701}
7702
7703void
7704aarch64_final_prescan_insn (rtx_insn *insn)
7705{
7706 if (aarch64_madd_needs_nop (insn))
7707 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
7708}
7709
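/* To sketch the effect of the workaround (register numbers are
   illustrative), for a sequence such as

     ldr  x1, [x2]
     madd x0, x3, x4, x5

   the prescan above emits

     ldr  x1, [x2]
     nop  // between mem op and mult-accumulate
     madd x0, x3, x4, x5

   separating the 64-bit multiply-accumulate from the immediately
   preceding memory operation, as the Cortex-A53 erratum 835769
   workaround requires. */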
7710
43e9d192 7711/* Return the equivalent letter for size. */
81c2dfb9 7712static char
43e9d192
IB
7713sizetochar (int size)
7714{
7715 switch (size)
7716 {
7717 case 64: return 'd';
7718 case 32: return 's';
7719 case 16: return 'h';
7720 case 8 : return 'b';
7721 default: gcc_unreachable ();
7722 }
7723}
7724
3520f7cc
JG
7725/* Return true iff x is a uniform vector of floating-point
7726 constants, and the constant can be represented in
7727 quarter-precision form. Note, as aarch64_float_const_representable
7728 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
7729static bool
7730aarch64_vect_float_const_representable_p (rtx x)
7731{
7732 int i = 0;
7733 REAL_VALUE_TYPE r0, ri;
7734 rtx x0, xi;
7735
7736 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
7737 return false;
7738
7739 x0 = CONST_VECTOR_ELT (x, 0);
7740 if (!CONST_DOUBLE_P (x0))
7741 return false;
7742
7743 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
7744
7745 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
7746 {
7747 xi = CONST_VECTOR_ELT (x, i);
7748 if (!CONST_DOUBLE_P (xi))
7749 return false;
7750
7751 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
7752 if (!REAL_VALUES_EQUAL (r0, ri))
7753 return false;
7754 }
7755
7756 return aarch64_float_const_representable_p (x0);
7757}
7758
d8edd899 7759/* Return true for valid and false for invalid. */
3ea63f60 7760bool
48063b9d
IB
7761aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
7762 struct simd_immediate_info *info)
43e9d192
IB
7763{
7764#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
7765 matches = 1; \
7766 for (i = 0; i < idx; i += (STRIDE)) \
7767 if (!(TEST)) \
7768 matches = 0; \
7769 if (matches) \
7770 { \
7771 immtype = (CLASS); \
7772 elsize = (ELSIZE); \
43e9d192
IB
7773 eshift = (SHIFT); \
7774 emvn = (NEG); \
7775 break; \
7776 }
7777
7778 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7779 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7780 unsigned char bytes[16];
43e9d192
IB
7781 int immtype = -1, matches;
7782 unsigned int invmask = inverse ? 0xff : 0;
7783 int eshift, emvn;
7784
43e9d192 7785 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
3520f7cc 7786 {
81c2dfb9
IB
7787 if (! (aarch64_simd_imm_zero_p (op, mode)
7788 || aarch64_vect_float_const_representable_p (op)))
d8edd899 7789 return false;
3520f7cc 7790
48063b9d
IB
7791 if (info)
7792 {
7793 info->value = CONST_VECTOR_ELT (op, 0);
81c2dfb9 7794 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
48063b9d
IB
7795 info->mvn = false;
7796 info->shift = 0;
7797 }
3520f7cc 7798
d8edd899 7799 return true;
3520f7cc 7800 }
43e9d192
IB
7801
7802 /* Splat vector constant out into a byte vector. */
7803 for (i = 0; i < n_elts; i++)
7804 {
4b1e108c
AL
7805 /* The vector is provided in GCC's endian-neutral fashion. For aarch64_be,
7806 it must be laid out in the vector register in reverse order. */
7807 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
43e9d192
IB
7808 unsigned HOST_WIDE_INT elpart;
7809 unsigned int part, parts;
7810
4aa81c2e 7811 if (CONST_INT_P (el))
43e9d192
IB
7812 {
7813 elpart = INTVAL (el);
7814 parts = 1;
7815 }
7816 else if (GET_CODE (el) == CONST_DOUBLE)
7817 {
7818 elpart = CONST_DOUBLE_LOW (el);
7819 parts = 2;
7820 }
7821 else
7822 gcc_unreachable ();
7823
7824 for (part = 0; part < parts; part++)
7825 {
7826 unsigned int byte;
7827 for (byte = 0; byte < innersize; byte++)
7828 {
7829 bytes[idx++] = (elpart & 0xff) ^ invmask;
7830 elpart >>= BITS_PER_UNIT;
7831 }
7832 if (GET_CODE (el) == CONST_DOUBLE)
7833 elpart = CONST_DOUBLE_HIGH (el);
7834 }
7835 }
7836
7837 /* Sanity check. */
7838 gcc_assert (idx == GET_MODE_SIZE (mode));
7839
7840 do
7841 {
7842 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7843 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
7844
7845 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7846 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7847
7848 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7849 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7850
7851 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7852 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
7853
7854 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
7855
7856 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
7857
7858 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7859 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
7860
7861 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7862 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7863
7864 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7865 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7866
7867 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7868 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
7869
7870 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
7871
7872 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
7873
7874 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
e4f0f84d 7875 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
43e9d192
IB
7876
7877 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
e4f0f84d 7878 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
43e9d192
IB
7879
7880 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
e4f0f84d 7881 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
43e9d192
IB
7882
7883 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
e4f0f84d 7884 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
43e9d192
IB
7885
7886 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
7887
7888 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7889 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
7890 }
7891 while (0);
7892
e4f0f84d 7893 if (immtype == -1)
d8edd899 7894 return false;
43e9d192 7895
48063b9d 7896 if (info)
43e9d192 7897 {
48063b9d 7898 info->element_width = elsize;
48063b9d
IB
7899 info->mvn = emvn != 0;
7900 info->shift = eshift;
7901
43e9d192
IB
7902 unsigned HOST_WIDE_INT imm = 0;
7903
e4f0f84d
TB
7904 if (immtype >= 12 && immtype <= 15)
7905 info->msl = true;
7906
43e9d192
IB
7907 /* Un-invert bytes of recognized vector, if necessary. */
7908 if (invmask != 0)
7909 for (i = 0; i < idx; i++)
7910 bytes[i] ^= invmask;
7911
7912 if (immtype == 17)
7913 {
7914 /* FIXME: Broken on 32-bit H_W_I hosts. */
7915 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7916
7917 for (i = 0; i < 8; i++)
7918 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7919 << (i * BITS_PER_UNIT);
7920
43e9d192 7921
48063b9d
IB
7922 info->value = GEN_INT (imm);
7923 }
7924 else
7925 {
7926 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7927 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
43e9d192
IB
7928
7929 /* Construct 'abcdefgh' because the assembler cannot handle
48063b9d
IB
7930 generic constants. */
7931 if (info->mvn)
43e9d192 7932 imm = ~imm;
48063b9d
IB
7933 imm = (imm >> info->shift) & 0xff;
7934 info->value = GEN_INT (imm);
7935 }
43e9d192
IB
7936 }
7937
48063b9d 7938 return true;
43e9d192
IB
7939#undef CHECK
7940}
7941
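/* As a worked example, the V4SImode constant with every element equal
   to 0x0000ab00 splats to the byte pattern { 00, ab, 00, 00, ... } and
   matches the second 32-bit CHECK above, so INFO records element_width
   32, shift 8, mvn false and value 0xab; aarch64_output_simd_mov_immediate
   then prints the template "movi\t%0.4s, 0xab, lsl 8". */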
43e9d192
IB
7942/* Check that immediate shift constants are within range. */
7943bool
7944aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
7945{
7946 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
7947 if (left)
ddeabd3e 7948 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
43e9d192 7949 else
ddeabd3e 7950 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
43e9d192
IB
7951}
7952
3520f7cc
JG
7953/* Return true if X is a uniform vector where all elements
7954 are either the floating-point constant 0.0 or the
7955 integer constant 0. */
43e9d192
IB
7956bool
7957aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
7958{
3520f7cc 7959 return x == CONST0_RTX (mode);
43e9d192
IB
7960}
7961
7962bool
7963aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
7964{
7965 HOST_WIDE_INT imm = INTVAL (x);
7966 int i;
7967
7968 for (i = 0; i < 8; i++)
7969 {
7970 unsigned int byte = imm & 0xff;
7971 if (byte != 0xff && byte != 0)
7972 return false;
7973 imm >>= 8;
7974 }
7975
7976 return true;
7977}
7978
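/* E.g. 0x00ff00ff00ff00ff passes this test (every byte is 0x00 or
   0xff) and is therefore encodable as a 64-bit MOVI immediate, whereas
   0x1234 is rejected because the byte 0x34 is neither 0x00 nor 0xff. */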
83f8c414
CSS
7979bool
7980aarch64_mov_operand_p (rtx x,
a5350ddc 7981 enum aarch64_symbol_context context,
83f8c414
CSS
7982 enum machine_mode mode)
7983{
83f8c414
CSS
7984 if (GET_CODE (x) == HIGH
7985 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7986 return true;
7987
7988 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
7989 return true;
7990
7991 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
7992 return true;
7993
a5350ddc
CSS
7994 return aarch64_classify_symbolic_expression (x, context)
7995 == SYMBOL_TINY_ABSOLUTE;
83f8c414
CSS
7996}
7997
43e9d192
IB
7998/* Return a const_int vector of VAL. */
7999rtx
8000aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
8001{
8002 int nunits = GET_MODE_NUNITS (mode);
8003 rtvec v = rtvec_alloc (nunits);
8004 int i;
8005
8006 for (i = 0; i < nunits; i++)
8007 RTVEC_ELT (v, i) = GEN_INT (val);
8008
8009 return gen_rtx_CONST_VECTOR (mode, v);
8010}
8011
051d0e2f
SN
8012/* Check OP is a legal scalar immediate for the MOVI instruction. */
8013
8014bool
8015aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
8016{
8017 enum machine_mode vmode;
8018
8019 gcc_assert (!VECTOR_MODE_P (mode));
8020 vmode = aarch64_preferred_simd_mode (mode);
8021 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
48063b9d 8022 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
051d0e2f
SN
8023}
8024
988fa693
JG
8025/* Construct and return a PARALLEL RTX vector with elements numbering the
8026 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
8027 the vector - from the perspective of the architecture. This does not
8028 line up with GCC's perspective on lane numbers, so we end up with
8029 different masks depending on our target endian-ness. The diagram
8030 below may help. We must draw the distinction when building masks
8031 which select one half of the vector. An instruction selecting
8032 architectural low-lanes for a big-endian target, must be described using
8033 a mask selecting GCC high-lanes.
8034
8035 Big-Endian Little-Endian
8036
8037GCC 0 1 2 3 3 2 1 0
8038 | x | x | x | x | | x | x | x | x |
8039Architecture 3 2 1 0 3 2 1 0
8040
8041Low Mask: { 2, 3 } { 0, 1 }
8042High Mask: { 0, 1 } { 2, 3 }
8043*/
8044
43e9d192
IB
8045rtx
8046aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
8047{
8048 int nunits = GET_MODE_NUNITS (mode);
8049 rtvec v = rtvec_alloc (nunits / 2);
988fa693
JG
8050 int high_base = nunits / 2;
8051 int low_base = 0;
8052 int base;
43e9d192
IB
8053 rtx t1;
8054 int i;
8055
988fa693
JG
8056 if (BYTES_BIG_ENDIAN)
8057 base = high ? low_base : high_base;
8058 else
8059 base = high ? high_base : low_base;
8060
8061 for (i = 0; i < nunits / 2; i++)
43e9d192
IB
8062 RTVEC_ELT (v, i) = GEN_INT (base + i);
8063
8064 t1 = gen_rtx_PARALLEL (mode, v);
8065 return t1;
8066}
8067
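/* Concretely, aarch64_simd_vect_par_cnst_half (V4SImode, true) returns
   (parallel [(const_int 2) (const_int 3)]) for little-endian and
   (parallel [(const_int 0) (const_int 1)]) for big-endian, matching
   the "High Mask" row of the diagram above. */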
988fa693
JG
8068/* Check OP for validity as a PARALLEL RTX vector with elements
8069 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
8070 from the perspective of the architecture. See the diagram above
8071 aarch64_simd_vect_par_cnst_half for more details. */
8072
8073bool
8074aarch64_simd_check_vect_par_cnst_half (rtx op, enum machine_mode mode,
8075 bool high)
8076{
8077 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
8078 HOST_WIDE_INT count_op = XVECLEN (op, 0);
8079 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
8080 int i = 0;
8081
8082 if (!VECTOR_MODE_P (mode))
8083 return false;
8084
8085 if (count_op != count_ideal)
8086 return false;
8087
8088 for (i = 0; i < count_ideal; i++)
8089 {
8090 rtx elt_op = XVECEXP (op, 0, i);
8091 rtx elt_ideal = XVECEXP (ideal, 0, i);
8092
4aa81c2e 8093 if (!CONST_INT_P (elt_op)
988fa693
JG
8094 || INTVAL (elt_ideal) != INTVAL (elt_op))
8095 return false;
8096 }
8097 return true;
8098}
8099
43e9d192
IB
8100/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
8101 HIGH (exclusive). */
8102void
8103aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8104{
8105 HOST_WIDE_INT lane;
4aa81c2e 8106 gcc_assert (CONST_INT_P (operand));
43e9d192
IB
8107 lane = INTVAL (operand);
8108
8109 if (lane < low || lane >= high)
8110 error ("lane out of range");
8111}
8112
43e9d192
IB
8113/* Emit code to place an AdvSIMD pair result in memory locations (with equal
8114 registers). */
8115void
8116aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
8117 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
8118 rtx op1)
8119{
8120 rtx mem = gen_rtx_MEM (mode, destaddr);
8121 rtx tmp1 = gen_reg_rtx (mode);
8122 rtx tmp2 = gen_reg_rtx (mode);
8123
8124 emit_insn (intfn (tmp1, op1, tmp2));
8125
8126 emit_move_insn (mem, tmp1);
8127 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
8128 emit_move_insn (mem, tmp2);
8129}
8130
8131/* Return TRUE if OP is a valid vector addressing mode. */
8132bool
8133aarch64_simd_mem_operand_p (rtx op)
8134{
8135 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
4aa81c2e 8136 || REG_P (XEXP (op, 0)));
43e9d192
IB
8137}
8138
8139/* Set up OPERANDS for a register copy from SRC to DEST, taking care
8140 not to early-clobber SRC registers in the process.
8141
8142 We assume that the operands described by SRC and DEST represent a
8143 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
8144 number of components into which the copy has been decomposed. */
8145void
8146aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
8147 rtx *src, unsigned int count)
8148{
8149 unsigned int i;
8150
8151 if (!reg_overlap_mentioned_p (operands[0], operands[1])
8152 || REGNO (operands[0]) < REGNO (operands[1]))
8153 {
8154 for (i = 0; i < count; i++)
8155 {
8156 operands[2 * i] = dest[i];
8157 operands[2 * i + 1] = src[i];
8158 }
8159 }
8160 else
8161 {
8162 for (i = 0; i < count; i++)
8163 {
8164 operands[2 * i] = dest[count - i - 1];
8165 operands[2 * i + 1] = src[count - i - 1];
8166 }
8167 }
8168}
8169
8170/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
8171 one of VSTRUCT modes: OI, CI or XI. */
8172int
647d790d 8173aarch64_simd_attr_length_move (rtx_insn *insn)
43e9d192 8174{
43e9d192
IB
8175 enum machine_mode mode;
8176
8177 extract_insn_cached (insn);
8178
8179 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
8180 {
8181 mode = GET_MODE (recog_data.operand[0]);
8182 switch (mode)
8183 {
8184 case OImode:
8185 return 8;
8186 case CImode:
8187 return 12;
8188 case XImode:
8189 return 16;
8190 default:
8191 gcc_unreachable ();
8192 }
8193 }
8194 return 4;
8195}
8196
db0253a4
TB
8197/* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
8198 alignment of a vector to 128 bits. */
8199static HOST_WIDE_INT
8200aarch64_simd_vector_alignment (const_tree type)
8201{
9439e9a1 8202 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
db0253a4
TB
8203 return MIN (align, 128);
8204}
8205
8206/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
8207static bool
8208aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
8209{
8210 if (is_packed)
8211 return false;
8212
8213 /* We guarantee alignment for vectors up to 128-bits. */
8214 if (tree_int_cst_compare (TYPE_SIZE (type),
8215 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
8216 return false;
8217
8218 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
8219 return true;
8220}
8221
4369c11e
TB
8222/* If VALS is a vector constant that can be loaded into a register
8223 using DUP, generate instructions to do so and return an RTX to
8224 assign to the register. Otherwise return NULL_RTX. */
8225static rtx
8226aarch64_simd_dup_constant (rtx vals)
8227{
8228 enum machine_mode mode = GET_MODE (vals);
8229 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8230 int n_elts = GET_MODE_NUNITS (mode);
8231 bool all_same = true;
8232 rtx x;
8233 int i;
8234
8235 if (GET_CODE (vals) != CONST_VECTOR)
8236 return NULL_RTX;
8237
8238 for (i = 1; i < n_elts; ++i)
8239 {
8240 x = CONST_VECTOR_ELT (vals, i);
8241 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
8242 all_same = false;
8243 }
8244
8245 if (!all_same)
8246 return NULL_RTX;
8247
8248 /* We can load this constant by using DUP and a constant in a
8249 single ARM register. This will be cheaper than a vector
8250 load. */
8251 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
8252 return gen_rtx_VEC_DUPLICATE (mode, x);
8253}
8254
8255
8256/* Generate code to load VALS, which is a PARALLEL containing only
8257 constants (for vec_init) or CONST_VECTOR, efficiently into a
8258 register. Returns an RTX to copy into the register, or NULL_RTX
8259 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
1df3f464 8260static rtx
4369c11e
TB
8261aarch64_simd_make_constant (rtx vals)
8262{
8263 enum machine_mode mode = GET_MODE (vals);
8264 rtx const_dup;
8265 rtx const_vec = NULL_RTX;
8266 int n_elts = GET_MODE_NUNITS (mode);
8267 int n_const = 0;
8268 int i;
8269
8270 if (GET_CODE (vals) == CONST_VECTOR)
8271 const_vec = vals;
8272 else if (GET_CODE (vals) == PARALLEL)
8273 {
8274 /* A CONST_VECTOR must contain only CONST_INTs and
8275 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8276 Only store valid constants in a CONST_VECTOR. */
8277 for (i = 0; i < n_elts; ++i)
8278 {
8279 rtx x = XVECEXP (vals, 0, i);
8280 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
8281 n_const++;
8282 }
8283 if (n_const == n_elts)
8284 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8285 }
8286 else
8287 gcc_unreachable ();
8288
8289 if (const_vec != NULL_RTX
48063b9d 8290 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
4369c11e
TB
8291 /* Load using MOVI/MVNI. */
8292 return const_vec;
8293 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
8294 /* Loaded using DUP. */
8295 return const_dup;
8296 else if (const_vec != NULL_RTX)
8297 /* Load from constant pool. We cannot take advantage of single-cycle
8298 LD1 because we need a PC-relative addressing mode. */
8299 return const_vec;
8300 else
8301 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8302 We cannot construct an initializer. */
8303 return NULL_RTX;
8304}
8305
8306void
8307aarch64_expand_vector_init (rtx target, rtx vals)
8308{
8309 enum machine_mode mode = GET_MODE (target);
8310 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8311 int n_elts = GET_MODE_NUNITS (mode);
8312 int n_var = 0, one_var = -1;
8313 bool all_same = true;
8314 rtx x, mem;
8315 int i;
8316
8317 x = XVECEXP (vals, 0, 0);
8318 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8319 n_var = 1, one_var = 0;
8320
8321 for (i = 1; i < n_elts; ++i)
8322 {
8323 x = XVECEXP (vals, 0, i);
8324 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8325 ++n_var, one_var = i;
8326
8327 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8328 all_same = false;
8329 }
8330
8331 if (n_var == 0)
8332 {
8333 rtx constant = aarch64_simd_make_constant (vals);
8334 if (constant != NULL_RTX)
8335 {
8336 emit_move_insn (target, constant);
8337 return;
8338 }
8339 }
8340
8341 /* Splat a single non-constant element if we can. */
8342 if (all_same)
8343 {
8344 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8345 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
8346 return;
8347 }
8348
8349 /* One field is non-constant. Load constant then overwrite varying
8350 field. This is more efficient than using the stack. */
8351 if (n_var == 1)
8352 {
8353 rtx copy = copy_rtx (vals);
8354 rtx index = GEN_INT (one_var);
8355 enum insn_code icode;
8356
8357 /* Load constant part of vector, substitute neighboring value for
8358 varying element. */
8359 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
8360 aarch64_expand_vector_init (target, copy);
8361
8362 /* Insert variable. */
8363 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8364 icode = optab_handler (vec_set_optab, mode);
8365 gcc_assert (icode != CODE_FOR_nothing);
8366 emit_insn (GEN_FCN (icode) (target, x, index));
8367 return;
8368 }
8369
8370 /* Construct the vector in memory one field at a time
8371 and load the whole vector. */
8372 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
8373 for (i = 0; i < n_elts; i++)
8374 emit_move_insn (adjust_address_nv (mem, inner_mode,
8375 i * GET_MODE_SIZE (inner_mode)),
8376 XVECEXP (vals, 0, i));
8377 emit_move_insn (target, mem);
8378
8379}
8380
43e9d192
IB
8381static unsigned HOST_WIDE_INT
8382aarch64_shift_truncation_mask (enum machine_mode mode)
8383{
8384 return
8385 (aarch64_vector_mode_supported_p (mode)
8386 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
8387}
8388
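/* For instance, SImode yields a mask of 31 and DImode a mask of 63,
   telling the middle end that scalar shift counts truncate modulo the
   bit width; vector and vector-struct modes yield 0, since AdvSIMD
   shift instructions are not guaranteed to truncate their count. */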
8389#ifndef TLS_SECTION_ASM_FLAG
8390#define TLS_SECTION_ASM_FLAG 'T'
8391#endif
8392
8393void
8394aarch64_elf_asm_named_section (const char *name, unsigned int flags,
8395 tree decl ATTRIBUTE_UNUSED)
8396{
8397 char flagchars[10], *f = flagchars;
8398
8399 /* If we have already declared this section, we can use an
8400 abbreviated form to switch back to it -- unless this section is
8401 part of a COMDAT group, in which case GAS requires the full
8402 declaration every time. */
8403 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8404 && (flags & SECTION_DECLARED))
8405 {
8406 fprintf (asm_out_file, "\t.section\t%s\n", name);
8407 return;
8408 }
8409
8410 if (!(flags & SECTION_DEBUG))
8411 *f++ = 'a';
8412 if (flags & SECTION_WRITE)
8413 *f++ = 'w';
8414 if (flags & SECTION_CODE)
8415 *f++ = 'x';
8416 if (flags & SECTION_SMALL)
8417 *f++ = 's';
8418 if (flags & SECTION_MERGE)
8419 *f++ = 'M';
8420 if (flags & SECTION_STRINGS)
8421 *f++ = 'S';
8422 if (flags & SECTION_TLS)
8423 *f++ = TLS_SECTION_ASM_FLAG;
8424 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8425 *f++ = 'G';
8426 *f = '\0';
8427
8428 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
8429
8430 if (!(flags & SECTION_NOTYPE))
8431 {
8432 const char *type;
8433 const char *format;
8434
8435 if (flags & SECTION_BSS)
8436 type = "nobits";
8437 else
8438 type = "progbits";
8439
8440#ifdef TYPE_OPERAND_FMT
8441 format = "," TYPE_OPERAND_FMT;
8442#else
8443 format = ",@%s";
8444#endif
8445
8446 fprintf (asm_out_file, format, type);
8447
8448 if (flags & SECTION_ENTSIZE)
8449 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
8450 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8451 {
8452 if (TREE_CODE (decl) == IDENTIFIER_NODE)
8453 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
8454 else
8455 fprintf (asm_out_file, ",%s,comdat",
8456 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
8457 }
8458 }
8459
8460 putc ('\n', asm_out_file);
8461}
8462
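/* Typical output of the function above (section and group names are
   illustrative):

     .section .text.unlikely,"ax",@progbits
     .section .text._Z3foov,"axG",@progbits,_Z3foov,comdat
*/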
8463/* Select a format to encode pointers in exception handling data. */
8464int
8465aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
8466{
8467 int type;
8468 switch (aarch64_cmodel)
8469 {
8470 case AARCH64_CMODEL_TINY:
8471 case AARCH64_CMODEL_TINY_PIC:
8472 case AARCH64_CMODEL_SMALL:
8473 case AARCH64_CMODEL_SMALL_PIC:
8474 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
8475 for everything. */
8476 type = DW_EH_PE_sdata4;
8477 break;
8478 default:
8479 /* No assumptions here. 8-byte relocs required. */
8480 type = DW_EH_PE_sdata8;
8481 break;
8482 }
8483 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
8484}
8485
0462169c
SN
8486/* Emit load exclusive. */
8487
8488static void
8489aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
8490 rtx mem, rtx model_rtx)
8491{
8492 rtx (*gen) (rtx, rtx, rtx);
8493
8494 switch (mode)
8495 {
8496 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
8497 case HImode: gen = gen_aarch64_load_exclusivehi; break;
8498 case SImode: gen = gen_aarch64_load_exclusivesi; break;
8499 case DImode: gen = gen_aarch64_load_exclusivedi; break;
8500 default:
8501 gcc_unreachable ();
8502 }
8503
8504 emit_insn (gen (rval, mem, model_rtx));
8505}
8506
8507/* Emit store exclusive. */
8508
8509static void
8510aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
8511 rtx rval, rtx mem, rtx model_rtx)
8512{
8513 rtx (*gen) (rtx, rtx, rtx, rtx);
8514
8515 switch (mode)
8516 {
8517 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
8518 case HImode: gen = gen_aarch64_store_exclusivehi; break;
8519 case SImode: gen = gen_aarch64_store_exclusivesi; break;
8520 case DImode: gen = gen_aarch64_store_exclusivedi; break;
8521 default:
8522 gcc_unreachable ();
8523 }
8524
8525 emit_insn (gen (bval, rval, mem, model_rtx));
8526}
8527
8528/* Mark the previous jump instruction as unlikely. */
8529
8530static void
8531aarch64_emit_unlikely_jump (rtx insn)
8532{
e5af9ddd 8533 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
0462169c
SN
8534
8535 insn = emit_jump_insn (insn);
e5af9ddd 8536 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
0462169c
SN
8537}
8538
8539/* Expand a compare and swap pattern. */
8540
8541void
8542aarch64_expand_compare_and_swap (rtx operands[])
8543{
8544 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
8545 enum machine_mode mode, cmp_mode;
8546 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
8547
8548 bval = operands[0];
8549 rval = operands[1];
8550 mem = operands[2];
8551 oldval = operands[3];
8552 newval = operands[4];
8553 is_weak = operands[5];
8554 mod_s = operands[6];
8555 mod_f = operands[7];
8556 mode = GET_MODE (mem);
8557 cmp_mode = mode;
8558
8559 /* Normally the succ memory model must be stronger than fail, but in the
8560 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
8561 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
8562
8563 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
8564 && INTVAL (mod_s) == MEMMODEL_RELEASE)
8565 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
8566
8567 switch (mode)
8568 {
8569 case QImode:
8570 case HImode:
8571 /* For short modes, we're going to perform the comparison in SImode,
8572 so do the zero-extension now. */
8573 cmp_mode = SImode;
8574 rval = gen_reg_rtx (SImode);
8575 oldval = convert_modes (SImode, mode, oldval, true);
8576 /* Fall through. */
8577
8578 case SImode:
8579 case DImode:
8580 /* Force the value into a register if needed. */
8581 if (!aarch64_plus_operand (oldval, mode))
8582 oldval = force_reg (cmp_mode, oldval);
8583 break;
8584
8585 default:
8586 gcc_unreachable ();
8587 }
8588
8589 switch (mode)
8590 {
8591 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
8592 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
8593 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
8594 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
8595 default:
8596 gcc_unreachable ();
8597 }
8598
8599 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
8600
8601 if (mode == QImode || mode == HImode)
8602 emit_move_insn (operands[1], gen_lowpart (mode, rval));
8603
8604 x = gen_rtx_REG (CCmode, CC_REGNUM);
8605 x = gen_rtx_EQ (SImode, x, const0_rtx);
8606 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
8607}
8608
8609/* Split a compare and swap pattern. */
8610
8611void
8612aarch64_split_compare_and_swap (rtx operands[])
8613{
8614 rtx rval, mem, oldval, newval, scratch;
8615 enum machine_mode mode;
0462169c 8616 bool is_weak;
5d8a22a5
DM
8617 rtx_code_label *label1, *label2;
8618 rtx x, cond;
0462169c
SN
8619
8620 rval = operands[0];
8621 mem = operands[1];
8622 oldval = operands[2];
8623 newval = operands[3];
8624 is_weak = (operands[4] != const0_rtx);
0462169c
SN
8625 scratch = operands[7];
8626 mode = GET_MODE (mem);
8627
5d8a22a5 8628 label1 = NULL;
0462169c
SN
8629 if (!is_weak)
8630 {
8631 label1 = gen_label_rtx ();
8632 emit_label (label1);
8633 }
8634 label2 = gen_label_rtx ();
8635
8636 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
8637
8638 cond = aarch64_gen_compare_reg (NE, rval, oldval);
8639 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8640 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8641 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
8642 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8643
8644 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
8645
8646 if (!is_weak)
8647 {
8648 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
8649 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8650 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
8651 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8652 }
8653 else
8654 {
8655 cond = gen_rtx_REG (CCmode, CC_REGNUM);
8656 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
8657 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
8658 }
8659
8660 emit_label (label2);
8661}
8662
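/* For a strong SImode compare-and-swap, the split above yields a loop
   of roughly this shape (mnemonics vary with the memory model, and
   register numbers are illustrative):

   .L1: ldaxr w0, [x1]
        cmp   w0, w2
        bne   .L2
        stlxr w3, w4, [x1]
        cbnz  w3, .L1
   .L2:
*/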
8663/* Split an atomic operation. */
8664
8665void
8666aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
8667 rtx value, rtx model_rtx, rtx cond)
8668{
8669 enum machine_mode mode = GET_MODE (mem);
8670 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
5d8a22a5
DM
8671 rtx_code_label *label;
8672 rtx x;
0462169c
SN
8673
8674 label = gen_label_rtx ();
8675 emit_label (label);
8676
8677 if (new_out)
8678 new_out = gen_lowpart (wmode, new_out);
8679 if (old_out)
8680 old_out = gen_lowpart (wmode, old_out);
8681 else
8682 old_out = new_out;
8683 value = simplify_gen_subreg (wmode, value, mode, 0);
8684
8685 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
8686
8687 switch (code)
8688 {
8689 case SET:
8690 new_out = value;
8691 break;
8692
8693 case NOT:
8694 x = gen_rtx_AND (wmode, old_out, value);
8695 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8696 x = gen_rtx_NOT (wmode, new_out);
8697 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8698 break;
8699
8700 case MINUS:
8701 if (CONST_INT_P (value))
8702 {
8703 value = GEN_INT (-INTVAL (value));
8704 code = PLUS;
8705 }
8706 /* Fall through. */
8707
8708 default:
8709 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
8710 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8711 break;
8712 }
8713
8714 aarch64_emit_store_exclusive (mode, cond, mem,
8715 gen_lowpart (mode, new_out), model_rtx);
8716
8717 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8718 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8719 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
8720 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8721}
8722
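/* In outline, splitting an SImode atomic add produces (again with
   illustrative registers and relaxed-ordering mnemonics):

   .L1: ldxr  w0, [x2]
        add   w1, w0, w3
        stxr  w4, w1, [x2]
        cbnz  w4, .L1
*/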
95ca411e
YZ
8723static void
8724aarch64_print_extension (void)
8725{
8726 const struct aarch64_option_extension *opt = NULL;
8727
8728 for (opt = all_extensions; opt->name != NULL; opt++)
8729 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
8730 asm_fprintf (asm_out_file, "+%s", opt->name);
8731
8732 asm_fprintf (asm_out_file, "\n");
8733}
8734
43e9d192
IB
8735static void
8736aarch64_start_file (void)
8737{
8738 if (selected_arch)
95ca411e
YZ
8739 {
8740 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
8741 aarch64_print_extension ();
8742 }
43e9d192 8743 else if (selected_cpu)
95ca411e 8744 {
682287fb
JG
8745 const char *truncated_name
8746 = aarch64_rewrite_selected_cpu (selected_cpu->name);
8747 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
95ca411e
YZ
8748 aarch64_print_extension ();
8749 }
43e9d192
IB
8750 default_file_start ();
8751}
8752
8753/* Target hook for c_mode_for_suffix. */
8754static enum machine_mode
8755aarch64_c_mode_for_suffix (char suffix)
8756{
8757 if (suffix == 'q')
8758 return TFmode;
8759
8760 return VOIDmode;
8761}
8762
3520f7cc
JG
8763/* We can only represent floating point constants which will fit in
8764 "quarter-precision" values. These values are characterised by
8765 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
8766 by:
8767
8768 (-1)^s * (n/16) * 2^r
8769
8770 Where:
8771 's' is the sign bit.
8772 'n' is an integer in the range 16 <= n <= 31.
8773 'r' is an integer in the range -3 <= r <= 4. */
8774
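/* For example, 1.75 = (-1)^0 * (28/16) * 2^0 is representable
   (s = 0, n = 28, r = 0), while 1.0/3.0 is not: n/16 * 2^r is always
   a dyadic rational, and no choice of n in [16, 31] and r in [-3, 4]
   equals 1/3 exactly. */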
8775/* Return true iff X can be represented by a quarter-precision
8776 floating point immediate operand. Note, we cannot represent 0.0. */
8777bool
8778aarch64_float_const_representable_p (rtx x)
8779{
8780 /* This represents our current view of how many bits
8781 make up the mantissa. */
8782 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
ba96cdfb 8783 int exponent;
3520f7cc 8784 unsigned HOST_WIDE_INT mantissa, mask;
3520f7cc 8785 REAL_VALUE_TYPE r, m;
807e902e 8786 bool fail;
3520f7cc
JG
8787
8788 if (!CONST_DOUBLE_P (x))
8789 return false;
8790
94bfa2da
TV
8791 if (GET_MODE (x) == VOIDmode)
8792 return false;
8793
3520f7cc
JG
8794 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8795
8796 /* We cannot represent infinities, NaNs or +/-zero. We won't
8797 know if we have +zero until we analyse the mantissa, but we
8798 can reject the other invalid values. */
8799 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
8800 || REAL_VALUE_MINUS_ZERO (r))
8801 return false;
8802
ba96cdfb 8803 /* Extract exponent. */
3520f7cc
JG
8804 r = real_value_abs (&r);
8805 exponent = REAL_EXP (&r);
8806
8807 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8808 highest (sign) bit, with a fixed binary point at bit point_pos.
8809 m1 holds the low part of the mantissa, m2 the high part.
8810 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
8811 bits for the mantissa, this can fail (low bits will be lost). */
8812 real_ldexp (&m, &r, point_pos - exponent);
807e902e 8813 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
3520f7cc
JG
8814
8815 /* If the low part of the mantissa has bits set we cannot represent
8816 the value. */
807e902e 8817 if (w.elt (0) != 0)
3520f7cc
JG
8818 return false;
8819 /* We have rejected the lower HOST_WIDE_INT, so update our
8820 understanding of how many bits lie in the mantissa and
8821 look only at the high HOST_WIDE_INT. */
807e902e 8822 mantissa = w.elt (1);
3520f7cc
JG
8823 point_pos -= HOST_BITS_PER_WIDE_INT;
8824
8825 /* We can only represent values with a mantissa of the form 1.xxxx. */
8826 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8827 if ((mantissa & mask) != 0)
8828 return false;
8829
8830 /* Having filtered unrepresentable values, we may now remove all
8831 but the highest 5 bits. */
8832 mantissa >>= point_pos - 5;
8833
8834 /* We cannot represent the value 0.0, so reject it. This is handled
8835 elsewhere. */
8836 if (mantissa == 0)
8837 return false;
8838
8839 /* Then, as bit 4 is always set, we can mask it off, leaving
8840 the mantissa in the range [0, 15]. */
8841 mantissa &= ~(1 << 4);
8842 gcc_assert (mantissa <= 15);
8843
8844 /* GCC internally does not use IEEE754-like encoding (where normalized
8845 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
8846 Our mantissa values are shifted 4 places to the left relative to
8847 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8848 by 5 places to correct for GCC's representation. */
8849 exponent = 5 - exponent;
8850
8851 return (exponent >= 0 && exponent <= 7);
8852}
8853
8854char*
81c2dfb9 8855aarch64_output_simd_mov_immediate (rtx const_vector,
3520f7cc
JG
8856 enum machine_mode mode,
8857 unsigned width)
8858{
3ea63f60 8859 bool is_valid;
3520f7cc 8860 static char templ[40];
3520f7cc 8861 const char *mnemonic;
e4f0f84d 8862 const char *shift_op;
3520f7cc 8863 unsigned int lane_count = 0;
81c2dfb9 8864 char element_char;
3520f7cc 8865
e4f0f84d 8866 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
48063b9d
IB
8867
8868 /* This will return true to show const_vector is legal for use as an
8869 AdvSIMD MOVI (or, implicitly, MVNI) instruction immediate. It will
8870 also update INFO to show how the immediate should be generated. */
81c2dfb9 8871 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
3520f7cc
JG
8872 gcc_assert (is_valid);
8873
81c2dfb9 8874 element_char = sizetochar (info.element_width);
48063b9d
IB
8875 lane_count = width / info.element_width;
8876
3520f7cc
JG
8877 mode = GET_MODE_INNER (mode);
8878 if (mode == SFmode || mode == DFmode)
8879 {
48063b9d
IB
8880 gcc_assert (info.shift == 0 && ! info.mvn);
8881 if (aarch64_float_const_zero_rtx_p (info.value))
8882 info.value = GEN_INT (0);
8883 else
8884 {
8885#define buf_size 20
8886 REAL_VALUE_TYPE r;
8887 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
8888 char float_buf[buf_size] = {'\0'};
8889 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
8890#undef buf_size
8891
8892 if (lane_count == 1)
8893 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
8894 else
8895 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
81c2dfb9 8896 lane_count, element_char, float_buf);
48063b9d
IB
8897 return templ;
8898 }
3520f7cc 8899 }
3520f7cc 8900
48063b9d 8901 mnemonic = info.mvn ? "mvni" : "movi";
e4f0f84d 8902 shift_op = info.msl ? "msl" : "lsl";
3520f7cc
JG
8903
8904 if (lane_count == 1)
48063b9d
IB
8905 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
8906 mnemonic, UINTVAL (info.value));
8907 else if (info.shift)
8908 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
e4f0f84d
TB
8909 ", %s %d", mnemonic, lane_count, element_char,
8910 UINTVAL (info.value), shift_op, info.shift);
3520f7cc 8911 else
48063b9d 8912 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
81c2dfb9 8913 mnemonic, lane_count, element_char, UINTVAL (info.value));
3520f7cc
JG
8914 return templ;
8915}
8916
b7342d25
IB
8917char*
8918aarch64_output_scalar_simd_mov_immediate (rtx immediate,
8919 enum machine_mode mode)
8920{
8921 enum machine_mode vmode;
8922
8923 gcc_assert (!VECTOR_MODE_P (mode));
8924 vmode = aarch64_simd_container_mode (mode, 64);
8925 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
8926 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
8927}
8928
88b08073
JG
8929/* Split operands into moves from op[1] + op[2] into op[0]. */
8930
8931void
8932aarch64_split_combinev16qi (rtx operands[3])
8933{
8934 unsigned int dest = REGNO (operands[0]);
8935 unsigned int src1 = REGNO (operands[1]);
8936 unsigned int src2 = REGNO (operands[2]);
8937 enum machine_mode halfmode = GET_MODE (operands[1]);
8938 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
8939 rtx destlo, desthi;
8940
8941 gcc_assert (halfmode == V16QImode);
8942
8943 if (src1 == dest && src2 == dest + halfregs)
8944 {
8945 /* No-op move. Can't split to nothing; emit something. */
8946 emit_note (NOTE_INSN_DELETED);
8947 return;
8948 }
8949
8950 /* Preserve register attributes for variable tracking. */
8951 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
8952 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
8953 GET_MODE_SIZE (halfmode));
8954
8955 /* Special case of reversed high/low parts. */
8956 if (reg_overlap_mentioned_p (operands[2], destlo)
8957 && reg_overlap_mentioned_p (operands[1], desthi))
8958 {
8959 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8960 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
8961 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8962 }
8963 else if (!reg_overlap_mentioned_p (operands[2], destlo))
8964 {
8965 /* Try to avoid unnecessary moves if part of the result
8966 is in the right place already. */
8967 if (src1 != dest)
8968 emit_move_insn (destlo, operands[1]);
8969 if (src2 != dest + halfregs)
8970 emit_move_insn (desthi, operands[2]);
8971 }
8972 else
8973 {
8974 if (src2 != dest + halfregs)
8975 emit_move_insn (desthi, operands[2]);
8976 if (src1 != dest)
8977 emit_move_insn (destlo, operands[1]);
8978 }
8979}
8980
8981/* vec_perm support. */
8982
8983#define MAX_VECT_LEN 16
8984
8985struct expand_vec_perm_d
8986{
8987 rtx target, op0, op1;
8988 unsigned char perm[MAX_VECT_LEN];
8989 enum machine_mode vmode;
8990 unsigned char nelt;
8991 bool one_vector_p;
8992 bool testing_p;
8993};
8994
8995/* Generate a variable permutation. */
8996
8997static void
8998aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
8999{
9000 enum machine_mode vmode = GET_MODE (target);
9001 bool one_vector_p = rtx_equal_p (op0, op1);
9002
9003 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
9004 gcc_checking_assert (GET_MODE (op0) == vmode);
9005 gcc_checking_assert (GET_MODE (op1) == vmode);
9006 gcc_checking_assert (GET_MODE (sel) == vmode);
9007 gcc_checking_assert (TARGET_SIMD);
9008
9009 if (one_vector_p)
9010 {
9011 if (vmode == V8QImode)
9012 {
9013 /* Expand the argument to a V16QI mode by duplicating it. */
9014 rtx pair = gen_reg_rtx (V16QImode);
9015 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
9016 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
9017 }
9018 else
9019 {
9020 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
9021 }
9022 }
9023 else
9024 {
9025 rtx pair;
9026
9027 if (vmode == V8QImode)
9028 {
9029 pair = gen_reg_rtx (V16QImode);
9030 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
9031 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
9032 }
9033 else
9034 {
9035 pair = gen_reg_rtx (OImode);
9036 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
9037 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
9038 }
9039 }
9040}
9041
9042void
9043aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
9044{
9045 enum machine_mode vmode = GET_MODE (target);
c9d1a16a 9046 unsigned int nelt = GET_MODE_NUNITS (vmode);
88b08073 9047 bool one_vector_p = rtx_equal_p (op0, op1);
f7c4e5b8 9048 rtx mask;
88b08073
JG
9049
9050 /* The TBL instruction does not use a modulo index, so we must take care
9051 of that ourselves. */
f7c4e5b8
AL
9052 mask = aarch64_simd_gen_const_vector_dup (vmode,
9053 one_vector_p ? nelt - 1 : 2 * nelt - 1);
88b08073
JG
9054 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
9055
f7c4e5b8
AL
9056 /* For big-endian, we also need to reverse the index within the vector
9057 (but not which vector). */
9058 if (BYTES_BIG_ENDIAN)
9059 {
9060 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
9061 if (!one_vector_p)
9062 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
9063 sel = expand_simple_binop (vmode, XOR, sel, mask,
9064 NULL, 0, OPTAB_LIB_WIDEN);
9065 }
88b08073
JG
9066 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
9067}
9068
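/* To illustrate the masking, with V8QImode and a single input vector
   the mask is a splat of 7, so an out-of-range selector value such as
   9 indexes lane 9 & 7 == 1; on big-endian the XOR with 7 then maps
   GCC lane i to architectural lane 7 - i within each vector. */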
cc4d934f
JG
9069/* Recognize patterns suitable for the TRN instructions. */
9070static bool
9071aarch64_evpc_trn (struct expand_vec_perm_d *d)
9072{
9073 unsigned int i, odd, mask, nelt = d->nelt;
9074 rtx out, in0, in1, x;
9075 rtx (*gen) (rtx, rtx, rtx);
9076 enum machine_mode vmode = d->vmode;
9077
9078 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9079 return false;
9080
9081 /* Note that these are little-endian tests.
9082 We correct for big-endian later. */
9083 if (d->perm[0] == 0)
9084 odd = 0;
9085 else if (d->perm[0] == 1)
9086 odd = 1;
9087 else
9088 return false;
9089 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9090
9091 for (i = 0; i < nelt; i += 2)
9092 {
9093 if (d->perm[i] != i + odd)
9094 return false;
9095 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
9096 return false;
9097 }
9098
9099 /* Success! */
9100 if (d->testing_p)
9101 return true;
9102
9103 in0 = d->op0;
9104 in1 = d->op1;
9105 if (BYTES_BIG_ENDIAN)
9106 {
9107 x = in0, in0 = in1, in1 = x;
9108 odd = !odd;
9109 }
9110 out = d->target;
9111
9112 if (odd)
9113 {
9114 switch (vmode)
9115 {
9116 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
9117 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
9118 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
9119 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
9120 case V4SImode: gen = gen_aarch64_trn2v4si; break;
9121 case V2SImode: gen = gen_aarch64_trn2v2si; break;
9122 case V2DImode: gen = gen_aarch64_trn2v2di; break;
9123 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
9124 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
9125 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
9126 default:
9127 return false;
9128 }
9129 }
9130 else
9131 {
9132 switch (vmode)
9133 {
9134 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
9135 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
9136 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
9137 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
9138 case V4SImode: gen = gen_aarch64_trn1v4si; break;
9139 case V2SImode: gen = gen_aarch64_trn1v2si; break;
9140 case V2DImode: gen = gen_aarch64_trn1v2di; break;
9141 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
9142 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
9143 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
9144 default:
9145 return false;
9146 }
9147 }
9148
9149 emit_insn (gen (out, in0, in1));
9150 return true;
9151}
9152
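/* In little-endian lane numbering, the V4SImode permutations accepted
   here are { 0, 4, 2, 6 } (TRN1, odd == 0) and { 1, 5, 3, 7 }
   (TRN2, odd == 1). */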
9153/* Recognize patterns suitable for the UZP instructions. */
9154static bool
9155aarch64_evpc_uzp (struct expand_vec_perm_d *d)
9156{
9157 unsigned int i, odd, mask, nelt = d->nelt;
9158 rtx out, in0, in1, x;
9159 rtx (*gen) (rtx, rtx, rtx);
9160 enum machine_mode vmode = d->vmode;
9161
9162 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9163 return false;
9164
9165 /* Note that these are little-endian tests.
9166 We correct for big-endian later. */
9167 if (d->perm[0] == 0)
9168 odd = 0;
9169 else if (d->perm[0] == 1)
9170 odd = 1;
9171 else
9172 return false;
9173 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9174
9175 for (i = 0; i < nelt; i++)
9176 {
9177 unsigned elt = (i * 2 + odd) & mask;
9178 if (d->perm[i] != elt)
9179 return false;
9180 }
9181
9182 /* Success! */
9183 if (d->testing_p)
9184 return true;
9185
9186 in0 = d->op0;
9187 in1 = d->op1;
9188 if (BYTES_BIG_ENDIAN)
9189 {
9190 x = in0, in0 = in1, in1 = x;
9191 odd = !odd;
9192 }
9193 out = d->target;
9194
9195 if (odd)
9196 {
9197 switch (vmode)
9198 {
9199 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
9200 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
9201 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
9202 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
9203 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
9204 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
9205 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
9206 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
9207 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
9208 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
9209 default:
9210 return false;
9211 }
9212 }
9213 else
9214 {
9215 switch (vmode)
9216 {
9217 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
9218 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
9219 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
9220 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
9221 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
9222 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
9223 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
9224 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
9225 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
9226 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
9227 default:
9228 return false;
9229 }
9230 }
9231
9232 emit_insn (gen (out, in0, in1));
9233 return true;
9234}
9235
9236/* Recognize patterns suitable for the ZIP instructions. */
9237static bool
9238aarch64_evpc_zip (struct expand_vec_perm_d *d)
9239{
9240 unsigned int i, high, mask, nelt = d->nelt;
9241 rtx out, in0, in1, x;
9242 rtx (*gen) (rtx, rtx, rtx);
9243 enum machine_mode vmode = d->vmode;
9244
9245 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9246 return false;
9247
9248 /* Note that these are little-endian tests.
9249 We correct for big-endian later. */
9250 high = nelt / 2;
9251 if (d->perm[0] == high)
9252 /* Do Nothing. */
9253 ;
9254 else if (d->perm[0] == 0)
9255 high = 0;
9256 else
9257 return false;
9258 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9259
9260 for (i = 0; i < nelt / 2; i++)
9261 {
9262 unsigned elt = (i + high) & mask;
9263 if (d->perm[i * 2] != elt)
9264 return false;
9265 elt = (elt + nelt) & mask;
9266 if (d->perm[i * 2 + 1] != elt)
9267 return false;
9268 }
9269
9270 /* Success! */
9271 if (d->testing_p)
9272 return true;
9273
9274 in0 = d->op0;
9275 in1 = d->op1;
9276 if (BYTES_BIG_ENDIAN)
9277 {
9278 x = in0, in0 = in1, in1 = x;
9279 high = !high;
9280 }
9281 out = d->target;
9282
9283 if (high)
9284 {
9285 switch (vmode)
9286 {
9287 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
9288 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
9289 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
9290 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
9291 case V4SImode: gen = gen_aarch64_zip2v4si; break;
9292 case V2SImode: gen = gen_aarch64_zip2v2si; break;
9293 case V2DImode: gen = gen_aarch64_zip2v2di; break;
9294 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
9295 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
9296 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
9297 default:
9298 return false;
9299 }
9300 }
9301 else
9302 {
9303 switch (vmode)
9304 {
9305 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
9306 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
9307 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
9308 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
9309 case V4SImode: gen = gen_aarch64_zip1v4si; break;
9310 case V2SImode: gen = gen_aarch64_zip1v2si; break;
9311 case V2DImode: gen = gen_aarch64_zip1v2di; break;
9312 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
9313 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
9314 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
9315 default:
9316 return false;
9317 }
9318 }
9319
9320 emit_insn (gen (out, in0, in1));
9321 return true;
9322}
9323
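/* Similarly, the V4SImode permutations matched here are { 0, 4, 1, 5 }
   (ZIP1) and { 2, 6, 3, 7 } (ZIP2), in little-endian numbering. */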
ae0533da
AL
9324/* Recognize patterns for the EXT insn. */
9325
9326static bool
9327aarch64_evpc_ext (struct expand_vec_perm_d *d)
9328{
9329 unsigned int i, nelt = d->nelt;
9330 rtx (*gen) (rtx, rtx, rtx, rtx);
9331 rtx offset;
9332
9333 unsigned int location = d->perm[0]; /* Always < nelt. */
9334
9335 /* Check if the extracted indices are increasing by one. */
9336 for (i = 1; i < nelt; i++)
9337 {
9338 unsigned int required = location + i;
9339 if (d->one_vector_p)
9340 {
9341 /* We'll pass the same vector in twice, so allow indices to wrap. */
9342 required &= (nelt - 1);
9343 }
9344 if (d->perm[i] != required)
9345 return false;
9346 }
9347
ae0533da
AL
9348 switch (d->vmode)
9349 {
9350 case V16QImode: gen = gen_aarch64_extv16qi; break;
9351 case V8QImode: gen = gen_aarch64_extv8qi; break;
9352 case V4HImode: gen = gen_aarch64_extv4hi; break;
9353 case V8HImode: gen = gen_aarch64_extv8hi; break;
9354 case V2SImode: gen = gen_aarch64_extv2si; break;
9355 case V4SImode: gen = gen_aarch64_extv4si; break;
9356 case V2SFmode: gen = gen_aarch64_extv2sf; break;
9357 case V4SFmode: gen = gen_aarch64_extv4sf; break;
9358 case V2DImode: gen = gen_aarch64_extv2di; break;
9359 case V2DFmode: gen = gen_aarch64_extv2df; break;
9360 default:
9361 return false;
9362 }
9363
9364 /* Success! */
9365 if (d->testing_p)
9366 return true;
9367
b31e65bb
AL
9368 /* The case where (location == 0) is a no-op for both big- and little-endian,
9369 and is removed by the mid-end at optimization levels -O1 and higher. */
9370
9371 if (BYTES_BIG_ENDIAN && (location != 0))
ae0533da
AL
9372 {
9373 /* After setup, we want the high elements of the first vector (stored
9374 at the LSB end of the register), and the low elements of the second
9375 vector (stored at the MSB end of the register). So swap. */
9376 rtx temp = d->op0;
9377 d->op0 = d->op1;
9378 d->op1 = temp;
9379 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
9380 location = nelt - location;
9381 }
9382
9383 offset = GEN_INT (location);
9384 emit_insn (gen (d->target, d->op0, d->op1, offset));
9385 return true;
9386}
9387
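/* For example, the two-vector V4SImode permutation { 1, 2, 3, 4 } has
   consecutive indices starting at location 1 and is matched here,
   producing an EXT instruction with an offset of one element. */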
923fcec3
AL
9388/* Recognize patterns for the REV insns. */
9389
9390static bool
9391aarch64_evpc_rev (struct expand_vec_perm_d *d)
9392{
9393 unsigned int i, j, diff, nelt = d->nelt;
9394 rtx (*gen) (rtx, rtx);
9395
9396 if (!d->one_vector_p)
9397 return false;
9398
9399 diff = d->perm[0];
9400 switch (diff)
9401 {
9402 case 7:
9403 switch (d->vmode)
9404 {
9405 case V16QImode: gen = gen_aarch64_rev64v16qi; break;
9406 case V8QImode: gen = gen_aarch64_rev64v8qi; break;
9407 default:
9408 return false;
9409 }
9410 break;
9411 case 3:
9412 switch (d->vmode)
9413 {
9414 case V16QImode: gen = gen_aarch64_rev32v16qi; break;
9415 case V8QImode: gen = gen_aarch64_rev32v8qi; break;
9416 case V8HImode: gen = gen_aarch64_rev64v8hi; break;
9417 case V4HImode: gen = gen_aarch64_rev64v4hi; break;
9418 default:
9419 return false;
9420 }
9421 break;
9422 case 1:
9423 switch (d->vmode)
9424 {
9425 case V16QImode: gen = gen_aarch64_rev16v16qi; break;
9426 case V8QImode: gen = gen_aarch64_rev16v8qi; break;
9427 case V8HImode: gen = gen_aarch64_rev32v8hi; break;
9428 case V4HImode: gen = gen_aarch64_rev32v4hi; break;
9429 case V4SImode: gen = gen_aarch64_rev64v4si; break;
9430 case V2SImode: gen = gen_aarch64_rev64v2si; break;
9431 case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
9432 case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
9433 default:
9434 return false;
9435 }
9436 break;
9437 default:
9438 return false;
9439 }
9440
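 /* Worked example (illustrative): the single-vector V8HImode mask
    {1, 0, 3, 2, 5, 4, 7, 6} has diff == 1 and selects REV32 above;
    the loop below then checks that each group of diff + 1 indices is
    exactly reversed. */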
9441 for (i = 0; i < nelt ; i += diff + 1)
9442 for (j = 0; j <= diff; j += 1)
9443 {
9444 /* This is guaranteed to hold, as the value of diff
9445 is 7, 3 or 1 and we should have enough elements in the
9446 permutation to generate it. Getting a vector mask whose
9447 diff is anything other than these values implies that
9448 something has gone wrong by the time we get here. */
9449 gcc_assert (i + j < nelt);
9450 if (d->perm[i + j] != i + diff - j)
9451 return false;
9452 }
9453
9454 /* Success! */
9455 if (d->testing_p)
9456 return true;
9457
9458 emit_insn (gen (d->target, d->op0));
9459 return true;
9460}
9461
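/* Recognize permutations in which every element selects the same lane
   of the input; these map to a single DUP (dup_lane) instruction. */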
9462static bool
9463aarch64_evpc_dup (struct expand_vec_perm_d *d)
9464{
9465 rtx (*gen) (rtx, rtx, rtx);
9466 rtx out = d->target;
9467 rtx in0;
9468 enum machine_mode vmode = d->vmode;
9469 unsigned int i, elt, nelt = d->nelt;
9470 rtx lane;
9471
9472 elt = d->perm[0];
9473 for (i = 1; i < nelt; i++)
9474 {
9475 if (elt != d->perm[i])
9476 return false;
9477 }
9478
9479 /* The generic preparation in aarch64_expand_vec_perm_const_1
9480 swaps the operand order and the permute indices if it finds
9481 d->perm[0] to be in the second operand. Thus, we can always
9482 use d->op0 and need not do any extra arithmetic to get the
9483 correct lane number. */
9484 in0 = d->op0;
9485 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
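 /* Illustrative: the V4SImode mask {3, 3, 3, 3} reaches this point
    with elt == 3 and emits, roughly, DUP Vd.4S, Vn.S[3] via the
    generator chosen below. */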
9486
9487 switch (vmode)
9488 {
9489 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
9490 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
9491 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
9492 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
9493 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
9494 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
9495 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
9496 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
9497 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
9498 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
9499 default:
9500 return false;
9501 }
9502
9503 emit_insn (gen (out, in0, lane));
9504 return true;
9505}
9506
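/* Expand the permutation as a TBL table lookup; this is the catch-all
   used when no single permute instruction matches. */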
9507static bool
9508aarch64_evpc_tbl (struct expand_vec_perm_d *d)
9509{
9510 rtx rperm[MAX_VECT_LEN], sel;
9511 enum machine_mode vmode = d->vmode;
9512 unsigned int i, nelt = d->nelt;
9513
9514 if (d->testing_p)
9515 return true;
9516
9517 /* Generic code will try constant permutation twice. Once with the
9518 original mode and again with the elements lowered to QImode.
9519 So wait and don't do the selector expansion ourselves. */
9520 if (vmode != V8QImode && vmode != V16QImode)
9521 return false;
9522
9523 for (i = 0; i < nelt; ++i)
9524 {
9525 int nunits = GET_MODE_NUNITS (vmode);
9526
9527 /* If big-endian and two vectors we end up with a weird mixed-endian
9528 mode on NEON. Reverse the index within each word but not the word
9529 itself. */
9530 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
9531 : d->perm[i]);
9532 }
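 /* E.g. (illustrative) for V16QImode on big-endian, nunits == 16 and
    each index i becomes i ^ 15, so index 0 maps to 15, 1 to 14, and
    so on. */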
9533 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
9534 sel = force_reg (vmode, sel);
9535
9536 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
9537 return true;
9538}
9539
9540static bool
9541aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
9542{
9543 /* The pattern matching functions above are written to look for a small
9544 number to begin the sequence (0, 1, N/2). If we begin with an index
9545 from the second operand, we can swap the operands. */
9546 if (d->perm[0] >= d->nelt)
9547 {
9548 unsigned i, nelt = d->nelt;
9549 rtx x;
9550
9551 gcc_assert (nelt == (nelt & -nelt));
9552 for (i = 0; i < nelt; ++i)
9553 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
9554
9555 x = d->op0;
9556 d->op0 = d->op1;
9557 d->op1 = x;
9558 }
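 /* Illustrative: with nelt == 4, the mask {5, 6, 7, 0} becomes
    {1, 2, 3, 4} with the operands swapped, which aarch64_evpc_ext
    below can then match. */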
9559
9560 if (TARGET_SIMD)
9561 {
9562 if (aarch64_evpc_rev (d))
9563 return true;
9564 else if (aarch64_evpc_ext (d))
9565 return true;
9566 else if (aarch64_evpc_dup (d))
9567 return true;
ae0533da 9568 else if (aarch64_evpc_zip (d))
9569 return true;
9570 else if (aarch64_evpc_uzp (d))
9571 return true;
9572 else if (aarch64_evpc_trn (d))
9573 return true;
9574 return aarch64_evpc_tbl (d);
9575 }
9576 return false;
9577}
9578
9579/* Expand a vec_perm_const pattern. */
9580
9581bool
9582aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
9583{
9584 struct expand_vec_perm_d d;
9585 int i, nelt, which;
9586
9587 d.target = target;
9588 d.op0 = op0;
9589 d.op1 = op1;
9590
9591 d.vmode = GET_MODE (target);
9592 gcc_assert (VECTOR_MODE_P (d.vmode));
9593 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9594 d.testing_p = false;
9595
9596 for (i = which = 0; i < nelt; ++i)
9597 {
9598 rtx e = XVECEXP (sel, 0, i);
9599 int ei = INTVAL (e) & (2 * nelt - 1);
9600 which |= (ei < nelt ? 1 : 2);
9601 d.perm[i] = ei;
9602 }
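 /* "which" is a two-bit mask: bit 0 is set if any index selects from
    the first vector, bit 1 if any selects from the second, giving the
    cases 1, 2 and 3 handled below. */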
9603
9604 switch (which)
9605 {
9606 default:
9607 gcc_unreachable ();
9608
9609 case 3:
9610 d.one_vector_p = false;
9611 if (!rtx_equal_p (op0, op1))
9612 break;
9613
9614 /* The elements of PERM reference both operands, but the two
9615 operands are identical. Allow easier matching of the
9616 permutation by folding it onto a single input
9617 vector. */
9618 /* Fall Through. */
9619 case 2:
9620 for (i = 0; i < nelt; ++i)
9621 d.perm[i] &= nelt - 1;
9622 d.op0 = op1;
9623 d.one_vector_p = true;
9624 break;
9625
9626 case 1:
9627 d.op1 = op0;
9628 d.one_vector_p = true;
9629 break;
9630 }
9631
9632 return aarch64_expand_vec_perm_const_1 (&d);
9633}
9634
9635static bool
9636aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
9637 const unsigned char *sel)
9638{
9639 struct expand_vec_perm_d d;
9640 unsigned int i, nelt, which;
9641 bool ret;
9642
9643 d.vmode = vmode;
9644 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9645 d.testing_p = true;
9646 memcpy (d.perm, sel, nelt);
9647
9648 /* Calculate whether all elements are in one vector. */
9649 for (i = which = 0; i < nelt; ++i)
9650 {
9651 unsigned char e = d.perm[i];
9652 gcc_assert (e < 2 * nelt);
9653 which |= (e < nelt ? 1 : 2);
9654 }
9655
9656 /* If all elements are from the second vector, reindex as if from the
9657 first vector. */
9658 if (which == 2)
9659 for (i = 0; i < nelt; ++i)
9660 d.perm[i] -= nelt;
9661
9662 /* Check whether the mask can be applied to a single vector. */
9663 d.one_vector_p = (which != 3);
9664
9665 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
9666 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
9667 if (!d.one_vector_p)
9668 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
9669
9670 start_sequence ();
9671 ret = aarch64_expand_vec_perm_const_1 (&d);
9672 end_sequence ();
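 /* The sequence captures any insns the recognizers emit during this
    trial expansion; they are discarded, and only the boolean verdict
    is returned. */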
9673
9674 return ret;
9675}
9676
9677/* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
9678bool
9679aarch64_cannot_change_mode_class (enum machine_mode from,
9680 enum machine_mode to,
9681 enum reg_class rclass)
9682{
9683 /* Full-reg subregs are allowed on general regs or any class if they are
9684 the same size. */
9685 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
9686 || !reg_classes_intersect_p (FP_REGS, rclass))
9687 return false;
9688
9689 /* Limited combinations of subregs are safe on FPREGs. Particularly,
9690 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
9691 2. Scalar to Scalar for integer modes or same size float modes.
9692 3. Vector to Vector modes.
9693 4. On little-endian only, Vector-Structure to Vector modes. */
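 /* Illustrative: under rule 1, a subreg accessing one SImode element
    of a V4SImode value held in an FP register is allowed, whereas a
    DFmode to SFmode subreg of an FP register matches none of the
    rules and is rejected (this function returns true for it). */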
9694 if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
9695 {
9696 if (aarch64_vector_mode_supported_p (from)
9697 && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
9698 return false;
9699
9700 if (GET_MODE_NUNITS (from) == 1
9701 && GET_MODE_NUNITS (to) == 1
9702 && (GET_MODE_CLASS (from) == MODE_INT
9703 || from == to))
9704 return false;
9705
9706 if (aarch64_vector_mode_supported_p (from)
9707 && aarch64_vector_mode_supported_p (to))
9708 return false;
9709
9710 /* Within a vector structure straddling multiple vector registers
9711 we are in a mixed-endian representation. As such, we can't
9712 easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can
9713 switch between vectors and vector structures cheaply. */
9714 if (!BYTES_BIG_ENDIAN)
9715 if ((aarch64_vector_mode_supported_p (from)
9716 && aarch64_vect_struct_mode_p (to))
9717 || (aarch64_vector_mode_supported_p (to)
9718 && aarch64_vect_struct_mode_p (from)))
9719 return false;
9720 }
9721
9722 return true;
9723}
9724
9725/* Implement MODES_TIEABLE_P. */
9726
9727bool
9728aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
9729{
9730 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
9731 return true;
9732
9733 /* We specifically want to allow elements of "structure" modes to
9734 be tieable to the structure. This more general condition allows
9735 other rarer situations too. */
9736 if (TARGET_SIMD
9737 && aarch64_vector_mode_p (mode1)
9738 && aarch64_vector_mode_p (mode2))
9739 return true;
9740
9741 return false;
9742}
9743
9744/* Return a new RTX holding the result of moving POINTER forward by
9745 AMOUNT bytes. */
9746
9747static rtx
9748aarch64_move_pointer (rtx pointer, int amount)
9749{
9750 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
9751
9752 return adjust_automodify_address (pointer, GET_MODE (pointer),
9753 next, amount);
9754}
9755
9756/* Return a new RTX holding the result of moving POINTER forward by the
9757 size of the mode it points to. */
9758
9759static rtx
9760aarch64_progress_pointer (rtx pointer)
9761{
9762 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
9763
9764 return aarch64_move_pointer (pointer, amount);
9765}
9766
9767/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
9768 MODE bytes. */
9769
9770static void
9771aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
9772 enum machine_mode mode)
9773{
9774 rtx reg = gen_reg_rtx (mode);
9775
9776 /* "Cast" the pointers to the correct mode. */
9777 *src = adjust_address (*src, mode, 0);
9778 *dst = adjust_address (*dst, mode, 0);
9779 /* Emit the memcpy. */
9780 emit_move_insn (reg, *src);
9781 emit_move_insn (*dst, reg);
9782 /* Move the pointers forward. */
9783 *src = aarch64_progress_pointer (*src);
9784 *dst = aarch64_progress_pointer (*dst);
9785}
9786
9787/* Expand movmem, as if from a __builtin_memcpy. Return true if
9788 we succeed, otherwise return false. */
9789
9790bool
9791aarch64_expand_movmem (rtx *operands)
9792{
9793 unsigned int n;
9794 rtx dst = operands[0];
9795 rtx src = operands[1];
9796 rtx base;
9797 bool speed_p = !optimize_function_for_size_p (cfun);
9798
9799 /* When optimizing for size, give a better estimate of the length of a
9800 memcpy call, but use the default otherwise. */
9801 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
9802
9803 /* We can't do anything smart if the amount to copy is not constant. */
9804 if (!CONST_INT_P (operands[2]))
9805 return false;
9806
9807 n = UINTVAL (operands[2]);
9808
9809 /* Try to keep the number of instructions low. For cases below 16 bytes we
9810 need to make at most two moves. For cases above 16 bytes it will be one
9811 move for each 16 byte chunk, then at most two additional moves. */
9812 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
9813 return false;
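 /* E.g. (illustrative) n == 35 counts as 2 sixteen-byte moves plus 2
    more for the 3-byte tail, i.e. 4 in this estimate; each move below
    expands to one load and one store. */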
9814
9815 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9816 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
9817
9818 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
9819 src = adjust_automodify_address (src, VOIDmode, base, 0);
9820
9821 /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
9822 1-byte chunk. */
9823 if (n < 4)
9824 {
9825 if (n >= 2)
9826 {
9827 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
9828 n -= 2;
9829 }
9830
9831 if (n == 1)
9832 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
9833
9834 return true;
9835 }
9836
9837 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
9838 4-byte chunk, partially overlapping with the previously copied chunk. */
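 /* Worked example (illustrative): for n == 7 the first SImode copy
    covers bytes 0-3; move == -1 then steps both pointers back one
    byte, so the second copy covers bytes 3-6, overlapping at byte 3. */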
9839 if (n < 8)
9840 {
9841 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9842 n -= 4;
9843 if (n > 0)
9844 {
9845 int move = n - 4;
9846
9847 src = aarch64_move_pointer (src, move);
9848 dst = aarch64_move_pointer (dst, move);
9849 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9850 }
9851 return true;
9852 }
9853
9854 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
9855 them, then (if applicable) an 8-byte chunk. */
9856 while (n >= 8)
9857 {
9858 if (n / 16)
9859 {
9860 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
9861 n -= 16;
9862 }
9863 else
9864 {
9865 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
9866 n -= 8;
9867 }
9868 }
9869
9870 /* Finish the final bytes of the copy. We can always do this in one
9871 instruction. We either copy the exact amount we need, or partially
9872 overlap with the previous chunk we copied and copy 8 bytes. */
9873 if (n == 0)
9874 return true;
9875 else if (n == 1)
9876 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
9877 else if (n == 2)
9878 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
9879 else if (n == 4)
9880 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9881 else
9882 {
9883 if (n == 3)
9884 {
9885 src = aarch64_move_pointer (src, -1);
9886 dst = aarch64_move_pointer (dst, -1);
9887 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9888 }
9889 else
9890 {
9891 int move = n - 8;
9892
9893 src = aarch64_move_pointer (src, move);
9894 dst = aarch64_move_pointer (dst, move);
9895 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
9896 }
9897 }
9898
9899 return true;
9900}
9901
9902/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
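/* Background (the standard AddressSanitizer mapping, stated here for
   context): shadow_address = (address >> 3) + offset, so this hook
   only has to supply the constant offset. */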
9903
9904static unsigned HOST_WIDE_INT
9905aarch64_asan_shadow_offset (void)
9906{
9907 return (HOST_WIDE_INT_1 << 36);
9908}
9909
9910#undef TARGET_ADDRESS_COST
9911#define TARGET_ADDRESS_COST aarch64_address_cost
9912
10013 /* This hook determines whether unnamed bitfields affect the alignment
9914 of the containing structure. The hook returns true if the structure
9915 should inherit the alignment requirements of an unnamed bitfield's
9916 type. */
9917#undef TARGET_ALIGN_ANON_BITFIELD
9918#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
9919
9920#undef TARGET_ASM_ALIGNED_DI_OP
9921#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
9922
9923#undef TARGET_ASM_ALIGNED_HI_OP
9924#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
9925
9926#undef TARGET_ASM_ALIGNED_SI_OP
9927#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
9928
9929#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
9930#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
9931 hook_bool_const_tree_hwi_hwi_const_tree_true
9932
9933#undef TARGET_ASM_FILE_START
9934#define TARGET_ASM_FILE_START aarch64_start_file
9935
9936#undef TARGET_ASM_OUTPUT_MI_THUNK
9937#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
9938
9939#undef TARGET_ASM_SELECT_RTX_SECTION
9940#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
9941
9942#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
9943#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
9944
9945#undef TARGET_BUILD_BUILTIN_VA_LIST
9946#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
9947
9948#undef TARGET_CALLEE_COPIES
9949#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
9950
9951#undef TARGET_CAN_ELIMINATE
9952#define TARGET_CAN_ELIMINATE aarch64_can_eliminate
9953
9954#undef TARGET_CANNOT_FORCE_CONST_MEM
9955#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
9956
9957#undef TARGET_CONDITIONAL_REGISTER_USAGE
9958#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
9959
9960/* Only the least significant bit is used for initialization guard
9961 variables. */
9962#undef TARGET_CXX_GUARD_MASK_BIT
9963#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
9964
9965#undef TARGET_C_MODE_FOR_SUFFIX
9966#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
9967
9968#ifdef TARGET_BIG_ENDIAN_DEFAULT
9969#undef TARGET_DEFAULT_TARGET_FLAGS
9970#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
9971#endif
9972
9973#undef TARGET_CLASS_MAX_NREGS
9974#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
9975
9976#undef TARGET_BUILTIN_DECL
9977#define TARGET_BUILTIN_DECL aarch64_builtin_decl
9978
9979#undef TARGET_EXPAND_BUILTIN
9980#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
9981
9982#undef TARGET_EXPAND_BUILTIN_VA_START
9983#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
9984
9985#undef TARGET_FOLD_BUILTIN
9986#define TARGET_FOLD_BUILTIN aarch64_fold_builtin
9987
9988#undef TARGET_FUNCTION_ARG
9989#define TARGET_FUNCTION_ARG aarch64_function_arg
9990
9991#undef TARGET_FUNCTION_ARG_ADVANCE
9992#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
9993
9994#undef TARGET_FUNCTION_ARG_BOUNDARY
9995#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
9996
9997#undef TARGET_FUNCTION_OK_FOR_SIBCALL
9998#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
9999
10000#undef TARGET_FUNCTION_VALUE
10001#define TARGET_FUNCTION_VALUE aarch64_function_value
10002
10003#undef TARGET_FUNCTION_VALUE_REGNO_P
10004#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
10005
10006#undef TARGET_FRAME_POINTER_REQUIRED
10007#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
10008
10009#undef TARGET_GIMPLE_FOLD_BUILTIN
10010#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
10011
10012#undef TARGET_GIMPLIFY_VA_ARG_EXPR
10013#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
10014
10015#undef TARGET_INIT_BUILTINS
10016#define TARGET_INIT_BUILTINS aarch64_init_builtins
10017
10018#undef TARGET_LEGITIMATE_ADDRESS_P
10019#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
10020
10021#undef TARGET_LEGITIMATE_CONSTANT_P
10022#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
10023
10024#undef TARGET_LIBGCC_CMP_RETURN_MODE
10025#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
10026
10027#undef TARGET_LRA_P
10028#define TARGET_LRA_P aarch64_lra_p
10029
10030#undef TARGET_MANGLE_TYPE
10031#define TARGET_MANGLE_TYPE aarch64_mangle_type
10032
10033#undef TARGET_MEMORY_MOVE_COST
10034#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
10035
10036#undef TARGET_MUST_PASS_IN_STACK
10037#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
10038
10039/* This target hook should return true if accesses to volatile bitfields
10040 should use the narrowest mode possible. It should return false if these
10041 accesses should use the bitfield container type. */
10042#undef TARGET_NARROW_VOLATILE_BITFIELD
10043#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
10044
10045#undef TARGET_OPTION_OVERRIDE
10046#define TARGET_OPTION_OVERRIDE aarch64_override_options
10047
10048#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
10049#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
10050 aarch64_override_options_after_change
10051
10052#undef TARGET_PASS_BY_REFERENCE
10053#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
10054
10055#undef TARGET_PREFERRED_RELOAD_CLASS
10056#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
10057
10058#undef TARGET_SECONDARY_RELOAD
10059#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
10060
10061#undef TARGET_SHIFT_TRUNCATION_MASK
10062#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
10063
10064#undef TARGET_SETUP_INCOMING_VARARGS
10065#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
10066
10067#undef TARGET_STRUCT_VALUE_RTX
10068#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
10069
10070#undef TARGET_REGISTER_MOVE_COST
10071#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
10072
10073#undef TARGET_RETURN_IN_MEMORY
10074#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
10075
10076#undef TARGET_RETURN_IN_MSB
10077#define TARGET_RETURN_IN_MSB aarch64_return_in_msb
10078
10079#undef TARGET_RTX_COSTS
10080#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
10081
10082#undef TARGET_SCHED_ISSUE_RATE
10083#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
10084
10085#undef TARGET_TRAMPOLINE_INIT
10086#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
10087
10088#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
10089#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
10090
10091#undef TARGET_VECTOR_MODE_SUPPORTED_P
10092#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
10093
10094#undef TARGET_ARRAY_MODE_SUPPORTED_P
10095#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
10096
10097#undef TARGET_VECTORIZE_ADD_STMT_COST
10098#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
10099
10100#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
10101#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
10102 aarch64_builtin_vectorization_cost
10103
10104#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
10105#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
10106
10107#undef TARGET_VECTORIZE_BUILTINS
10108#define TARGET_VECTORIZE_BUILTINS
10109
10110#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
10111#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
10112 aarch64_builtin_vectorized_function
10113
10114#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
10115#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
10116 aarch64_autovectorize_vector_sizes
10117
10118#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
10119#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
10120 aarch64_atomic_assign_expand_fenv
10121
10122/* Section anchor support. */
10123
10124#undef TARGET_MIN_ANCHOR_OFFSET
10125#define TARGET_MIN_ANCHOR_OFFSET -256
10126
10127/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
10128 byte offset; we can do much more for larger data types, but have no way
10129 to determine the size of the access. We assume accesses are aligned. */
10130#undef TARGET_MAX_ANCHOR_OFFSET
10131#define TARGET_MAX_ANCHOR_OFFSET 4095
10132
10133#undef TARGET_VECTOR_ALIGNMENT
10134#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
10135
10136#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
10137#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
10138 aarch64_simd_vector_alignment_reachable
10139
10140/* vec_perm support. */
10141
10142#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
10143#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
10144 aarch64_vectorize_vec_perm_const_ok
10145
10146
10147#undef TARGET_FIXED_CONDITION_CODE_REGS
10148#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
10149
10150#undef TARGET_FLAGS_REGNUM
10151#define TARGET_FLAGS_REGNUM CC_REGNUM
10152
10153#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
10154#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
10155
10156#undef TARGET_ASAN_SHADOW_OFFSET
10157#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
10158
10159struct gcc_target targetm = TARGET_INITIALIZER;
10160
10161#include "gt-aarch64.h"