gcc/config/aarch64/aarch64.c
1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2015 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "hash-set.h"
29 #include "machmode.h"
30 #include "vec.h"
31 #include "double-int.h"
32 #include "input.h"
33 #include "alias.h"
34 #include "symtab.h"
35 #include "wide-int.h"
36 #include "inchash.h"
37 #include "tree.h"
38 #include "fold-const.h"
39 #include "stringpool.h"
40 #include "stor-layout.h"
41 #include "calls.h"
42 #include "varasm.h"
43 #include "regs.h"
44 #include "dominance.h"
45 #include "cfg.h"
46 #include "cfgrtl.h"
47 #include "cfganal.h"
48 #include "lcm.h"
49 #include "cfgbuild.h"
50 #include "cfgcleanup.h"
51 #include "predict.h"
52 #include "basic-block.h"
53 #include "df.h"
54 #include "hard-reg-set.h"
55 #include "output.h"
56 #include "hashtab.h"
57 #include "function.h"
58 #include "flags.h"
59 #include "statistics.h"
60 #include "real.h"
61 #include "fixed-value.h"
62 #include "insn-config.h"
63 #include "expmed.h"
64 #include "dojump.h"
65 #include "explow.h"
66 #include "emit-rtl.h"
67 #include "stmt.h"
68 #include "expr.h"
69 #include "reload.h"
70 #include "toplev.h"
71 #include "target.h"
72 #include "target-def.h"
73 #include "targhooks.h"
74 #include "ggc.h"
75 #include "tm_p.h"
76 #include "recog.h"
77 #include "langhooks.h"
78 #include "diagnostic-core.h"
79 #include "hash-table.h"
80 #include "tree-ssa-alias.h"
81 #include "internal-fn.h"
82 #include "gimple-fold.h"
83 #include "tree-eh.h"
84 #include "gimple-expr.h"
85 #include "is-a.h"
86 #include "gimple.h"
87 #include "gimplify.h"
88 #include "optabs.h"
89 #include "dwarf2.h"
90 #include "cfgloop.h"
91 #include "tree-vectorizer.h"
92 #include "aarch64-cost-tables.h"
93 #include "dumpfile.h"
94 #include "builtins.h"
95 #include "rtl-iter.h"
96 #include "tm-constrs.h"
97
98 /* Defined for convenience. */
99 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
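/* For illustration: under the usual LP64 ABI, POINTER_SIZE is 64 bits and
   BITS_PER_UNIT is 8, so POINTER_BYTES is 8; under ILP32 it is 4.  */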
100
101 /* Classifies an address.
102
103 ADDRESS_REG_IMM
104 A simple base register plus immediate offset.
105
106 ADDRESS_REG_WB
107 A base register indexed by immediate offset with writeback.
108
109 ADDRESS_REG_REG
110 A base register indexed by (optionally scaled) register.
111
112 ADDRESS_REG_UXTW
113 A base register indexed by (optionally scaled) zero-extended register.
114
115 ADDRESS_REG_SXTW
116 A base register indexed by (optionally scaled) sign-extended register.
117
118 ADDRESS_LO_SUM
119 A LO_SUM rtx with a base register and "LO12" symbol relocation.
120
121 ADDRESS_SYMBOLIC
122 A constant symbolic address, in pc-relative literal pool. */
123
124 enum aarch64_address_type {
125 ADDRESS_REG_IMM,
126 ADDRESS_REG_WB,
127 ADDRESS_REG_REG,
128 ADDRESS_REG_UXTW,
129 ADDRESS_REG_SXTW,
130 ADDRESS_LO_SUM,
131 ADDRESS_SYMBOLIC
132 };
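/* Rough examples of the addressing forms classified above:
     ADDRESS_REG_IMM    ldr x0, [x1, #16]
     ADDRESS_REG_WB     ldr x0, [x1, #16]!   or   ldr x0, [x1], #16
     ADDRESS_REG_REG    ldr x0, [x1, x2, lsl #3]
     ADDRESS_REG_UXTW   ldr x0, [x1, w2, uxtw #3]
     ADDRESS_REG_SXTW   ldr x0, [x1, w2, sxtw #3]
     ADDRESS_LO_SUM     ldr x0, [x1, #:lo12:foo]
     ADDRESS_SYMBOLIC   ldr x0, .Lpool_entry   (pc-relative literal)  */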
133
134 struct aarch64_address_info {
135 enum aarch64_address_type type;
136 rtx base;
137 rtx offset;
138 int shift;
139 enum aarch64_symbol_type symbol_type;
140 };
141
142 struct simd_immediate_info
143 {
144 rtx value;
145 int shift;
146 int element_width;
147 bool mvn;
148 bool msl;
149 };
150
151 /* The current code model. */
152 enum aarch64_code_model aarch64_cmodel;
153
154 #ifdef HAVE_AS_TLS
155 #undef TARGET_HAVE_TLS
156 #define TARGET_HAVE_TLS 1
157 #endif
158
159 static bool aarch64_lra_p (void);
160 static bool aarch64_composite_type_p (const_tree, machine_mode);
161 static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
162 const_tree,
163 machine_mode *, int *,
164 bool *);
165 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
166 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
167 static void aarch64_override_options_after_change (void);
168 static bool aarch64_vector_mode_supported_p (machine_mode);
169 static unsigned bit_count (unsigned HOST_WIDE_INT);
170 static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
171 const unsigned char *sel);
172 static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
173
174 /* Major revision number of the ARM Architecture implemented by the target. */
175 unsigned aarch64_architecture_version;
176
177 /* The processor for which instructions should be scheduled. */
178 enum aarch64_processor aarch64_tune = cortexa53;
179
180 /* The current tuning set. */
181 const struct tune_params *aarch64_tune_params;
182
183 /* Mask to specify which instructions we are allowed to generate. */
184 unsigned long aarch64_isa_flags = 0;
185
186 /* Mask to specify which instruction scheduling options should be used. */
187 unsigned long aarch64_tune_flags = 0;
188
189 /* Tuning parameters. */
190
191 #if HAVE_DESIGNATED_INITIALIZERS
192 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
193 #else
194 #define NAMED_PARAM(NAME, VAL) (VAL)
195 #endif
196
197 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
198 __extension__
199 #endif
200
201 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
202 __extension__
203 #endif
204 static const struct cpu_addrcost_table generic_addrcost_table =
205 {
206 #if HAVE_DESIGNATED_INITIALIZERS
207 .addr_scale_costs =
208 #endif
209 {
210 NAMED_PARAM (hi, 0),
211 NAMED_PARAM (si, 0),
212 NAMED_PARAM (di, 0),
213 NAMED_PARAM (ti, 0),
214 },
215 NAMED_PARAM (pre_modify, 0),
216 NAMED_PARAM (post_modify, 0),
217 NAMED_PARAM (register_offset, 0),
218 NAMED_PARAM (register_extend, 0),
219 NAMED_PARAM (imm_offset, 0)
220 };
221
222 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
223 __extension__
224 #endif
225 static const struct cpu_addrcost_table cortexa57_addrcost_table =
226 {
227 #if HAVE_DESIGNATED_INITIALIZERS
228 .addr_scale_costs =
229 #endif
230 {
231 NAMED_PARAM (hi, 1),
232 NAMED_PARAM (si, 0),
233 NAMED_PARAM (di, 0),
234 NAMED_PARAM (ti, 1),
235 },
236 NAMED_PARAM (pre_modify, 0),
237 NAMED_PARAM (post_modify, 0),
238 NAMED_PARAM (register_offset, 0),
239 NAMED_PARAM (register_extend, 0),
240 NAMED_PARAM (imm_offset, 0),
241 };
242
243 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
244 __extension__
245 #endif
246 static const struct cpu_regmove_cost generic_regmove_cost =
247 {
248 NAMED_PARAM (GP2GP, 1),
249 /* Avoid the use of slow int<->fp moves for spilling by setting
250 their cost higher than memmov_cost. */
251 NAMED_PARAM (GP2FP, 5),
252 NAMED_PARAM (FP2GP, 5),
253 NAMED_PARAM (FP2FP, 2)
254 };
255
256 static const struct cpu_regmove_cost cortexa57_regmove_cost =
257 {
258 NAMED_PARAM (GP2GP, 1),
259 /* Avoid the use of slow int<->fp moves for spilling by setting
260 their cost higher than memmov_cost. */
261 NAMED_PARAM (GP2FP, 5),
262 NAMED_PARAM (FP2GP, 5),
263 NAMED_PARAM (FP2FP, 2)
264 };
265
266 static const struct cpu_regmove_cost cortexa53_regmove_cost =
267 {
268 NAMED_PARAM (GP2GP, 1),
269 /* Avoid the use of slow int<->fp moves for spilling by setting
270 their cost higher than memmov_cost. */
271 NAMED_PARAM (GP2FP, 5),
272 NAMED_PARAM (FP2GP, 5),
273 NAMED_PARAM (FP2FP, 2)
274 };
275
276 static const struct cpu_regmove_cost thunderx_regmove_cost =
277 {
278 NAMED_PARAM (GP2GP, 2),
279 NAMED_PARAM (GP2FP, 2),
280 NAMED_PARAM (FP2GP, 6),
281 NAMED_PARAM (FP2FP, 4)
282 };
283
284 /* Generic costs for vector insn classes. */
285 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
286 __extension__
287 #endif
288 static const struct cpu_vector_cost generic_vector_cost =
289 {
290 NAMED_PARAM (scalar_stmt_cost, 1),
291 NAMED_PARAM (scalar_load_cost, 1),
292 NAMED_PARAM (scalar_store_cost, 1),
293 NAMED_PARAM (vec_stmt_cost, 1),
294 NAMED_PARAM (vec_to_scalar_cost, 1),
295 NAMED_PARAM (scalar_to_vec_cost, 1),
296 NAMED_PARAM (vec_align_load_cost, 1),
297 NAMED_PARAM (vec_unalign_load_cost, 1),
298 NAMED_PARAM (vec_unalign_store_cost, 1),
299 NAMED_PARAM (vec_store_cost, 1),
300 NAMED_PARAM (cond_taken_branch_cost, 3),
301 NAMED_PARAM (cond_not_taken_branch_cost, 1)
302 };
303
304 /* Costs for vector insn classes for Cortex-A57. */
305 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
306 __extension__
307 #endif
308 static const struct cpu_vector_cost cortexa57_vector_cost =
309 {
310 NAMED_PARAM (scalar_stmt_cost, 1),
311 NAMED_PARAM (scalar_load_cost, 4),
312 NAMED_PARAM (scalar_store_cost, 1),
313 NAMED_PARAM (vec_stmt_cost, 3),
314 NAMED_PARAM (vec_to_scalar_cost, 8),
315 NAMED_PARAM (scalar_to_vec_cost, 8),
316 NAMED_PARAM (vec_align_load_cost, 5),
317 NAMED_PARAM (vec_unalign_load_cost, 5),
318 NAMED_PARAM (vec_unalign_store_cost, 1),
319 NAMED_PARAM (vec_store_cost, 1),
320 NAMED_PARAM (cond_taken_branch_cost, 1),
321 NAMED_PARAM (cond_not_taken_branch_cost, 1)
322 };
323
324 #define AARCH64_FUSE_NOTHING (0)
325 #define AARCH64_FUSE_MOV_MOVK (1 << 0)
326 #define AARCH64_FUSE_ADRP_ADD (1 << 1)
327 #define AARCH64_FUSE_MOVK_MOVK (1 << 2)
328 #define AARCH64_FUSE_ADRP_LDR (1 << 3)
329 #define AARCH64_FUSE_CMP_BRANCH (1 << 4)
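/* Each AARCH64_FUSE_* flag names an instruction pair that the scheduler
   tries to keep adjacent so a fusing core can treat it as one macro-op;
   roughly:
     MOV_MOVK     mov  x0, #0x1234      +  movk x0, #0x5678, lsl #16
     ADRP_ADD     adrp x0, sym          +  add  x0, x0, :lo12:sym
     MOVK_MOVK    back-to-back movk writes to the same destination register
     ADRP_LDR     adrp x0, sym          +  ldr  x1, [x0, :lo12:sym]
     CMP_BRANCH   a compare             +  the conditional branch that uses it  */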
330
331 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
332 __extension__
333 #endif
334 static const struct tune_params generic_tunings =
335 {
336 &cortexa57_extra_costs,
337 &generic_addrcost_table,
338 &generic_regmove_cost,
339 &generic_vector_cost,
340 NAMED_PARAM (memmov_cost, 4),
341 NAMED_PARAM (issue_rate, 2),
342 NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING),
343 8, /* function_align. */
344 8, /* jump_align. */
345 4, /* loop_align. */
346 2, /* int_reassoc_width. */
347 4, /* fp_reassoc_width. */
348 1 /* vec_reassoc_width. */
349 };
350
351 static const struct tune_params cortexa53_tunings =
352 {
353 &cortexa53_extra_costs,
354 &generic_addrcost_table,
355 &cortexa53_regmove_cost,
356 &generic_vector_cost,
357 NAMED_PARAM (memmov_cost, 4),
358 NAMED_PARAM (issue_rate, 2),
359 NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
360 | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR)),
361 8, /* function_align. */
362 8, /* jump_align. */
363 4, /* loop_align. */
364 2, /* int_reassoc_width. */
365 4, /* fp_reassoc_width. */
366 1 /* vec_reassoc_width. */
367 };
368
369 static const struct tune_params cortexa57_tunings =
370 {
371 &cortexa57_extra_costs,
372 &cortexa57_addrcost_table,
373 &cortexa57_regmove_cost,
374 &cortexa57_vector_cost,
375 NAMED_PARAM (memmov_cost, 4),
376 NAMED_PARAM (issue_rate, 3),
377 NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD | AARCH64_FUSE_MOVK_MOVK)),
378 16, /* function_align. */
379 8, /* jump_align. */
380 4, /* loop_align. */
381 2, /* int_reassoc_width. */
382 4, /* fp_reassoc_width. */
383 1 /* vec_reassoc_width. */
384 };
385
386 static const struct tune_params thunderx_tunings =
387 {
388 &thunderx_extra_costs,
389 &generic_addrcost_table,
390 &thunderx_regmove_cost,
391 &generic_vector_cost,
392 NAMED_PARAM (memmov_cost, 6),
393 NAMED_PARAM (issue_rate, 2),
394 NAMED_PARAM (fuseable_ops, AARCH64_FUSE_CMP_BRANCH),
395 8, /* function_align. */
396 8, /* jump_align. */
397 8, /* loop_align. */
398 2, /* int_reassoc_width. */
399 4, /* fp_reassoc_width. */
400 1 /* vec_reassoc_width. */
401 };
402
403 /* A processor implementing AArch64. */
404 struct processor
405 {
406 const char *const name;
407 enum aarch64_processor core;
408 const char *arch;
409 unsigned architecture_version;
410 const unsigned long flags;
411 const struct tune_params *const tune;
412 };
413
414 /* Processor cores implementing AArch64. */
415 static const struct processor all_cores[] =
416 {
417 #define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS) \
418 {NAME, SCHED, #ARCH, ARCH, FLAGS, &COSTS##_tunings},
419 #include "aarch64-cores.def"
420 #undef AARCH64_CORE
421 {"generic", cortexa53, "8", 8, AARCH64_FL_FOR_ARCH8, &generic_tunings},
422 {NULL, aarch64_none, NULL, 0, 0, NULL}
423 };
424
425 /* Architectures implementing AArch64. */
426 static const struct processor all_architectures[] =
427 {
428 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
429 {NAME, CORE, #ARCH, ARCH, FLAGS, NULL},
430 #include "aarch64-arches.def"
431 #undef AARCH64_ARCH
432 {NULL, aarch64_none, NULL, 0, 0, NULL}
433 };
434
435 /* Target specification. These are populated as command-line arguments
436 are processed, or NULL if not specified. */
437 static const struct processor *selected_arch;
438 static const struct processor *selected_cpu;
439 static const struct processor *selected_tune;
440
441 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
442
443 /* An ISA extension in the co-processor and main instruction set space. */
444 struct aarch64_option_extension
445 {
446 const char *const name;
447 const unsigned long flags_on;
448 const unsigned long flags_off;
449 };
450
451 /* ISA extensions in AArch64. */
452 static const struct aarch64_option_extension all_extensions[] =
453 {
454 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
455 {NAME, FLAGS_ON, FLAGS_OFF},
456 #include "aarch64-option-extensions.def"
457 #undef AARCH64_OPT_EXTENSION
458 {NULL, 0, 0}
459 };
460
461 /* Used to track the size of an address when generating a pre/post
462 increment address. */
463 static machine_mode aarch64_memory_reference_mode;
464
465 /* Used to force GTY into this file. */
466 static GTY(()) int gty_dummy;
467
468 /* A table of valid AArch64 "bitmask immediate" values for
469 logical instructions. */
470
471 #define AARCH64_NUM_BITMASKS 5334
472 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
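/* A "bitmask immediate" is a contiguous run of set bits, rotated and then
   replicated across equal-sized elements of the 64-bit value; e.g.
   0x5555555555555555, 0x00ff00ff00ff00ff and 0x0003fffc0003fffc are
   encodable, while 0 and ~0 are not.  Enumerating every such 64-bit
   pattern gives the 5334 entries counted above; the move-immediate code
   later in this file searches this table.  */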
473
474 typedef enum aarch64_cond_code
475 {
476 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
477 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
478 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
479 }
480 aarch64_cc;
481
482 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
483
484 /* The condition codes of the processor, and the inverse function. */
485 static const char * const aarch64_condition_codes[] =
486 {
487 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
488 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
489 };
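/* AARCH64_INVERSE_CONDITION_CODE relies on the table above placing each
   ordinary condition next to its inverse, so flipping bit 0 inverts it:
   e.g. EQ (0) <-> NE (1), CS (2) <-> CC (3), GE (10) <-> LT (11),
   HI <-> LS.  */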
490
491 static unsigned int
492 aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED)
493 {
494 return 2;
495 }
496
497 static int
498 aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
499 enum machine_mode mode)
500 {
501 if (VECTOR_MODE_P (mode))
502 return aarch64_tune_params->vec_reassoc_width;
503 if (INTEGRAL_MODE_P (mode))
504 return aarch64_tune_params->int_reassoc_width;
505 if (FLOAT_MODE_P (mode))
506 return aarch64_tune_params->fp_reassoc_width;
507 return 1;
508 }
509
510 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
511 unsigned
512 aarch64_dbx_register_number (unsigned regno)
513 {
514 if (GP_REGNUM_P (regno))
515 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
516 else if (regno == SP_REGNUM)
517 return AARCH64_DWARF_SP;
518 else if (FP_REGNUM_P (regno))
519 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
520
521 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
522 equivalent DWARF register. */
523 return DWARF_FRAME_REGISTERS;
524 }
525
526 /* Return TRUE if MODE is any of the large INT modes. */
527 static bool
528 aarch64_vect_struct_mode_p (machine_mode mode)
529 {
530 return mode == OImode || mode == CImode || mode == XImode;
531 }
532
533 /* Return TRUE if MODE is any of the vector modes. */
534 static bool
535 aarch64_vector_mode_p (machine_mode mode)
536 {
537 return aarch64_vector_mode_supported_p (mode)
538 || aarch64_vect_struct_mode_p (mode);
539 }
540
541 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
542 static bool
543 aarch64_array_mode_supported_p (machine_mode mode,
544 unsigned HOST_WIDE_INT nelems)
545 {
546 if (TARGET_SIMD
547 && AARCH64_VALID_SIMD_QREG_MODE (mode)
548 && (nelems >= 2 && nelems <= 4))
549 return true;
550
551 return false;
552 }
553
554 /* Implement HARD_REGNO_NREGS. */
555
556 int
557 aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
558 {
559 switch (aarch64_regno_regclass (regno))
560 {
561 case FP_REGS:
562 case FP_LO_REGS:
563 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
564 default:
565 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
566 }
567 gcc_unreachable ();
568 }
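/* Worked example of the calculation above, with UNITS_PER_WORD == 8 and
   UNITS_PER_VREG == 16: a 16-byte TImode value occupies one FP/SIMD
   register but two general registers, while a 32-byte OImode value
   occupies two FP/SIMD registers.  */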
569
570 /* Implement HARD_REGNO_MODE_OK. */
571
572 int
573 aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
574 {
575 if (GET_MODE_CLASS (mode) == MODE_CC)
576 return regno == CC_REGNUM;
577
578 if (regno == SP_REGNUM)
579 /* The purpose of comparing with ptr_mode is to support the
580 global register variable associated with the stack pointer
581 register via the syntax of asm ("wsp") in ILP32. */
582 return mode == Pmode || mode == ptr_mode;
583
584 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
585 return mode == Pmode;
586
587 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
588 return 1;
589
590 if (FP_REGNUM_P (regno))
591 {
592 if (aarch64_vect_struct_mode_p (mode))
593 return
594 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
595 else
596 return 1;
597 }
598
599 return 0;
600 }
601
602 /* Implement HARD_REGNO_CALLER_SAVE_MODE. */
603 machine_mode
604 aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
605 machine_mode mode)
606 {
607 /* Handle modes that fit within single registers. */
608 if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
609 {
610 if (GET_MODE_SIZE (mode) >= 4)
611 return mode;
612 else
613 return SImode;
614 }
615 /* Fall back to generic for multi-reg and very large modes. */
616 else
617 return choose_hard_reg_mode (regno, nregs, false);
618 }
619
620 /* Return true if calls to DECL should be treated as
621 long-calls (i.e. called via a register). */
622 static bool
623 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
624 {
625 return false;
626 }
627
628 /* Return true if calls to symbol-ref SYM should be treated as
629 long-calls (i.e. called via a register). */
630 bool
631 aarch64_is_long_call_p (rtx sym)
632 {
633 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
634 }
635
636 /* Return true if the offsets to a zero/sign-extract operation
637 represent an expression that matches an extend operation. The
638 operands represent the parameters from
639
640 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
641 bool
642 aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
643 rtx extract_imm)
644 {
645 HOST_WIDE_INT mult_val, extract_val;
646
647 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
648 return false;
649
650 mult_val = INTVAL (mult_imm);
651 extract_val = INTVAL (extract_imm);
652
653 if (extract_val > 8
654 && extract_val < GET_MODE_BITSIZE (mode)
655 && exact_log2 (extract_val & ~7) > 0
656 && (extract_val & 7) <= 4
657 && mult_val == (1 << (extract_val & 7)))
658 return true;
659
660 return false;
661 }
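/* For example, in DImode the pair MULT_IMM == 4, EXTRACT_IMM == 34 is
   accepted (34 & ~7 == 32 is a power of two, 34 & 7 == 2 and 4 == 1 << 2):
   the low 34 bits of reg * 4 are the low 32 bits of reg extended and
   shifted left by 2, i.e. the "uxtw #2" / "sxtw #2" extended-register
   form.  */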
662
663 /* Emit an insn that's a simple single-set. Both the operands must be
664 known to be valid. */
665 inline static rtx
666 emit_set_insn (rtx x, rtx y)
667 {
668 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
669 }
670
671 /* X and Y are two things to compare using CODE. Emit the compare insn and
672 return the rtx for register 0 in the proper mode. */
673 rtx
674 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
675 {
676 machine_mode mode = SELECT_CC_MODE (code, x, y);
677 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
678
679 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
680 return cc_reg;
681 }
682
683 /* Build the SYMBOL_REF for __tls_get_addr. */
684
685 static GTY(()) rtx tls_get_addr_libfunc;
686
687 rtx
688 aarch64_tls_get_addr (void)
689 {
690 if (!tls_get_addr_libfunc)
691 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
692 return tls_get_addr_libfunc;
693 }
694
695 /* Return the TLS model to use for ADDR. */
696
697 static enum tls_model
698 tls_symbolic_operand_type (rtx addr)
699 {
700 enum tls_model tls_kind = TLS_MODEL_NONE;
701 rtx sym, addend;
702
703 if (GET_CODE (addr) == CONST)
704 {
705 split_const (addr, &sym, &addend);
706 if (GET_CODE (sym) == SYMBOL_REF)
707 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
708 }
709 else if (GET_CODE (addr) == SYMBOL_REF)
710 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
711
712 return tls_kind;
713 }
714
715 /* We'll allow LO_SUM expressions in our legitimate addresses, so that
716 combine can take care of combining addresses where necessary, but
717 for generation purposes we'll generate the address
718 as:
719 RTL Absolute
720 tmp = hi (symbol_ref); adrp x1, foo
721 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
722 nop
723
724 PIC TLS
725 adrp x1, :got:foo adrp tmp, :tlsgd:foo
726 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
727 bl __tls_get_addr
728 nop
729
730 Load TLS symbol, depending on TLS mechanism and TLS access model.
731
732 Global Dynamic - Traditional TLS:
733 adrp tmp, :tlsgd:imm
734 add dest, tmp, #:tlsgd_lo12:imm
735 bl __tls_get_addr
736
737 Global Dynamic - TLS Descriptors:
738 adrp dest, :tlsdesc:imm
739 ldr tmp, [dest, #:tlsdesc_lo12:imm]
740 add dest, dest, #:tlsdesc_lo12:imm
741 blr tmp
742 mrs tp, tpidr_el0
743 add dest, dest, tp
744
745 Initial Exec:
746 mrs tp, tpidr_el0
747 adrp tmp, :gottprel:imm
748 ldr dest, [tmp, #:gottprel_lo12:imm]
749 add dest, dest, tp
750
751 Local Exec:
752 mrs tp, tpidr_el0
753 add t0, tp, #:tprel_hi12:imm
754 add t0, #:tprel_lo12_nc:imm
755 */
756
757 static void
758 aarch64_load_symref_appropriately (rtx dest, rtx imm,
759 enum aarch64_symbol_type type)
760 {
761 switch (type)
762 {
763 case SYMBOL_SMALL_ABSOLUTE:
764 {
765 /* In ILP32, the mode of dest can be either SImode or DImode. */
766 rtx tmp_reg = dest;
767 machine_mode mode = GET_MODE (dest);
768
769 gcc_assert (mode == Pmode || mode == ptr_mode);
770
771 if (can_create_pseudo_p ())
772 tmp_reg = gen_reg_rtx (mode);
773
774 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
775 emit_insn (gen_add_losym (dest, tmp_reg, imm));
776 return;
777 }
778
779 case SYMBOL_TINY_ABSOLUTE:
780 emit_insn (gen_rtx_SET (Pmode, dest, imm));
781 return;
782
783 case SYMBOL_SMALL_GOT:
784 {
785 /* In ILP32, the mode of dest can be either SImode or DImode,
786 while the got entry is always of SImode size. The mode of
787 dest depends on how dest is used: if dest is assigned to a
788 pointer (e.g. stored in memory), it has SImode; it may have
789 DImode if dest is dereferenced to access the memory.
790 This is why we have to handle three different ldr_got_small
791 patterns here (two patterns for ILP32). */
792 rtx tmp_reg = dest;
793 machine_mode mode = GET_MODE (dest);
794
795 if (can_create_pseudo_p ())
796 tmp_reg = gen_reg_rtx (mode);
797
798 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
799 if (mode == ptr_mode)
800 {
801 if (mode == DImode)
802 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
803 else
804 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
805 }
806 else
807 {
808 gcc_assert (mode == Pmode);
809 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
810 }
811
812 return;
813 }
814
815 case SYMBOL_SMALL_TLSGD:
816 {
817 rtx_insn *insns;
818 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
819
820 start_sequence ();
821 aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
822 insns = get_insns ();
823 end_sequence ();
824
825 RTL_CONST_CALL_P (insns) = 1;
826 emit_libcall_block (insns, dest, result, imm);
827 return;
828 }
829
830 case SYMBOL_SMALL_TLSDESC:
831 {
832 machine_mode mode = GET_MODE (dest);
833 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
834 rtx tp;
835
836 gcc_assert (mode == Pmode || mode == ptr_mode);
837
838 /* In ILP32, the got entry is always of SImode size. Unlike
839 small GOT, the dest is fixed at reg 0. */
840 if (TARGET_ILP32)
841 emit_insn (gen_tlsdesc_small_si (imm));
842 else
843 emit_insn (gen_tlsdesc_small_di (imm));
844 tp = aarch64_load_tp (NULL);
845
846 if (mode != Pmode)
847 tp = gen_lowpart (mode, tp);
848
849 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
850 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
851 return;
852 }
853
854 case SYMBOL_SMALL_GOTTPREL:
855 {
856 /* In ILP32, the mode of dest can be either SImode or DImode,
857 while the got entry is always of SImode size. The mode of
858 dest depends on how dest is used: if dest is assigned to a
859 pointer (e.g. stored in memory), it has SImode; it may have
860 DImode if dest is dereferenced to access the memory.
861 This is why we have to handle three different tlsie_small
862 patterns here (two patterns for ILP32). */
863 machine_mode mode = GET_MODE (dest);
864 rtx tmp_reg = gen_reg_rtx (mode);
865 rtx tp = aarch64_load_tp (NULL);
866
867 if (mode == ptr_mode)
868 {
869 if (mode == DImode)
870 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
871 else
872 {
873 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
874 tp = gen_lowpart (mode, tp);
875 }
876 }
877 else
878 {
879 gcc_assert (mode == Pmode);
880 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
881 }
882
883 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
884 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
885 return;
886 }
887
888 case SYMBOL_SMALL_TPREL:
889 {
890 rtx tp = aarch64_load_tp (NULL);
891 emit_insn (gen_tlsle_small (dest, tp, imm));
892 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
893 return;
894 }
895
896 case SYMBOL_TINY_GOT:
897 emit_insn (gen_ldr_got_tiny (dest, imm));
898 return;
899
900 default:
901 gcc_unreachable ();
902 }
903 }
904
905 /* Emit a move from SRC to DEST. Assume that the move expanders can
906 handle all moves if !can_create_pseudo_p (). The distinction is
907 important because, unlike emit_move_insn, the move expanders know
908 how to force Pmode objects into the constant pool even when the
909 constant pool address is not itself legitimate. */
910 static rtx
911 aarch64_emit_move (rtx dest, rtx src)
912 {
913 return (can_create_pseudo_p ()
914 ? emit_move_insn (dest, src)
915 : emit_move_insn_1 (dest, src));
916 }
917
918 /* Split a 128-bit move operation into two 64-bit move operations,
919 taking care to handle partial overlap of register to register
920 copies. Special cases are needed when moving between GP regs and
921 FP regs. SRC can be a register, constant or memory; DST a register
922 or memory. If either operand is memory it must not have any side
923 effects. */
924 void
925 aarch64_split_128bit_move (rtx dst, rtx src)
926 {
927 rtx dst_lo, dst_hi;
928 rtx src_lo, src_hi;
929
930 machine_mode mode = GET_MODE (dst);
931
932 gcc_assert (mode == TImode || mode == TFmode);
933 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
934 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
935
936 if (REG_P (dst) && REG_P (src))
937 {
938 int src_regno = REGNO (src);
939 int dst_regno = REGNO (dst);
940
941 /* Handle FP <-> GP regs. */
942 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
943 {
944 src_lo = gen_lowpart (word_mode, src);
945 src_hi = gen_highpart (word_mode, src);
946
947 if (mode == TImode)
948 {
949 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
950 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
951 }
952 else
953 {
954 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
955 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
956 }
957 return;
958 }
959 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
960 {
961 dst_lo = gen_lowpart (word_mode, dst);
962 dst_hi = gen_highpart (word_mode, dst);
963
964 if (mode == TImode)
965 {
966 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
967 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
968 }
969 else
970 {
971 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
972 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
973 }
974 return;
975 }
976 }
977
978 dst_lo = gen_lowpart (word_mode, dst);
979 dst_hi = gen_highpart (word_mode, dst);
980 src_lo = gen_lowpart (word_mode, src);
981 src_hi = gen_highpart_mode (word_mode, mode, src);
982
983 /* At most one pairing may overlap. */
984 if (reg_overlap_mentioned_p (dst_lo, src_hi))
985 {
986 aarch64_emit_move (dst_hi, src_hi);
987 aarch64_emit_move (dst_lo, src_lo);
988 }
989 else
990 {
991 aarch64_emit_move (dst_lo, src_lo);
992 aarch64_emit_move (dst_hi, src_hi);
993 }
994 }
995
996 bool
997 aarch64_split_128bit_move_p (rtx dst, rtx src)
998 {
999 return (! REG_P (src)
1000 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
1001 }
1002
1003 /* Split a complex SIMD combine. */
1004
1005 void
1006 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
1007 {
1008 machine_mode src_mode = GET_MODE (src1);
1009 machine_mode dst_mode = GET_MODE (dst);
1010
1011 gcc_assert (VECTOR_MODE_P (dst_mode));
1012
1013 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
1014 {
1015 rtx (*gen) (rtx, rtx, rtx);
1016
1017 switch (src_mode)
1018 {
1019 case V8QImode:
1020 gen = gen_aarch64_simd_combinev8qi;
1021 break;
1022 case V4HImode:
1023 gen = gen_aarch64_simd_combinev4hi;
1024 break;
1025 case V2SImode:
1026 gen = gen_aarch64_simd_combinev2si;
1027 break;
1028 case V2SFmode:
1029 gen = gen_aarch64_simd_combinev2sf;
1030 break;
1031 case DImode:
1032 gen = gen_aarch64_simd_combinedi;
1033 break;
1034 case DFmode:
1035 gen = gen_aarch64_simd_combinedf;
1036 break;
1037 default:
1038 gcc_unreachable ();
1039 }
1040
1041 emit_insn (gen (dst, src1, src2));
1042 return;
1043 }
1044 }
1045
1046 /* Split a complex SIMD move. */
1047
1048 void
1049 aarch64_split_simd_move (rtx dst, rtx src)
1050 {
1051 machine_mode src_mode = GET_MODE (src);
1052 machine_mode dst_mode = GET_MODE (dst);
1053
1054 gcc_assert (VECTOR_MODE_P (dst_mode));
1055
1056 if (REG_P (dst) && REG_P (src))
1057 {
1058 rtx (*gen) (rtx, rtx);
1059
1060 gcc_assert (VECTOR_MODE_P (src_mode));
1061
1062 switch (src_mode)
1063 {
1064 case V16QImode:
1065 gen = gen_aarch64_split_simd_movv16qi;
1066 break;
1067 case V8HImode:
1068 gen = gen_aarch64_split_simd_movv8hi;
1069 break;
1070 case V4SImode:
1071 gen = gen_aarch64_split_simd_movv4si;
1072 break;
1073 case V2DImode:
1074 gen = gen_aarch64_split_simd_movv2di;
1075 break;
1076 case V4SFmode:
1077 gen = gen_aarch64_split_simd_movv4sf;
1078 break;
1079 case V2DFmode:
1080 gen = gen_aarch64_split_simd_movv2df;
1081 break;
1082 default:
1083 gcc_unreachable ();
1084 }
1085
1086 emit_insn (gen (dst, src));
1087 return;
1088 }
1089 }
1090
1091 static rtx
1092 aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
1093 {
1094 if (can_create_pseudo_p ())
1095 return force_reg (mode, value);
1096 else
1097 {
1098 x = aarch64_emit_move (x, value);
1099 return x;
1100 }
1101 }
1102
1103
1104 static rtx
1105 aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
1106 {
1107 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
1108 {
1109 rtx high;
1110 /* Load the full offset into a register. This
1111 might be improvable in the future. */
1112 high = GEN_INT (offset);
1113 offset = 0;
1114 high = aarch64_force_temporary (mode, temp, high);
1115 reg = aarch64_force_temporary (mode, temp,
1116 gen_rtx_PLUS (mode, high, reg));
1117 }
1118 return plus_constant (mode, reg, offset);
1119 }
1120
1121 static int
1122 aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
1123 machine_mode mode)
1124 {
1125 unsigned HOST_WIDE_INT mask;
1126 int i;
1127 bool first;
1128 unsigned HOST_WIDE_INT val;
1129 bool subtargets;
1130 rtx subtarget;
1131 int one_match, zero_match, first_not_ffff_match;
1132 int num_insns = 0;
1133
1134 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1135 {
1136 if (generate)
1137 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1138 num_insns++;
1139 return num_insns;
1140 }
1141
1142 if (mode == SImode)
1143 {
1144 /* We know we can't do this in 1 insn, and we must be able to do it
1145 in two; so don't mess around looking for sequences that don't buy
1146 us anything. */
1147 if (generate)
1148 {
1149 emit_insn (gen_rtx_SET (VOIDmode, dest,
1150 GEN_INT (INTVAL (imm) & 0xffff)));
1151 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1152 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1153 }
1154 num_insns += 2;
1155 return num_insns;
1156 }
1157
1158 /* Remaining cases are all for DImode. */
1159
1160 val = INTVAL (imm);
1161 subtargets = optimize && can_create_pseudo_p ();
1162
1163 one_match = 0;
1164 zero_match = 0;
1165 mask = 0xffff;
1166 first_not_ffff_match = -1;
1167
1168 for (i = 0; i < 64; i += 16, mask <<= 16)
1169 {
1170 if ((val & mask) == mask)
1171 one_match++;
1172 else
1173 {
1174 if (first_not_ffff_match < 0)
1175 first_not_ffff_match = i;
1176 if ((val & mask) == 0)
1177 zero_match++;
1178 }
1179 }
1180
1181 if (one_match == 2)
1182 {
1183 /* Set one of the quarters and then insert back into result. */
1184 mask = 0xffffll << first_not_ffff_match;
1185 if (generate)
1186 {
1187 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1188 emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
1189 GEN_INT ((val >> first_not_ffff_match)
1190 & 0xffff)));
1191 }
1192 num_insns += 2;
1193 return num_insns;
1194 }
1195
1196 if (zero_match == 2)
1197 goto simple_sequence;
1198
1199 mask = 0x0ffff0000UL;
1200 for (i = 16; i < 64; i += 16, mask <<= 16)
1201 {
1202 HOST_WIDE_INT comp = mask & ~(mask - 1);
1203
1204 if (aarch64_uimm12_shift (val - (val & mask)))
1205 {
1206 if (generate)
1207 {
1208 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1209 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1210 GEN_INT (val & mask)));
1211 emit_insn (gen_adddi3 (dest, subtarget,
1212 GEN_INT (val - (val & mask))));
1213 }
1214 num_insns += 2;
1215 return num_insns;
1216 }
1217 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1218 {
1219 if (generate)
1220 {
1221 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1222 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1223 GEN_INT ((val + comp) & mask)));
1224 emit_insn (gen_adddi3 (dest, subtarget,
1225 GEN_INT (val - ((val + comp) & mask))));
1226 }
1227 num_insns += 2;
1228 return num_insns;
1229 }
1230 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1231 {
1232 if (generate)
1233 {
1234 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1235 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1236 GEN_INT ((val - comp) | ~mask)));
1237 emit_insn (gen_adddi3 (dest, subtarget,
1238 GEN_INT (val - ((val - comp) | ~mask))));
1239 }
1240 num_insns += 2;
1241 return num_insns;
1242 }
1243 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1244 {
1245 if (generate)
1246 {
1247 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1248 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1249 GEN_INT (val | ~mask)));
1250 emit_insn (gen_adddi3 (dest, subtarget,
1251 GEN_INT (val - (val | ~mask))));
1252 }
1253 num_insns += 2;
1254 return num_insns;
1255 }
1256 }
1257
1258 /* See if we can do it by arithmetically combining two
1259 immediates. */
1260 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1261 {
1262 int j;
1263 mask = 0xffff;
1264
1265 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1266 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1267 {
1268 if (generate)
1269 {
1270 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1271 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1272 GEN_INT (aarch64_bitmasks[i])));
1273 emit_insn (gen_adddi3 (dest, subtarget,
1274 GEN_INT (val - aarch64_bitmasks[i])));
1275 }
1276 num_insns += 2;
1277 return num_insns;
1278 }
1279
1280 for (j = 0; j < 64; j += 16, mask <<= 16)
1281 {
1282 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1283 {
1284 if (generate)
1285 {
1286 emit_insn (gen_rtx_SET (VOIDmode, dest,
1287 GEN_INT (aarch64_bitmasks[i])));
1288 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1289 GEN_INT ((val >> j) & 0xffff)));
1290 }
1291 num_insns += 2;
1292 return num_insns;
1293 }
1294 }
1295 }
1296
1297 /* See if we can do it by logically combining two immediates. */
1298 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1299 {
1300 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1301 {
1302 int j;
1303
1304 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1305 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1306 {
1307 if (generate)
1308 {
1309 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1310 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1311 GEN_INT (aarch64_bitmasks[i])));
1312 emit_insn (gen_iordi3 (dest, subtarget,
1313 GEN_INT (aarch64_bitmasks[j])));
1314 }
1315 num_insns += 2;
1316 return num_insns;
1317 }
1318 }
1319 else if ((val & aarch64_bitmasks[i]) == val)
1320 {
1321 int j;
1322
1323 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1324 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1325 {
1326 if (generate)
1327 {
1328 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1329 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1330 GEN_INT (aarch64_bitmasks[j])));
1331 emit_insn (gen_anddi3 (dest, subtarget,
1332 GEN_INT (aarch64_bitmasks[i])));
1333 }
1334 num_insns += 2;
1335 return num_insns;
1336 }
1337 }
1338 }
1339
1340 if (one_match > zero_match)
1341 {
1342 /* Set either first three quarters or all but the third. */
1343 mask = 0xffffll << (16 - first_not_ffff_match);
1344 if (generate)
1345 emit_insn (gen_rtx_SET (VOIDmode, dest,
1346 GEN_INT (val | mask | 0xffffffff00000000ull)));
1347 num_insns ++;
1348
1349 /* Now insert other two quarters. */
1350 for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
1351 i < 64; i += 16, mask <<= 16)
1352 {
1353 if ((val & mask) != mask)
1354 {
1355 if (generate)
1356 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1357 GEN_INT ((val >> i) & 0xffff)));
1358 num_insns ++;
1359 }
1360 }
1361 return num_insns;
1362 }
1363
1364 simple_sequence:
1365 first = true;
1366 mask = 0xffff;
1367 for (i = 0; i < 64; i += 16, mask <<= 16)
1368 {
1369 if ((val & mask) != 0)
1370 {
1371 if (first)
1372 {
1373 if (generate)
1374 emit_insn (gen_rtx_SET (VOIDmode, dest,
1375 GEN_INT (val & mask)));
1376 num_insns ++;
1377 first = false;
1378 }
1379 else
1380 {
1381 if (generate)
1382 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1383 GEN_INT ((val >> i) & 0xffff)));
1384 num_insns ++;
1385 }
1386 }
1387 }
1388
1389 return num_insns;
1390 }
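/* A rough trace of the routine above for the DImode constant
   0x0000cafe0000beef: no 16-bit chunk is 0xffff and two chunks are zero,
   so control reaches simple_sequence and two insns are counted/emitted:
       mov  dest, #0xbeef
       movk dest, #0xcafe, lsl #32  */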
1391
1392
1393 void
1394 aarch64_expand_mov_immediate (rtx dest, rtx imm)
1395 {
1396 machine_mode mode = GET_MODE (dest);
1397
1398 gcc_assert (mode == SImode || mode == DImode);
1399
1400 /* Check on what type of symbol it is. */
1401 if (GET_CODE (imm) == SYMBOL_REF
1402 || GET_CODE (imm) == LABEL_REF
1403 || GET_CODE (imm) == CONST)
1404 {
1405 rtx mem, base, offset;
1406 enum aarch64_symbol_type sty;
1407
1408 /* If we have (const (plus symbol offset)), separate out the offset
1409 before we start classifying the symbol. */
1410 split_const (imm, &base, &offset);
1411
1412 sty = aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR);
1413 switch (sty)
1414 {
1415 case SYMBOL_FORCE_TO_MEM:
1416 if (offset != const0_rtx
1417 && targetm.cannot_force_const_mem (mode, imm))
1418 {
1419 gcc_assert (can_create_pseudo_p ());
1420 base = aarch64_force_temporary (mode, dest, base);
1421 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1422 aarch64_emit_move (dest, base);
1423 return;
1424 }
1425 mem = force_const_mem (ptr_mode, imm);
1426 gcc_assert (mem);
1427 if (mode != ptr_mode)
1428 mem = gen_rtx_ZERO_EXTEND (mode, mem);
1429 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1430 return;
1431
1432 case SYMBOL_SMALL_TLSGD:
1433 case SYMBOL_SMALL_TLSDESC:
1434 case SYMBOL_SMALL_GOTTPREL:
1435 case SYMBOL_SMALL_GOT:
1436 case SYMBOL_TINY_GOT:
1437 if (offset != const0_rtx)
1438 {
1439 gcc_assert(can_create_pseudo_p ());
1440 base = aarch64_force_temporary (mode, dest, base);
1441 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1442 aarch64_emit_move (dest, base);
1443 return;
1444 }
1445 /* FALLTHRU */
1446
1447 case SYMBOL_SMALL_TPREL:
1448 case SYMBOL_SMALL_ABSOLUTE:
1449 case SYMBOL_TINY_ABSOLUTE:
1450 aarch64_load_symref_appropriately (dest, imm, sty);
1451 return;
1452
1453 default:
1454 gcc_unreachable ();
1455 }
1456 }
1457
1458 if (!CONST_INT_P (imm))
1459 {
1460 if (GET_CODE (imm) == HIGH)
1461 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1462 else
1463 {
1464 rtx mem = force_const_mem (mode, imm);
1465 gcc_assert (mem);
1466 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1467 }
1468
1469 return;
1470 }
1471
1472 aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
1473 }
1474
1475 static bool
1476 aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
1477 tree exp ATTRIBUTE_UNUSED)
1478 {
1479 /* Currently, always true. */
1480 return true;
1481 }
1482
1483 /* Implement TARGET_PASS_BY_REFERENCE. */
1484
1485 static bool
1486 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1487 machine_mode mode,
1488 const_tree type,
1489 bool named ATTRIBUTE_UNUSED)
1490 {
1491 HOST_WIDE_INT size;
1492 machine_mode dummymode;
1493 int nregs;
1494
1495 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1496 size = (mode == BLKmode && type)
1497 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1498
1499 /* Aggregates are passed by reference based on their size. */
1500 if (type && AGGREGATE_TYPE_P (type))
1501 {
1502 size = int_size_in_bytes (type);
1503 }
1504
1505 /* Variable sized arguments are always passed by reference. */
1506 if (size < 0)
1507 return true;
1508
1509 /* Can this be a candidate to be passed in fp/simd register(s)? */
1510 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1511 &dummymode, &nregs,
1512 NULL))
1513 return false;
1514
1515 /* Arguments which are variable sized or larger than 2 registers are
1516 passed by reference unless they are a homogeneous floating-point
1517 aggregate. */
1518 return size > 2 * UNITS_PER_WORD;
1519 }
1520
1521 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1522 static bool
1523 aarch64_return_in_msb (const_tree valtype)
1524 {
1525 machine_mode dummy_mode;
1526 int dummy_int;
1527
1528 /* Never happens in little-endian mode. */
1529 if (!BYTES_BIG_ENDIAN)
1530 return false;
1531
1532 /* Only composite types smaller than or equal to 16 bytes can
1533 be potentially returned in registers. */
1534 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1535 || int_size_in_bytes (valtype) <= 0
1536 || int_size_in_bytes (valtype) > 16)
1537 return false;
1538
1539 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1540 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1541 is always passed/returned in the least significant bits of fp/simd
1542 register(s). */
1543 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1544 &dummy_mode, &dummy_int, NULL))
1545 return false;
1546
1547 return true;
1548 }
1549
1550 /* Implement TARGET_FUNCTION_VALUE.
1551 Define how to find the value returned by a function. */
1552
1553 static rtx
1554 aarch64_function_value (const_tree type, const_tree func,
1555 bool outgoing ATTRIBUTE_UNUSED)
1556 {
1557 machine_mode mode;
1558 int unsignedp;
1559 int count;
1560 machine_mode ag_mode;
1561
1562 mode = TYPE_MODE (type);
1563 if (INTEGRAL_TYPE_P (type))
1564 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1565
1566 if (aarch64_return_in_msb (type))
1567 {
1568 HOST_WIDE_INT size = int_size_in_bytes (type);
1569
1570 if (size % UNITS_PER_WORD != 0)
1571 {
1572 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1573 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1574 }
1575 }
1576
1577 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1578 &ag_mode, &count, NULL))
1579 {
1580 if (!aarch64_composite_type_p (type, mode))
1581 {
1582 gcc_assert (count == 1 && mode == ag_mode);
1583 return gen_rtx_REG (mode, V0_REGNUM);
1584 }
1585 else
1586 {
1587 int i;
1588 rtx par;
1589
1590 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1591 for (i = 0; i < count; i++)
1592 {
1593 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1594 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1595 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1596 XVECEXP (par, 0, i) = tmp;
1597 }
1598 return par;
1599 }
1600 }
1601 else
1602 return gen_rtx_REG (mode, R0_REGNUM);
1603 }
1604
1605 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1606 Return true if REGNO is the number of a hard register in which the values
1607 of a called function may come back.
1608
1609 static bool
1610 aarch64_function_value_regno_p (const unsigned int regno)
1611 {
1612 /* Maximum of 16 bytes can be returned in the general registers. Examples
1613 of 16-byte return values are: 128-bit integers and 16-byte small
1614 structures (excluding homogeneous floating-point aggregates). */
1615 if (regno == R0_REGNUM || regno == R1_REGNUM)
1616 return true;
1617
1618 /* Up to four fp/simd registers can return a function value, e.g. a
1619 homogeneous floating-point aggregate having four members. */
1620 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1621 return !TARGET_GENERAL_REGS_ONLY;
1622
1623 return false;
1624 }
1625
1626 /* Implement TARGET_RETURN_IN_MEMORY.
1627
1628 If the type T of the result of a function is such that
1629 void func (T arg)
1630 would require that arg be passed as a value in a register (or set of
1631 registers) according to the parameter passing rules, then the result
1632 is returned in the same registers as would be used for such an
1633 argument. */
1634
1635 static bool
1636 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1637 {
1638 HOST_WIDE_INT size;
1639 machine_mode ag_mode;
1640 int count;
1641
1642 if (!AGGREGATE_TYPE_P (type)
1643 && TREE_CODE (type) != COMPLEX_TYPE
1644 && TREE_CODE (type) != VECTOR_TYPE)
1645 /* Simple scalar types are always returned in registers. */
1646 return false;
1647
1648 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1649 type,
1650 &ag_mode,
1651 &count,
1652 NULL))
1653 return false;
1654
1655 /* Types larger than 2 registers are returned in memory. */
1656 size = int_size_in_bytes (type);
1657 return (size < 0 || size > 2 * UNITS_PER_WORD);
1658 }
1659
1660 static bool
1661 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
1662 const_tree type, int *nregs)
1663 {
1664 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1665 return aarch64_vfp_is_call_or_return_candidate (mode,
1666 type,
1667 &pcum->aapcs_vfp_rmode,
1668 nregs,
1669 NULL);
1670 }
1671
1672 /* Given MODE and TYPE of a function argument, return the alignment in
1673 bits. The idea is to suppress any stronger alignment requested by
1674 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1675 This is a helper function for local use only. */
1676
1677 static unsigned int
1678 aarch64_function_arg_alignment (machine_mode mode, const_tree type)
1679 {
1680 unsigned int alignment;
1681
1682 if (type)
1683 {
1684 if (!integer_zerop (TYPE_SIZE (type)))
1685 {
1686 if (TYPE_MODE (type) == mode)
1687 alignment = TYPE_ALIGN (type);
1688 else
1689 alignment = GET_MODE_ALIGNMENT (mode);
1690 }
1691 else
1692 alignment = 0;
1693 }
1694 else
1695 alignment = GET_MODE_ALIGNMENT (mode);
1696
1697 return alignment;
1698 }
1699
1700 /* Layout a function argument according to the AAPCS64 rules. The rule
1701 numbers refer to the rule numbers in the AAPCS64. */
1702
1703 static void
1704 aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
1705 const_tree type,
1706 bool named ATTRIBUTE_UNUSED)
1707 {
1708 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1709 int ncrn, nvrn, nregs;
1710 bool allocate_ncrn, allocate_nvrn;
1711 HOST_WIDE_INT size;
1712
1713 /* We need to do this once per argument. */
1714 if (pcum->aapcs_arg_processed)
1715 return;
1716
1717 pcum->aapcs_arg_processed = true;
1718
1719 /* Size in bytes, rounded up to a multiple of 8 bytes. */
1720 size
1721 = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1722 UNITS_PER_WORD);
1723
1724 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1725 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1726 mode,
1727 type,
1728 &nregs);
1729
1730 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1731 The following code thus handles passing by SIMD/FP registers first. */
1732
1733 nvrn = pcum->aapcs_nvrn;
1734
1735 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
1736 and homogeneous short-vector aggregates (HVA). */
1737 if (allocate_nvrn)
1738 {
1739 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1740 {
1741 pcum->aapcs_nextnvrn = nvrn + nregs;
1742 if (!aarch64_composite_type_p (type, mode))
1743 {
1744 gcc_assert (nregs == 1);
1745 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1746 }
1747 else
1748 {
1749 rtx par;
1750 int i;
1751 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1752 for (i = 0; i < nregs; i++)
1753 {
1754 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1755 V0_REGNUM + nvrn + i);
1756 tmp = gen_rtx_EXPR_LIST
1757 (VOIDmode, tmp,
1758 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1759 XVECEXP (par, 0, i) = tmp;
1760 }
1761 pcum->aapcs_reg = par;
1762 }
1763 return;
1764 }
1765 else
1766 {
1767 /* C.3 NSRN is set to 8. */
1768 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1769 goto on_stack;
1770 }
1771 }
1772
1773 ncrn = pcum->aapcs_ncrn;
1774 nregs = size / UNITS_PER_WORD;
1775
1776 /* C6 - C9, though the sign and zero extension semantics are
1777 handled elsewhere. This is the case where the argument fits
1778 entirely in general registers. */
1779 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1780 {
1781 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1782
1783 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1784
1785 /* C.8 if the argument has an alignment of 16 then the NGRN is
1786 rounded up to the next even number. */
1787 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1788 {
1789 ++ncrn;
1790 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1791 }
1792 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1793 A reg is still generated for it, but the caller should be smart
1794 enough not to use it. */
1795 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1796 {
1797 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1798 }
1799 else
1800 {
1801 rtx par;
1802 int i;
1803
1804 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1805 for (i = 0; i < nregs; i++)
1806 {
1807 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1808 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1809 GEN_INT (i * UNITS_PER_WORD));
1810 XVECEXP (par, 0, i) = tmp;
1811 }
1812 pcum->aapcs_reg = par;
1813 }
1814
1815 pcum->aapcs_nextncrn = ncrn + nregs;
1816 return;
1817 }
1818
1819 /* C.11 */
1820 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1821
1822 /* The argument is passed on stack; record the needed number of words for
1823 this argument and align the total size if necessary. */
1824 on_stack:
1825 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
1826 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1827 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1828 16 / UNITS_PER_WORD);
1829 return;
1830 }
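/* A few illustrative assignments made by the routine above, starting from
   a fresh CUMULATIVE_ARGS: a struct of four floats is an HFA and takes
   v0-v3 (one SFmode register per member); a 16-byte integer struct with
   16-byte alignment arriving at NGRN == 1 is bumped to x2/x3 by rule C.8;
   and an HFA of three doubles arriving when only two FP argument registers
   remain sets NSRN to 8 and is passed entirely on the stack (C.3).  */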
1831
1832 /* Implement TARGET_FUNCTION_ARG. */
1833
1834 static rtx
1835 aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
1836 const_tree type, bool named)
1837 {
1838 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1839 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1840
1841 if (mode == VOIDmode)
1842 return NULL_RTX;
1843
1844 aarch64_layout_arg (pcum_v, mode, type, named);
1845 return pcum->aapcs_reg;
1846 }
1847
1848 void
1849 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1850 const_tree fntype ATTRIBUTE_UNUSED,
1851 rtx libname ATTRIBUTE_UNUSED,
1852 const_tree fndecl ATTRIBUTE_UNUSED,
1853 unsigned n_named ATTRIBUTE_UNUSED)
1854 {
1855 pcum->aapcs_ncrn = 0;
1856 pcum->aapcs_nvrn = 0;
1857 pcum->aapcs_nextncrn = 0;
1858 pcum->aapcs_nextnvrn = 0;
1859 pcum->pcs_variant = ARM_PCS_AAPCS64;
1860 pcum->aapcs_reg = NULL_RTX;
1861 pcum->aapcs_arg_processed = false;
1862 pcum->aapcs_stack_words = 0;
1863 pcum->aapcs_stack_size = 0;
1864
1865 return;
1866 }
1867
1868 static void
1869 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1870 machine_mode mode,
1871 const_tree type,
1872 bool named)
1873 {
1874 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1875 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1876 {
1877 aarch64_layout_arg (pcum_v, mode, type, named);
1878 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1879 != (pcum->aapcs_stack_words != 0));
1880 pcum->aapcs_arg_processed = false;
1881 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1882 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1883 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1884 pcum->aapcs_stack_words = 0;
1885 pcum->aapcs_reg = NULL_RTX;
1886 }
1887 }
1888
1889 bool
1890 aarch64_function_arg_regno_p (unsigned regno)
1891 {
1892 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1893 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1894 }
1895
1896 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1897 PARM_BOUNDARY bits of alignment, but will be given anything up
1898 to STACK_BOUNDARY bits if the type requires it. This makes sure
1899 that both before and after the layout of each argument, the Next
1900 Stacked Argument Address (NSAA) will have a minimum alignment of
1901 8 bytes. */
1902
1903 static unsigned int
1904 aarch64_function_arg_boundary (machine_mode mode, const_tree type)
1905 {
1906 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1907
1908 if (alignment < PARM_BOUNDARY)
1909 alignment = PARM_BOUNDARY;
1910 if (alignment > STACK_BOUNDARY)
1911 alignment = STACK_BOUNDARY;
1912 return alignment;
1913 }
1914
1915 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1916
1917 Return true if an argument passed on the stack should be padded upwards,
1918 i.e. if the least-significant byte of the stack slot has useful data.
1919
1920 Small aggregate types are placed in the lowest memory address.
1921
1922 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1923
1924 bool
1925 aarch64_pad_arg_upward (machine_mode mode, const_tree type)
1926 {
1927 /* On little-endian targets, the least significant byte of every stack
1928 argument is passed at the lowest byte address of the stack slot. */
1929 if (!BYTES_BIG_ENDIAN)
1930 return true;
1931
1932 /* Otherwise, integral, floating-point and pointer types are padded downward:
1933 the least significant byte of a stack argument is passed at the highest
1934 byte address of the stack slot. */
1935 if (type
1936 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1937 || POINTER_TYPE_P (type))
1938 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1939 return false;
1940
1941 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1942 return true;
1943 }
1944
1945 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1946
1947 It specifies padding for the last (may also be the only)
1948 element of a block move between registers and memory. Assuming
1949 the block is in memory, padding upward means that the last
1950 element is padded after its most significant byte, while in
1951 downward padding the last element is padded at its least
1952 significant byte side.
1953
1954 Small aggregates and small complex types are always padded
1955 upwards.
1956
1957 We don't need to worry about homogeneous floating-point or
1958 short-vector aggregates; their move is not affected by the
1959 padding direction determined here. Regardless of endianness,
1960 each element of such an aggregate is put in the least
1961 significant bits of a fp/simd register.
1962
1963 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1964 register has useful data, and return the opposite if the most
1965 significant byte does. */
1966
1967 bool
1968 aarch64_pad_reg_upward (machine_mode mode, const_tree type,
1969 bool first ATTRIBUTE_UNUSED)
1970 {
1971
1972 /* Small composite types are always padded upward. */
1973 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1974 {
1975 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1976 : GET_MODE_SIZE (mode));
1977 if (size < 2 * UNITS_PER_WORD)
1978 return true;
1979 }
1980
1981 /* Otherwise, use the default padding. */
1982 return !BYTES_BIG_ENDIAN;
1983 }
1984
1985 static machine_mode
1986 aarch64_libgcc_cmp_return_mode (void)
1987 {
1988 return SImode;
1989 }
1990
1991 static bool
1992 aarch64_frame_pointer_required (void)
1993 {
1994 /* In aarch64_override_options_after_change
1995 flag_omit_leaf_frame_pointer turns off the frame pointer by
1996 default. Turn it back on now if we've not got a leaf
1997 function. */
1998 if (flag_omit_leaf_frame_pointer
1999 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
2000 return true;
2001
2002 return false;
2003 }
2004
2005 /* Mark the registers that need to be saved by the callee and calculate
2006 the size of the callee-saved registers area and frame record (both FP
2007 and LR may be omitted). */
2008 static void
2009 aarch64_layout_frame (void)
2010 {
2011 HOST_WIDE_INT offset = 0;
2012 int regno;
2013
2014 if (reload_completed && cfun->machine->frame.laid_out)
2015 return;
2016
2017 #define SLOT_NOT_REQUIRED (-2)
2018 #define SLOT_REQUIRED (-1)
2019
2020 cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
2021 cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
2022
2023 /* First mark all the registers that really need to be saved... */
2024 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2025 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
2026
2027 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2028 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
2029
2030 /* ... that includes the eh data registers (if needed)... */
2031 if (crtl->calls_eh_return)
2032 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
2033 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
2034 = SLOT_REQUIRED;
2035
2036 /* ... and any callee saved register that dataflow says is live. */
2037 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2038 if (df_regs_ever_live_p (regno)
2039 && (regno == R30_REGNUM
2040 || !call_used_regs[regno]))
2041 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
2042
2043 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2044 if (df_regs_ever_live_p (regno)
2045 && !call_used_regs[regno])
2046 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
2047
2048 if (frame_pointer_needed)
2049 {
2050 /* FP and LR are placed in the linkage record. */
2051 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
2052 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
2053 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
2054 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
2055 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
2056 offset += 2 * UNITS_PER_WORD;
2057 }
2058
2059 /* Now assign stack slots for them. */
2060 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
2061 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
2062 {
2063 cfun->machine->frame.reg_offset[regno] = offset;
2064 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2065 cfun->machine->frame.wb_candidate1 = regno;
2066 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
2067 cfun->machine->frame.wb_candidate2 = regno;
2068 offset += UNITS_PER_WORD;
2069 }
2070
2071 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
2072 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
2073 {
2074 cfun->machine->frame.reg_offset[regno] = offset;
2075 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
2076 cfun->machine->frame.wb_candidate1 = regno;
2077 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
2078 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
2079 cfun->machine->frame.wb_candidate2 = regno;
2080 offset += UNITS_PER_WORD;
2081 }
2082
2083 cfun->machine->frame.padding0 =
2084 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
2085 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
2086
2087 cfun->machine->frame.saved_regs_size = offset;
2088
2089 cfun->machine->frame.hard_fp_offset
2090 = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
2091 + get_frame_size ()
2092 + cfun->machine->frame.saved_regs_size,
2093 STACK_BOUNDARY / BITS_PER_UNIT);
2094
2095 cfun->machine->frame.frame_size
2096 = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
2097 + crtl->outgoing_args_size,
2098 STACK_BOUNDARY / BITS_PER_UNIT);
2099
2100 cfun->machine->frame.laid_out = true;
2101 }
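/* Worked example (illustrative, not from the original sources): for a
   function that needs a frame pointer and whose dataflow marks x19,
   x20 and d8 as live, the code above assigns
       reg_offset[x29] = 0, reg_offset[x30] = 8   (the frame record),
       reg_offset[x19] = 16, reg_offset[x20] = 24,
       reg_offset[d8]  = 32,
   giving offset == 40, padding0 == 8 and saved_regs_size == 48 once
   rounded up to the 16-byte stack boundary; x29/x30 become the
   write-back candidates.  */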
2102
2103 static bool
2104 aarch64_register_saved_on_entry (int regno)
2105 {
2106 return cfun->machine->frame.reg_offset[regno] >= 0;
2107 }
2108
2109 static unsigned
2110 aarch64_next_callee_save (unsigned regno, unsigned limit)
2111 {
2112 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
2113 regno ++;
2114 return regno;
2115 }
2116
2117 static void
2118 aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
2119 HOST_WIDE_INT adjustment)
2120 {
2121 rtx base_rtx = stack_pointer_rtx;
2122 rtx insn, reg, mem;
2123
2124 reg = gen_rtx_REG (mode, regno);
2125 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
2126 plus_constant (Pmode, base_rtx, -adjustment));
2127 mem = gen_rtx_MEM (mode, mem);
2128
2129 insn = emit_move_insn (mem, reg);
2130 RTX_FRAME_RELATED_P (insn) = 1;
2131 }
2132
2133 static rtx
2134 aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
2135 HOST_WIDE_INT adjustment)
2136 {
2137 switch (mode)
2138 {
2139 case DImode:
2140 return gen_storewb_pairdi_di (base, base, reg, reg2,
2141 GEN_INT (-adjustment),
2142 GEN_INT (UNITS_PER_WORD - adjustment));
2143 case DFmode:
2144 return gen_storewb_pairdf_di (base, base, reg, reg2,
2145 GEN_INT (-adjustment),
2146 GEN_INT (UNITS_PER_WORD - adjustment));
2147 default:
2148 gcc_unreachable ();
2149 }
2150 }
2151
2152 static void
2153 aarch64_pushwb_pair_reg (machine_mode mode, unsigned regno1,
2154 unsigned regno2, HOST_WIDE_INT adjustment)
2155 {
2156 rtx_insn *insn;
2157 rtx reg1 = gen_rtx_REG (mode, regno1);
2158 rtx reg2 = gen_rtx_REG (mode, regno2);
2159
2160 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
2161 reg2, adjustment));
2162 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2163 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2164 RTX_FRAME_RELATED_P (insn) = 1;
2165 }
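/* For DImode and an adjustment of, say, 32 this expands to the single
   pre-indexed store pair
       stp x29, x30, [sp, #-32]!
   (illustrative; the exact operands depend on the chosen write-back
   candidates).  */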
2166
2167 static rtx
2168 aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
2169 HOST_WIDE_INT adjustment)
2170 {
2171 switch (mode)
2172 {
2173 case DImode:
2174 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
2175 GEN_INT (UNITS_PER_WORD));
2176 case DFmode:
2177 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
2178 GEN_INT (UNITS_PER_WORD));
2179 default:
2180 gcc_unreachable ();
2181 }
2182 }
2183
2184 static rtx
2185 aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
2186 rtx reg2)
2187 {
2188 switch (mode)
2189 {
2190 case DImode:
2191 return gen_store_pairdi (mem1, reg1, mem2, reg2);
2192
2193 case DFmode:
2194 return gen_store_pairdf (mem1, reg1, mem2, reg2);
2195
2196 default:
2197 gcc_unreachable ();
2198 }
2199 }
2200
2201 static rtx
2202 aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
2203 rtx mem2)
2204 {
2205 switch (mode)
2206 {
2207 case DImode:
2208 return gen_load_pairdi (reg1, mem1, reg2, mem2);
2209
2210 case DFmode:
2211 return gen_load_pairdf (reg1, mem1, reg2, mem2);
2212
2213 default:
2214 gcc_unreachable ();
2215 }
2216 }
2217
2218
2219 static void
2220 aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
2221 unsigned start, unsigned limit, bool skip_wb)
2222 {
2223 rtx_insn *insn;
2224 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
2225 ? gen_frame_mem : gen_rtx_MEM);
2226 unsigned regno;
2227 unsigned regno2;
2228
2229 for (regno = aarch64_next_callee_save (start, limit);
2230 regno <= limit;
2231 regno = aarch64_next_callee_save (regno + 1, limit))
2232 {
2233 rtx reg, mem;
2234 HOST_WIDE_INT offset;
2235
2236 if (skip_wb
2237 && (regno == cfun->machine->frame.wb_candidate1
2238 || regno == cfun->machine->frame.wb_candidate2))
2239 continue;
2240
2241 reg = gen_rtx_REG (mode, regno);
2242 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2243 mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2244 offset));
2245
2246 regno2 = aarch64_next_callee_save (regno + 1, limit);
2247
2248 if (regno2 <= limit
2249 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2250 == cfun->machine->frame.reg_offset[regno2]))
2251
2252 {
2253 rtx reg2 = gen_rtx_REG (mode, regno2);
2254 rtx mem2;
2255
2256 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2257 mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2258 offset));
2259 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
2260 reg2));
2261
2262 /* The first part of a frame-related parallel insn is
2263 always assumed to be relevant to the frame
2264 calculations; subsequent parts are only
2265 frame-related if explicitly marked. */
2266 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2267 regno = regno2;
2268 }
2269 else
2270 insn = emit_move_insn (mem, reg);
2271
2272 RTX_FRAME_RELATED_P (insn) = 1;
2273 }
2274 }
2275
2276 static void
2277 aarch64_restore_callee_saves (machine_mode mode,
2278 HOST_WIDE_INT start_offset, unsigned start,
2279 unsigned limit, bool skip_wb, rtx *cfi_ops)
2280 {
2281 rtx base_rtx = stack_pointer_rtx;
2282 rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
2283 ? gen_frame_mem : gen_rtx_MEM);
2284 unsigned regno;
2285 unsigned regno2;
2286 HOST_WIDE_INT offset;
2287
2288 for (regno = aarch64_next_callee_save (start, limit);
2289 regno <= limit;
2290 regno = aarch64_next_callee_save (regno + 1, limit))
2291 {
2292 rtx reg, mem;
2293
2294 if (skip_wb
2295 && (regno == cfun->machine->frame.wb_candidate1
2296 || regno == cfun->machine->frame.wb_candidate2))
2297 continue;
2298
2299 reg = gen_rtx_REG (mode, regno);
2300 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2301 mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2302
2303 regno2 = aarch64_next_callee_save (regno + 1, limit);
2304
2305 if (regno2 <= limit
2306 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2307 == cfun->machine->frame.reg_offset[regno2]))
2308 {
2309 rtx reg2 = gen_rtx_REG (mode, regno2);
2310 rtx mem2;
2311
2312 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2313 mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2314 emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
2315
2316 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
2317 regno = regno2;
2318 }
2319 else
2320 emit_move_insn (reg, mem);
2321 *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
2322 }
2323 }
2324
2325 /* AArch64 stack frames generated by this compiler look like:
2326
2327 +-------------------------------+
2328 | |
2329 | incoming stack arguments |
2330 | |
2331 +-------------------------------+
2332 | | <-- incoming stack pointer (aligned)
2333 | callee-allocated save area |
2334 | for register varargs |
2335 | |
2336 +-------------------------------+
2337 | local variables | <-- frame_pointer_rtx
2338 | |
2339 +-------------------------------+
2340 | padding0 | \
2341 +-------------------------------+ |
2342 | callee-saved registers | | frame.saved_regs_size
2343 +-------------------------------+ |
2344 | LR' | |
2345 +-------------------------------+ |
2346 | FP' | / <- hard_frame_pointer_rtx (aligned)
2347 +-------------------------------+
2348 | dynamic allocation |
2349 +-------------------------------+
2350 | padding |
2351 +-------------------------------+
2352 | outgoing stack arguments | <-- arg_pointer
2353 | |
2354 +-------------------------------+
2355 | | <-- stack_pointer_rtx (aligned)
2356
2357 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2358 but leave frame_pointer_rtx and hard_frame_pointer_rtx
2359 unchanged. */
2360
2361 /* Generate the prologue instructions for entry into a function.
2362 Establish the stack frame by decreasing the stack pointer with a
2363 properly calculated size and, if necessary, create a frame record
2364 filled with the values of LR and previous frame pointer. The
2365 current FP is also set up if it is in use. */
2366
2367 void
2368 aarch64_expand_prologue (void)
2369 {
2370 /* sub sp, sp, #<frame_size>
2371 stp {fp, lr}, [sp, #<frame_size> - 16]
2372 add fp, sp, #<frame_size> - hardfp_offset
2373 stp {cs_reg}, [fp, #-16] etc.
2374
2375 sub sp, sp, <final_adjustment_if_any>
2376 */
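/* A concrete instance of the sequence above (illustrative only): for a
   48-byte frame with no locals and no outgoing arguments this becomes
   roughly
       stp x29, x30, [sp, #-48]!
       add x29, sp, #0
       str x19, [sp, #16]
   so the write-back store pair both allocates the frame and saves the
   frame record in one instruction.  */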
2377 HOST_WIDE_INT frame_size, offset;
2378 HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */
2379 HOST_WIDE_INT hard_fp_offset;
2380 rtx_insn *insn;
2381
2382 aarch64_layout_frame ();
2383
2384 offset = frame_size = cfun->machine->frame.frame_size;
2385 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2386 fp_offset = frame_size - hard_fp_offset;
2387
2388 if (flag_stack_usage_info)
2389 current_function_static_stack_size = frame_size;
2390
2391 /* Store-pair and load-pair instructions have an offset range of only -512 to 504. */
2392 if (offset >= 512)
2393 {
2394 /* When the frame has a large size, an initial decrease is done on
2395 the stack pointer to jump over the callee-allocated save area for
2396 register varargs, the local variable area and/or the callee-saved
2397 register area. This will allow the pre-index write-back
2398 store pair instructions to be used for setting up the stack frame
2399 efficiently. */
2400 offset = hard_fp_offset;
2401 if (offset >= 512)
2402 offset = cfun->machine->frame.saved_regs_size;
2403
2404 frame_size -= (offset + crtl->outgoing_args_size);
2405 fp_offset = 0;
2406
2407 if (frame_size >= 0x1000000)
2408 {
2409 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2410 emit_move_insn (op0, GEN_INT (-frame_size));
2411 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2412
2413 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2414 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
2415 plus_constant (Pmode, stack_pointer_rtx,
2416 -frame_size)));
2417 RTX_FRAME_RELATED_P (insn) = 1;
2418 }
2419 else if (frame_size > 0)
2420 {
2421 int hi_ofs = frame_size & 0xfff000;
2422 int lo_ofs = frame_size & 0x000fff;
2423
2424 if (hi_ofs)
2425 {
2426 insn = emit_insn (gen_add2_insn
2427 (stack_pointer_rtx, GEN_INT (-hi_ofs)));
2428 RTX_FRAME_RELATED_P (insn) = 1;
2429 }
2430 if (lo_ofs)
2431 {
2432 insn = emit_insn (gen_add2_insn
2433 (stack_pointer_rtx, GEN_INT (-lo_ofs)));
2434 RTX_FRAME_RELATED_P (insn) = 1;
2435 }
2436 }
2437 }
2438 else
2439 frame_size = -1;
2440
2441 if (offset > 0)
2442 {
2443 bool skip_wb = false;
2444
2445 if (frame_pointer_needed)
2446 {
2447 skip_wb = true;
2448
2449 if (fp_offset)
2450 {
2451 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2452 GEN_INT (-offset)));
2453 RTX_FRAME_RELATED_P (insn) = 1;
2454
2455 aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
2456 R30_REGNUM, false);
2457 }
2458 else
2459 aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);
2460
2461 /* Set up frame pointer to point to the location of the
2462 previous frame pointer on the stack. */
2463 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2464 stack_pointer_rtx,
2465 GEN_INT (fp_offset)));
2466 RTX_FRAME_RELATED_P (insn) = 1;
2467 emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
2468 }
2469 else
2470 {
2471 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2472 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2473
2474 if (fp_offset
2475 || reg1 == FIRST_PSEUDO_REGISTER
2476 || (reg2 == FIRST_PSEUDO_REGISTER
2477 && offset >= 256))
2478 {
2479 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2480 GEN_INT (-offset)));
2481 RTX_FRAME_RELATED_P (insn) = 1;
2482 }
2483 else
2484 {
2485 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
2486
2487 skip_wb = true;
2488
2489 if (reg2 == FIRST_PSEUDO_REGISTER)
2490 aarch64_pushwb_single_reg (mode1, reg1, offset);
2491 else
2492 aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
2493 }
2494 }
2495
2496 aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2497 skip_wb);
2498 aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2499 skip_wb);
2500 }
2501
2502 /* When offset >= 512,
2503 sub sp, sp, #<outgoing_args_size> */
2504 if (frame_size > -1)
2505 {
2506 if (crtl->outgoing_args_size > 0)
2507 {
2508 insn = emit_insn (gen_add2_insn
2509 (stack_pointer_rtx,
2510 GEN_INT (- crtl->outgoing_args_size)));
2511 RTX_FRAME_RELATED_P (insn) = 1;
2512 }
2513 }
2514 }
2515
2516 /* Return TRUE if we can use a simple_return insn.
2517
2518 This function checks whether the callee-saved stack is empty, which
2519 means no restore actions are needed. The pro_and_epilogue pass uses
2520 this to check whether the shrink-wrapping optimization is feasible. */
2521
2522 bool
2523 aarch64_use_return_insn_p (void)
2524 {
2525 if (!reload_completed)
2526 return false;
2527
2528 if (crtl->profile)
2529 return false;
2530
2531 aarch64_layout_frame ();
2532
2533 return cfun->machine->frame.frame_size == 0;
2534 }
2535
2536 /* Generate the epilogue instructions for returning from a function. */
2537 void
2538 aarch64_expand_epilogue (bool for_sibcall)
2539 {
2540 HOST_WIDE_INT frame_size, offset;
2541 HOST_WIDE_INT fp_offset;
2542 HOST_WIDE_INT hard_fp_offset;
2543 rtx_insn *insn;
2544 /* We need to add a memory barrier to prevent reads from the deallocated stack. */
2545 bool need_barrier_p = (get_frame_size () != 0
2546 || cfun->machine->frame.saved_varargs_size);
2547
2548 aarch64_layout_frame ();
2549
2550 offset = frame_size = cfun->machine->frame.frame_size;
2551 hard_fp_offset = cfun->machine->frame.hard_fp_offset;
2552 fp_offset = frame_size - hard_fp_offset;
2553
2554 /* Store-pair and load-pair instructions have an offset range of only -512 to 504. */
2555 if (offset >= 512)
2556 {
2557 offset = hard_fp_offset;
2558 if (offset >= 512)
2559 offset = cfun->machine->frame.saved_regs_size;
2560
2561 frame_size -= (offset + crtl->outgoing_args_size);
2562 fp_offset = 0;
2563 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2564 {
2565 insn = emit_insn (gen_add2_insn
2566 (stack_pointer_rtx,
2567 GEN_INT (crtl->outgoing_args_size)));
2568 RTX_FRAME_RELATED_P (insn) = 1;
2569 }
2570 }
2571 else
2572 frame_size = -1;
2573
2574 /* If there were outgoing arguments or we've done dynamic stack
2575 allocation, then restore the stack pointer from the frame
2576 pointer. This is at most one insn and more efficient than using
2577 GCC's internal mechanism. */
2578 if (frame_pointer_needed
2579 && (crtl->outgoing_args_size || cfun->calls_alloca))
2580 {
2581 if (cfun->calls_alloca)
2582 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2583
2584 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2585 hard_frame_pointer_rtx,
2586 GEN_INT (0)));
2587 offset = offset - fp_offset;
2588 }
2589
2590 if (offset > 0)
2591 {
2592 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2593 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2594 bool skip_wb = true;
2595 rtx cfi_ops = NULL;
2596
2597 if (frame_pointer_needed)
2598 fp_offset = 0;
2599 else if (fp_offset
2600 || reg1 == FIRST_PSEUDO_REGISTER
2601 || (reg2 == FIRST_PSEUDO_REGISTER
2602 && offset >= 256))
2603 skip_wb = false;
2604
2605 aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2606 skip_wb, &cfi_ops);
2607 aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2608 skip_wb, &cfi_ops);
2609
2610 if (need_barrier_p)
2611 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2612
2613 if (skip_wb)
2614 {
2615 machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
2616 rtx rreg1 = gen_rtx_REG (mode1, reg1);
2617
2618 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
2619 if (reg2 == FIRST_PSEUDO_REGISTER)
2620 {
2621 rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
2622 mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
2623 mem = gen_rtx_MEM (mode1, mem);
2624 insn = emit_move_insn (rreg1, mem);
2625 }
2626 else
2627 {
2628 rtx rreg2 = gen_rtx_REG (mode1, reg2);
2629
2630 cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
2631 insn = emit_insn (aarch64_gen_loadwb_pair
2632 (mode1, stack_pointer_rtx, rreg1,
2633 rreg2, offset));
2634 }
2635 }
2636 else
2637 {
2638 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2639 GEN_INT (offset)));
2640 }
2641
2642 /* Reset the CFA to be SP + FRAME_SIZE. */
2643 rtx new_cfa = stack_pointer_rtx;
2644 if (frame_size > 0)
2645 new_cfa = plus_constant (Pmode, new_cfa, frame_size);
2646 cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
2647 REG_NOTES (insn) = cfi_ops;
2648 RTX_FRAME_RELATED_P (insn) = 1;
2649 }
2650
2651 if (frame_size > 0)
2652 {
2653 if (need_barrier_p)
2654 emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
2655
2656 if (frame_size >= 0x1000000)
2657 {
2658 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2659 emit_move_insn (op0, GEN_INT (frame_size));
2660 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2661 }
2662 else
2663 {
2664 int hi_ofs = frame_size & 0xfff000;
2665 int lo_ofs = frame_size & 0x000fff;
2666
2667 if (hi_ofs && lo_ofs)
2668 {
2669 insn = emit_insn (gen_add2_insn
2670 (stack_pointer_rtx, GEN_INT (hi_ofs)));
2671 RTX_FRAME_RELATED_P (insn) = 1;
2672 frame_size = lo_ofs;
2673 }
2674 insn = emit_insn (gen_add2_insn
2675 (stack_pointer_rtx, GEN_INT (frame_size)));
2676 }
2677
2678 /* Reset the CFA to be SP + 0. */
2679 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
2680 RTX_FRAME_RELATED_P (insn) = 1;
2681 }
2682
2683 /* Stack adjustment for exception handler. */
2684 if (crtl->calls_eh_return)
2685 {
2686 /* We need to unwind the stack by the offset computed by
2687 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
2688 to be SP; letting the CFA move during this adjustment
2689 is just as correct as retaining the CFA from the body
2690 of the function. Therefore, do nothing special. */
2691 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2692 }
2693
2694 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2695 if (!for_sibcall)
2696 emit_jump_insn (ret_rtx);
2697 }
2698
2699 /* Return the place to copy the exception unwinding return address to.
2700 This will probably be a stack slot, but could (in theory) be the
2701 return register. */
2702 rtx
2703 aarch64_final_eh_return_addr (void)
2704 {
2705 HOST_WIDE_INT fp_offset;
2706
2707 aarch64_layout_frame ();
2708
2709 fp_offset = cfun->machine->frame.frame_size
2710 - cfun->machine->frame.hard_fp_offset;
2711
2712 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2713 return gen_rtx_REG (DImode, LR_REGNUM);
2714
2715 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2716 result in a store to save LR introduced by builtin_eh_return () being
2717 incorrectly deleted because the alias is not detected.
2718 So in the calculation of the address to copy the exception unwinding
2719 return address to, we distinguish two cases.
2720 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2721 we return a SP-relative location since all the addresses are SP-relative
2722 in this case. This prevents the store from being optimized away.
2723 If the fp_offset is not 0, then the addresses will be FP-relative and
2724 therefore we return a FP-relative location. */
2725
2726 if (frame_pointer_needed)
2727 {
2728 if (fp_offset)
2729 return gen_frame_mem (DImode,
2730 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2731 else
2732 return gen_frame_mem (DImode,
2733 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2734 }
2735
2736 /* If FP is not needed, we calculate the location of LR, which would be
2737 at the top of the saved registers block. */
2738
2739 return gen_frame_mem (DImode,
2740 plus_constant (Pmode,
2741 stack_pointer_rtx,
2742 fp_offset
2743 + cfun->machine->frame.saved_regs_size
2744 - 2 * UNITS_PER_WORD));
2745 }
2746
2747 /* Possibly output code to build up a constant in a register. For
2748 the benefit of the costs infrastructure, returns the number of
2749 instructions which would be emitted. GENERATE inhibits or
2750 enables code generation. */
2751
2752 static int
2753 aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
2754 {
2755 int insns = 0;
2756
2757 if (aarch64_bitmask_imm (val, DImode))
2758 {
2759 if (generate)
2760 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2761 insns = 1;
2762 }
2763 else
2764 {
2765 int i;
2766 int ncount = 0;
2767 int zcount = 0;
2768 HOST_WIDE_INT valp = val >> 16;
2769 HOST_WIDE_INT valm;
2770 HOST_WIDE_INT tval;
2771
2772 for (i = 16; i < 64; i += 16)
2773 {
2774 valm = (valp & 0xffff);
2775
2776 if (valm != 0)
2777 ++ zcount;
2778
2779 if (valm != 0xffff)
2780 ++ ncount;
2781
2782 valp >>= 16;
2783 }
2784
2785 /* zcount contains the number of additional MOVK instructions
2786 required if the constant is built up with an initial MOVZ instruction,
2787 while ncount is the number of MOVK instructions required if starting
2788 with a MOVN instruction. Choose the sequence that requires the
2789 fewest instructions, preferring the MOVZ sequence when both counts
2790 are the same. */
2791 if (ncount < zcount)
2792 {
2793 if (generate)
2794 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2795 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2796 tval = 0xffff;
2797 insns++;
2798 }
2799 else
2800 {
2801 if (generate)
2802 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2803 GEN_INT (val & 0xffff));
2804 tval = 0;
2805 insns++;
2806 }
2807
2808 val >>= 16;
2809
2810 for (i = 16; i < 64; i += 16)
2811 {
2812 if ((val & 0xffff) != tval)
2813 {
2814 if (generate)
2815 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2816 GEN_INT (i),
2817 GEN_INT (val & 0xffff)));
2818 insns++;
2819 }
2820 val >>= 16;
2821 }
2822 }
2823 return insns;
2824 }
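/* Worked example (illustrative): for VAL == 0x1234ffffffff5678 the
   16-bit chunks above the low one are 0xffff, 0xffff and 0x1234, so
   zcount == 3 and ncount == 1.  The MOVN-based sequence wins and only
   two instructions are emitted:
       movn xN, #0xa987                 (loads 0xffffffffffff5678)
       movk xN, #0x1234, lsl #48
   */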
2825
2826 static void
2827 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2828 {
2829 HOST_WIDE_INT mdelta = delta;
2830 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2831 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2832
2833 if (mdelta < 0)
2834 mdelta = -mdelta;
2835
2836 if (mdelta >= 4096 * 4096)
2837 {
2838 (void) aarch64_build_constant (scratchreg, delta, true);
2839 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2840 }
2841 else if (mdelta > 0)
2842 {
2843 if (mdelta >= 4096)
2844 {
2845 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2846 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2847 if (delta < 0)
2848 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2849 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2850 else
2851 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2852 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2853 }
2854 if (mdelta % 4096 != 0)
2855 {
2856 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2857 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2858 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2859 }
2860 }
2861 }
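/* Illustrative expansion (values made up): for DELTA == 0x3456 the
   code above emits roughly
       mov  xscratch, #3                (0x3456 / 4096)
       add  xdst, xdst, xscratch, lsl #12
       add  xdst, xdst, #0x456          (0x3456 % 4096)
   while a delta of 0x1000000 (4096 * 4096) or more falls back to
   aarch64_build_constant plus a single register-register add.  */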
2862
2863 /* Output code to add DELTA to the first argument, and then jump
2864 to FUNCTION. Used for C++ multiple inheritance. */
2865 static void
2866 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2867 HOST_WIDE_INT delta,
2868 HOST_WIDE_INT vcall_offset,
2869 tree function)
2870 {
2871 /* The this pointer is always in x0. Note that this differs from
2872 Arm where the this pointer may be bumped to r1 if r0 is required
2873 to return a pointer to an aggregate. On AArch64 a result value
2874 pointer will be in x8. */
2875 int this_regno = R0_REGNUM;
2876 rtx this_rtx, temp0, temp1, addr, funexp;
2877 rtx_insn *insn;
2878
2879 reload_completed = 1;
2880 emit_note (NOTE_INSN_PROLOGUE_END);
2881
2882 if (vcall_offset == 0)
2883 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2884 else
2885 {
2886 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2887
2888 this_rtx = gen_rtx_REG (Pmode, this_regno);
2889 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2890 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2891
2892 addr = this_rtx;
2893 if (delta != 0)
2894 {
2895 if (delta >= -256 && delta < 256)
2896 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2897 plus_constant (Pmode, this_rtx, delta));
2898 else
2899 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2900 }
2901
2902 if (Pmode == ptr_mode)
2903 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2904 else
2905 aarch64_emit_move (temp0,
2906 gen_rtx_ZERO_EXTEND (Pmode,
2907 gen_rtx_MEM (ptr_mode, addr)));
2908
2909 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2910 addr = plus_constant (Pmode, temp0, vcall_offset);
2911 else
2912 {
2913 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
2914 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2915 }
2916
2917 if (Pmode == ptr_mode)
2918 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
2919 else
2920 aarch64_emit_move (temp1,
2921 gen_rtx_SIGN_EXTEND (Pmode,
2922 gen_rtx_MEM (ptr_mode, addr)));
2923
2924 emit_insn (gen_add2_insn (this_rtx, temp1));
2925 }
2926
2927 /* Generate a tail call to the target function. */
2928 if (!TREE_USED (function))
2929 {
2930 assemble_external (function);
2931 TREE_USED (function) = 1;
2932 }
2933 funexp = XEXP (DECL_RTL (function), 0);
2934 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2935 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2936 SIBLING_CALL_P (insn) = 1;
2937
2938 insn = get_insns ();
2939 shorten_branches (insn);
2940 final_start_function (insn, file, 1);
2941 final (insn, file, 1);
2942 final_end_function ();
2943
2944 /* Stop pretending to be a post-reload pass. */
2945 reload_completed = 0;
2946 }
2947
2948 static bool
2949 aarch64_tls_referenced_p (rtx x)
2950 {
2951 if (!TARGET_HAVE_TLS)
2952 return false;
2953 subrtx_iterator::array_type array;
2954 FOR_EACH_SUBRTX (iter, array, x, ALL)
2955 {
2956 const_rtx x = *iter;
2957 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
2958 return true;
2959 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2960 TLS offsets, not real symbol references. */
2961 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
2962 iter.skip_subrtxes ();
2963 }
2964 return false;
2965 }
2966
2967
2968 static int
2969 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2970 {
2971 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2972 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2973
2974 if (*imm1 < *imm2)
2975 return -1;
2976 if (*imm1 > *imm2)
2977 return +1;
2978 return 0;
2979 }
2980
2981
2982 static void
2983 aarch64_build_bitmask_table (void)
2984 {
2985 unsigned HOST_WIDE_INT mask, imm;
2986 unsigned int log_e, e, s, r;
2987 unsigned int nimms = 0;
2988
2989 for (log_e = 1; log_e <= 6; log_e++)
2990 {
2991 e = 1 << log_e;
2992 if (e == 64)
2993 mask = ~(HOST_WIDE_INT) 0;
2994 else
2995 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2996 for (s = 1; s < e; s++)
2997 {
2998 for (r = 0; r < e; r++)
2999 {
3000 /* Set S consecutive bits to 1 (S < 64). */
3001 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
3002 /* Rotate right by R. */
3003 if (r != 0)
3004 imm = ((imm >> r) | (imm << (e - r))) & mask;
3005 /* Replicate the constant to the full SIMD width; the cases below deliberately fall through. */
3006 switch (log_e) {
3007 case 1: imm |= (imm << 2);
3008 case 2: imm |= (imm << 4);
3009 case 3: imm |= (imm << 8);
3010 case 4: imm |= (imm << 16);
3011 case 5: imm |= (imm << 32);
3012 case 6:
3013 break;
3014 default:
3015 gcc_unreachable ();
3016 }
3017 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
3018 aarch64_bitmasks[nimms++] = imm;
3019 }
3020 }
3021 }
3022
3023 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
3024 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
3025 aarch64_bitmasks_cmp);
3026 }
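/* Illustrative sketch (not part of the original sources, kept disabled):
   the helper below rebuilds a single bitmask immediate from an element
   size E, run length S and rotation R, mirroring the loop above.  For
   E == 8, S == 3, R == 1 it returns 0x8383838383838383.  */
#if 0
static unsigned HOST_WIDE_INT
aarch64_bitmask_example (unsigned int e, unsigned int s, unsigned int r)
{
  unsigned HOST_WIDE_INT mask
    = (e == 64 ? ~(unsigned HOST_WIDE_INT) 0
       : ((unsigned HOST_WIDE_INT) 1 << e) - 1);
  /* S consecutive low bits set...  */
  unsigned HOST_WIDE_INT imm = ((unsigned HOST_WIDE_INT) 1 << s) - 1;
  /* ...rotated right by R within the E-bit element...  */
  if (r != 0)
    imm = ((imm >> r) | (imm << (e - r))) & mask;
  /* ...and replicated to fill all 64 bits.  */
  for (unsigned int width = e; width < 64; width *= 2)
    imm |= imm << width;
  return imm;
}
#endif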
3027
3028
3029 /* Return true if val can be encoded as a 12-bit unsigned immediate with
3030 a left shift of 0 or 12 bits. */
3031 bool
3032 aarch64_uimm12_shift (HOST_WIDE_INT val)
3033 {
3034 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
3035 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
3036 );
3037 }
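/* For example, 0xabc and 0xabc000 both satisfy this test (shift 0 and
   shift 12 respectively), whereas 0xabc00 does not, since its set bits
   straddle the two 12-bit windows.  */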
3038
3039
3040 /* Return true if val is an immediate that can be loaded into a
3041 register by a MOVZ instruction. */
3042 static bool
3043 aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
3044 {
3045 if (GET_MODE_SIZE (mode) > 4)
3046 {
3047 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
3048 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
3049 return 1;
3050 }
3051 else
3052 {
3053 /* Ignore sign extension. */
3054 val &= (HOST_WIDE_INT) 0xffffffff;
3055 }
3056 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
3057 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
3058 }
3059
3060
3061 /* Return true if val is a valid bitmask immediate. */
3062 bool
3063 aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode mode)
3064 {
3065 if (GET_MODE_SIZE (mode) < 8)
3066 {
3067 /* Replicate bit pattern. */
3068 val &= (HOST_WIDE_INT) 0xffffffff;
3069 val |= val << 32;
3070 }
3071 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
3072 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
3073 }
3074
3075
3076 /* Return true if val is an immediate that can be loaded into a
3077 register in a single instruction. */
3078 bool
3079 aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
3080 {
3081 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
3082 return 1;
3083 return aarch64_bitmask_imm (val, mode);
3084 }
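/* A few illustrative immediates: 0xf00d0000 is accepted via
   aarch64_movw_imm (a MOVZ with a 16-bit shift), 0xffffffffffff1234
   via the inverted test (a single MOVN), and 0x00ff00ff00ff00ff via
   aarch64_bitmask_imm (a logical-immediate move).  */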
3085
3086 static bool
3087 aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
3088 {
3089 rtx base, offset;
3090
3091 if (GET_CODE (x) == HIGH)
3092 return true;
3093
3094 split_const (x, &base, &offset);
3095 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
3096 {
3097 if (aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR)
3098 != SYMBOL_FORCE_TO_MEM)
3099 return true;
3100 else
3101 /* Avoid generating a 64-bit relocation in ILP32; leave
3102 to aarch64_expand_mov_immediate to handle it properly. */
3103 return mode != ptr_mode;
3104 }
3105
3106 return aarch64_tls_referenced_p (x);
3107 }
3108
3109 /* Return true if register REGNO is a valid index register.
3110 STRICT_P is true if REG_OK_STRICT is in effect. */
3111
3112 bool
3113 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
3114 {
3115 if (!HARD_REGISTER_NUM_P (regno))
3116 {
3117 if (!strict_p)
3118 return true;
3119
3120 if (!reg_renumber)
3121 return false;
3122
3123 regno = reg_renumber[regno];
3124 }
3125 return GP_REGNUM_P (regno);
3126 }
3127
3128 /* Return true if register REGNO is a valid base register.
3129 STRICT_P is true if REG_OK_STRICT is in effect. */
3130
3131 bool
3132 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
3133 {
3134 if (!HARD_REGISTER_NUM_P (regno))
3135 {
3136 if (!strict_p)
3137 return true;
3138
3139 if (!reg_renumber)
3140 return false;
3141
3142 regno = reg_renumber[regno];
3143 }
3144
3145 /* The fake registers will be eliminated to either the stack or
3146 hard frame pointer, both of which are usually valid base registers.
3147 Reload deals with the cases where the eliminated form isn't valid. */
3148 return (GP_REGNUM_P (regno)
3149 || regno == SP_REGNUM
3150 || regno == FRAME_POINTER_REGNUM
3151 || regno == ARG_POINTER_REGNUM);
3152 }
3153
3154 /* Return true if X is a valid base register.
3155 STRICT_P is true if REG_OK_STRICT is in effect. */
3156
3157 static bool
3158 aarch64_base_register_rtx_p (rtx x, bool strict_p)
3159 {
3160 if (!strict_p && GET_CODE (x) == SUBREG)
3161 x = SUBREG_REG (x);
3162
3163 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
3164 }
3165
3166 /* Return true if address offset is a valid index. If it is, fill in INFO
3167 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
3168
3169 static bool
3170 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
3171 machine_mode mode, bool strict_p)
3172 {
3173 enum aarch64_address_type type;
3174 rtx index;
3175 int shift;
3176
3177 /* (reg:P) */
3178 if ((REG_P (x) || GET_CODE (x) == SUBREG)
3179 && GET_MODE (x) == Pmode)
3180 {
3181 type = ADDRESS_REG_REG;
3182 index = x;
3183 shift = 0;
3184 }
3185 /* (sign_extend:DI (reg:SI)) */
3186 else if ((GET_CODE (x) == SIGN_EXTEND
3187 || GET_CODE (x) == ZERO_EXTEND)
3188 && GET_MODE (x) == DImode
3189 && GET_MODE (XEXP (x, 0)) == SImode)
3190 {
3191 type = (GET_CODE (x) == SIGN_EXTEND)
3192 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3193 index = XEXP (x, 0);
3194 shift = 0;
3195 }
3196 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3197 else if (GET_CODE (x) == MULT
3198 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3199 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3200 && GET_MODE (XEXP (x, 0)) == DImode
3201 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3202 && CONST_INT_P (XEXP (x, 1)))
3203 {
3204 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3205 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3206 index = XEXP (XEXP (x, 0), 0);
3207 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3208 }
3209 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3210 else if (GET_CODE (x) == ASHIFT
3211 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3212 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3213 && GET_MODE (XEXP (x, 0)) == DImode
3214 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3215 && CONST_INT_P (XEXP (x, 1)))
3216 {
3217 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3218 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3219 index = XEXP (XEXP (x, 0), 0);
3220 shift = INTVAL (XEXP (x, 1));
3221 }
3222 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3223 else if ((GET_CODE (x) == SIGN_EXTRACT
3224 || GET_CODE (x) == ZERO_EXTRACT)
3225 && GET_MODE (x) == DImode
3226 && GET_CODE (XEXP (x, 0)) == MULT
3227 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3228 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3229 {
3230 type = (GET_CODE (x) == SIGN_EXTRACT)
3231 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3232 index = XEXP (XEXP (x, 0), 0);
3233 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3234 if (INTVAL (XEXP (x, 1)) != 32 + shift
3235 || INTVAL (XEXP (x, 2)) != 0)
3236 shift = -1;
3237 }
3238 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3239 (const_int 0xffffffff<<shift)) */
3240 else if (GET_CODE (x) == AND
3241 && GET_MODE (x) == DImode
3242 && GET_CODE (XEXP (x, 0)) == MULT
3243 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3244 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3245 && CONST_INT_P (XEXP (x, 1)))
3246 {
3247 type = ADDRESS_REG_UXTW;
3248 index = XEXP (XEXP (x, 0), 0);
3249 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3250 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3251 shift = -1;
3252 }
3253 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3254 else if ((GET_CODE (x) == SIGN_EXTRACT
3255 || GET_CODE (x) == ZERO_EXTRACT)
3256 && GET_MODE (x) == DImode
3257 && GET_CODE (XEXP (x, 0)) == ASHIFT
3258 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3259 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3260 {
3261 type = (GET_CODE (x) == SIGN_EXTRACT)
3262 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3263 index = XEXP (XEXP (x, 0), 0);
3264 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3265 if (INTVAL (XEXP (x, 1)) != 32 + shift
3266 || INTVAL (XEXP (x, 2)) != 0)
3267 shift = -1;
3268 }
3269 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3270 (const_int 0xffffffff<<shift)) */
3271 else if (GET_CODE (x) == AND
3272 && GET_MODE (x) == DImode
3273 && GET_CODE (XEXP (x, 0)) == ASHIFT
3274 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3275 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3276 && CONST_INT_P (XEXP (x, 1)))
3277 {
3278 type = ADDRESS_REG_UXTW;
3279 index = XEXP (XEXP (x, 0), 0);
3280 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3281 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3282 shift = -1;
3283 }
3284 /* (mult:P (reg:P) (const_int scale)) */
3285 else if (GET_CODE (x) == MULT
3286 && GET_MODE (x) == Pmode
3287 && GET_MODE (XEXP (x, 0)) == Pmode
3288 && CONST_INT_P (XEXP (x, 1)))
3289 {
3290 type = ADDRESS_REG_REG;
3291 index = XEXP (x, 0);
3292 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3293 }
3294 /* (ashift:P (reg:P) (const_int shift)) */
3295 else if (GET_CODE (x) == ASHIFT
3296 && GET_MODE (x) == Pmode
3297 && GET_MODE (XEXP (x, 0)) == Pmode
3298 && CONST_INT_P (XEXP (x, 1)))
3299 {
3300 type = ADDRESS_REG_REG;
3301 index = XEXP (x, 0);
3302 shift = INTVAL (XEXP (x, 1));
3303 }
3304 else
3305 return false;
3306
3307 if (GET_CODE (index) == SUBREG)
3308 index = SUBREG_REG (index);
3309
3310 if ((shift == 0
3311 || (shift > 0 && shift <= 3
3312 && (1 << shift) == GET_MODE_SIZE (mode)))
3313 && REG_P (index)
3314 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3315 {
3316 info->type = type;
3317 info->offset = index;
3318 info->shift = shift;
3319 return true;
3320 }
3321
3322 return false;
3323 }
3324
3325 bool
3326 aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
3327 {
3328 return (offset >= -64 * GET_MODE_SIZE (mode)
3329 && offset < 64 * GET_MODE_SIZE (mode)
3330 && offset % GET_MODE_SIZE (mode) == 0);
3331 }
3332
3333 static inline bool
3334 offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
3335 HOST_WIDE_INT offset)
3336 {
3337 return offset >= -256 && offset < 256;
3338 }
3339
3340 static inline bool
3341 offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
3342 {
3343 return (offset >= 0
3344 && offset < 4096 * GET_MODE_SIZE (mode)
3345 && offset % GET_MODE_SIZE (mode) == 0);
3346 }
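/* Taking DImode (8-byte) accesses as an example: the 7-bit signed
   scaled form covers multiples of 8 in [-512, 504], the 9-bit signed
   unscaled form covers any offset in [-256, 255], and the 12-bit
   unsigned scaled form covers multiples of 8 in [0, 32760].  */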
3347
3348 /* Return true if X is a valid address for machine mode MODE. If it is,
3349 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3350 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3351
3352 static bool
3353 aarch64_classify_address (struct aarch64_address_info *info,
3354 rtx x, machine_mode mode,
3355 RTX_CODE outer_code, bool strict_p)
3356 {
3357 enum rtx_code code = GET_CODE (x);
3358 rtx op0, op1;
3359 bool allow_reg_index_p =
3360 outer_code != PARALLEL && (GET_MODE_SIZE (mode) != 16
3361 || aarch64_vector_mode_supported_p (mode));
3362 /* Don't support anything other than POST_INC or REG addressing for
3363 AdvSIMD. */
3364 if (aarch64_vect_struct_mode_p (mode)
3365 && (code != POST_INC && code != REG))
3366 return false;
3367
3368 switch (code)
3369 {
3370 case REG:
3371 case SUBREG:
3372 info->type = ADDRESS_REG_IMM;
3373 info->base = x;
3374 info->offset = const0_rtx;
3375 return aarch64_base_register_rtx_p (x, strict_p);
3376
3377 case PLUS:
3378 op0 = XEXP (x, 0);
3379 op1 = XEXP (x, 1);
3380
3381 if (! strict_p
3382 && REG_P (op0)
3383 && (op0 == virtual_stack_vars_rtx
3384 || op0 == frame_pointer_rtx
3385 || op0 == arg_pointer_rtx)
3386 && CONST_INT_P (op1))
3387 {
3388 info->type = ADDRESS_REG_IMM;
3389 info->base = op0;
3390 info->offset = op1;
3391
3392 return true;
3393 }
3394
3395 if (GET_MODE_SIZE (mode) != 0
3396 && CONST_INT_P (op1)
3397 && aarch64_base_register_rtx_p (op0, strict_p))
3398 {
3399 HOST_WIDE_INT offset = INTVAL (op1);
3400
3401 info->type = ADDRESS_REG_IMM;
3402 info->base = op0;
3403 info->offset = op1;
3404
3405 /* TImode and TFmode values are allowed in both pairs of X
3406 registers and individual Q registers. The available
3407 address modes are:
3408 X,X: 7-bit signed scaled offset
3409 Q: 9-bit signed offset
3410 We conservatively require an offset representable in either mode.
3411 */
3412 if (mode == TImode || mode == TFmode)
3413 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3414 && offset_9bit_signed_unscaled_p (mode, offset));
3415
3416 if (outer_code == PARALLEL)
3417 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3418 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
3419 else
3420 return (offset_9bit_signed_unscaled_p (mode, offset)
3421 || offset_12bit_unsigned_scaled_p (mode, offset));
3422 }
3423
3424 if (allow_reg_index_p)
3425 {
3426 /* Look for base + (scaled/extended) index register. */
3427 if (aarch64_base_register_rtx_p (op0, strict_p)
3428 && aarch64_classify_index (info, op1, mode, strict_p))
3429 {
3430 info->base = op0;
3431 return true;
3432 }
3433 if (aarch64_base_register_rtx_p (op1, strict_p)
3434 && aarch64_classify_index (info, op0, mode, strict_p))
3435 {
3436 info->base = op1;
3437 return true;
3438 }
3439 }
3440
3441 return false;
3442
3443 case POST_INC:
3444 case POST_DEC:
3445 case PRE_INC:
3446 case PRE_DEC:
3447 info->type = ADDRESS_REG_WB;
3448 info->base = XEXP (x, 0);
3449 info->offset = NULL_RTX;
3450 return aarch64_base_register_rtx_p (info->base, strict_p);
3451
3452 case POST_MODIFY:
3453 case PRE_MODIFY:
3454 info->type = ADDRESS_REG_WB;
3455 info->base = XEXP (x, 0);
3456 if (GET_CODE (XEXP (x, 1)) == PLUS
3457 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3458 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3459 && aarch64_base_register_rtx_p (info->base, strict_p))
3460 {
3461 HOST_WIDE_INT offset;
3462 info->offset = XEXP (XEXP (x, 1), 1);
3463 offset = INTVAL (info->offset);
3464
3465 /* TImode and TFmode values are allowed in both pairs of X
3466 registers and individual Q registers. The available
3467 address modes are:
3468 X,X: 7-bit signed scaled offset
3469 Q: 9-bit signed offset
3470 We conservatively require an offset representable in either mode.
3471 */
3472 if (mode == TImode || mode == TFmode)
3473 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3474 && offset_9bit_signed_unscaled_p (mode, offset));
3475
3476 if (outer_code == PARALLEL)
3477 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3478 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
3479 else
3480 return offset_9bit_signed_unscaled_p (mode, offset);
3481 }
3482 return false;
3483
3484 case CONST:
3485 case SYMBOL_REF:
3486 case LABEL_REF:
3487 /* load literal: pc-relative constant pool entry. Only supported
3488 for SI mode or larger. */
3489 info->type = ADDRESS_SYMBOLIC;
3490 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3491 {
3492 rtx sym, addend;
3493
3494 split_const (x, &sym, &addend);
3495 return (GET_CODE (sym) == LABEL_REF
3496 || (GET_CODE (sym) == SYMBOL_REF
3497 && CONSTANT_POOL_ADDRESS_P (sym)));
3498 }
3499 return false;
3500
3501 case LO_SUM:
3502 info->type = ADDRESS_LO_SUM;
3503 info->base = XEXP (x, 0);
3504 info->offset = XEXP (x, 1);
3505 if (allow_reg_index_p
3506 && aarch64_base_register_rtx_p (info->base, strict_p))
3507 {
3508 rtx sym, offs;
3509 split_const (info->offset, &sym, &offs);
3510 if (GET_CODE (sym) == SYMBOL_REF
3511 && (aarch64_classify_symbol (sym, offs, SYMBOL_CONTEXT_MEM)
3512 == SYMBOL_SMALL_ABSOLUTE))
3513 {
3514 /* The symbol and offset must be aligned to the access size. */
3515 unsigned int align;
3516 unsigned int ref_size;
3517
3518 if (CONSTANT_POOL_ADDRESS_P (sym))
3519 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3520 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3521 {
3522 tree exp = SYMBOL_REF_DECL (sym);
3523 align = TYPE_ALIGN (TREE_TYPE (exp));
3524 align = CONSTANT_ALIGNMENT (exp, align);
3525 }
3526 else if (SYMBOL_REF_DECL (sym))
3527 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3528 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3529 && SYMBOL_REF_BLOCK (sym) != NULL)
3530 align = SYMBOL_REF_BLOCK (sym)->alignment;
3531 else
3532 align = BITS_PER_UNIT;
3533
3534 ref_size = GET_MODE_SIZE (mode);
3535 if (ref_size == 0)
3536 ref_size = GET_MODE_SIZE (DImode);
3537
3538 return ((INTVAL (offs) & (ref_size - 1)) == 0
3539 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3540 }
3541 }
3542 return false;
3543
3544 default:
3545 return false;
3546 }
3547 }
3548
3549 bool
3550 aarch64_symbolic_address_p (rtx x)
3551 {
3552 rtx offset;
3553
3554 split_const (x, &x, &offset);
3555 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3556 }
3557
3558 /* Classify the base of symbolic expression X, given that X appears in
3559 context CONTEXT. */
3560
3561 enum aarch64_symbol_type
3562 aarch64_classify_symbolic_expression (rtx x,
3563 enum aarch64_symbol_context context)
3564 {
3565 rtx offset;
3566
3567 split_const (x, &x, &offset);
3568 return aarch64_classify_symbol (x, offset, context);
3569 }
3570
3571
3572 /* Return TRUE if X is a legitimate address for accessing memory in
3573 mode MODE. */
3574 static bool
3575 aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
3576 {
3577 struct aarch64_address_info addr;
3578
3579 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3580 }
3581
3582 /* Return TRUE if X is a legitimate address for accessing memory in
3583 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3584 pair operation. */
3585 bool
3586 aarch64_legitimate_address_p (machine_mode mode, rtx x,
3587 RTX_CODE outer_code, bool strict_p)
3588 {
3589 struct aarch64_address_info addr;
3590
3591 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3592 }
3593
3594 /* Return TRUE if rtx X is immediate constant 0.0 */
3595 bool
3596 aarch64_float_const_zero_rtx_p (rtx x)
3597 {
3598 REAL_VALUE_TYPE r;
3599
3600 if (GET_MODE (x) == VOIDmode)
3601 return false;
3602
3603 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3604 if (REAL_VALUE_MINUS_ZERO (r))
3605 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3606 return REAL_VALUES_EQUAL (r, dconst0);
3607 }
3608
3609 /* Return the fixed registers used for condition codes. */
3610
3611 static bool
3612 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3613 {
3614 *p1 = CC_REGNUM;
3615 *p2 = INVALID_REGNUM;
3616 return true;
3617 }
3618
3619 /* Emit call insn with PAT and do aarch64-specific handling. */
3620
3621 void
3622 aarch64_emit_call_insn (rtx pat)
3623 {
3624 rtx insn = emit_call_insn (pat);
3625
3626 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
3627 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
3628 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
3629 }
3630
3631 machine_mode
3632 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3633 {
3634 /* All floating point compares return CCFP if it is an equality
3635 comparison, and CCFPE otherwise. */
3636 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3637 {
3638 switch (code)
3639 {
3640 case EQ:
3641 case NE:
3642 case UNORDERED:
3643 case ORDERED:
3644 case UNLT:
3645 case UNLE:
3646 case UNGT:
3647 case UNGE:
3648 case UNEQ:
3649 case LTGT:
3650 return CCFPmode;
3651
3652 case LT:
3653 case LE:
3654 case GT:
3655 case GE:
3656 return CCFPEmode;
3657
3658 default:
3659 gcc_unreachable ();
3660 }
3661 }
3662
3663 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3664 && y == const0_rtx
3665 && (code == EQ || code == NE || code == LT || code == GE)
3666 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3667 || GET_CODE (x) == NEG))
3668 return CC_NZmode;
3669
3670 /* A compare with a shifted operand. Because of canonicalization,
3671 the comparison will have to be swapped when we emit the assembly
3672 code. */
3673 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3674 && (REG_P (y) || GET_CODE (y) == SUBREG)
3675 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3676 || GET_CODE (x) == LSHIFTRT
3677 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3678 return CC_SWPmode;
3679
3680 /* Similarly for a negated operand, but we can only do this for
3681 equalities. */
3682 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3683 && (REG_P (y) || GET_CODE (y) == SUBREG)
3684 && (code == EQ || code == NE)
3685 && GET_CODE (x) == NEG)
3686 return CC_Zmode;
3687
3688 /* A compare of a mode narrower than SI mode against zero can be done
3689 by extending the value in the comparison. */
3690 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3691 && y == const0_rtx)
3692 /* Only use sign-extension if we really need it. */
3693 return ((code == GT || code == GE || code == LE || code == LT)
3694 ? CC_SESWPmode : CC_ZESWPmode);
3695
3696 /* For everything else, return CCmode. */
3697 return CCmode;
3698 }
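/* For instance, a comparison such as (compare (ashift x 2) y) is given
   CC_SWPmode here; when the assembly is emitted the operands are
   swapped and the shift is folded into the comparison, giving
   something like "cmp y, x, lsl #2" with the condition swapped
   accordingly (e.g. GT becomes LT).  Illustrative only.  */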
3699
3700 static int
3701 aarch64_get_condition_code_1 (enum machine_mode, enum rtx_code);
3702
3703 int
3704 aarch64_get_condition_code (rtx x)
3705 {
3706 machine_mode mode = GET_MODE (XEXP (x, 0));
3707 enum rtx_code comp_code = GET_CODE (x);
3708
3709 if (GET_MODE_CLASS (mode) != MODE_CC)
3710 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3711 return aarch64_get_condition_code_1 (mode, comp_code);
3712 }
3713
3714 static int
3715 aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
3716 {
3717 int ne = -1, eq = -1;
3718 switch (mode)
3719 {
3720 case CCFPmode:
3721 case CCFPEmode:
3722 switch (comp_code)
3723 {
3724 case GE: return AARCH64_GE;
3725 case GT: return AARCH64_GT;
3726 case LE: return AARCH64_LS;
3727 case LT: return AARCH64_MI;
3728 case NE: return AARCH64_NE;
3729 case EQ: return AARCH64_EQ;
3730 case ORDERED: return AARCH64_VC;
3731 case UNORDERED: return AARCH64_VS;
3732 case UNLT: return AARCH64_LT;
3733 case UNLE: return AARCH64_LE;
3734 case UNGT: return AARCH64_HI;
3735 case UNGE: return AARCH64_PL;
3736 default: return -1;
3737 }
3738 break;
3739
3740 case CC_DNEmode:
3741 ne = AARCH64_NE;
3742 eq = AARCH64_EQ;
3743 break;
3744
3745 case CC_DEQmode:
3746 ne = AARCH64_EQ;
3747 eq = AARCH64_NE;
3748 break;
3749
3750 case CC_DGEmode:
3751 ne = AARCH64_GE;
3752 eq = AARCH64_LT;
3753 break;
3754
3755 case CC_DLTmode:
3756 ne = AARCH64_LT;
3757 eq = AARCH64_GE;
3758 break;
3759
3760 case CC_DGTmode:
3761 ne = AARCH64_GT;
3762 eq = AARCH64_LE;
3763 break;
3764
3765 case CC_DLEmode:
3766 ne = AARCH64_LE;
3767 eq = AARCH64_GT;
3768 break;
3769
3770 case CC_DGEUmode:
3771 ne = AARCH64_CS;
3772 eq = AARCH64_CC;
3773 break;
3774
3775 case CC_DLTUmode:
3776 ne = AARCH64_CC;
3777 eq = AARCH64_CS;
3778 break;
3779
3780 case CC_DGTUmode:
3781 ne = AARCH64_HI;
3782 eq = AARCH64_LS;
3783 break;
3784
3785 case CC_DLEUmode:
3786 ne = AARCH64_LS;
3787 eq = AARCH64_HI;
3788 break;
3789
3790 case CCmode:
3791 switch (comp_code)
3792 {
3793 case NE: return AARCH64_NE;
3794 case EQ: return AARCH64_EQ;
3795 case GE: return AARCH64_GE;
3796 case GT: return AARCH64_GT;
3797 case LE: return AARCH64_LE;
3798 case LT: return AARCH64_LT;
3799 case GEU: return AARCH64_CS;
3800 case GTU: return AARCH64_HI;
3801 case LEU: return AARCH64_LS;
3802 case LTU: return AARCH64_CC;
3803 default: return -1;
3804 }
3805 break;
3806
3807 case CC_SWPmode:
3808 case CC_ZESWPmode:
3809 case CC_SESWPmode:
3810 switch (comp_code)
3811 {
3812 case NE: return AARCH64_NE;
3813 case EQ: return AARCH64_EQ;
3814 case GE: return AARCH64_LE;
3815 case GT: return AARCH64_LT;
3816 case LE: return AARCH64_GE;
3817 case LT: return AARCH64_GT;
3818 case GEU: return AARCH64_LS;
3819 case GTU: return AARCH64_CC;
3820 case LEU: return AARCH64_CS;
3821 case LTU: return AARCH64_HI;
3822 default: return -1;
3823 }
3824 break;
3825
3826 case CC_NZmode:
3827 switch (comp_code)
3828 {
3829 case NE: return AARCH64_NE;
3830 case EQ: return AARCH64_EQ;
3831 case GE: return AARCH64_PL;
3832 case LT: return AARCH64_MI;
3833 default: return -1;
3834 }
3835 break;
3836
3837 case CC_Zmode:
3838 switch (comp_code)
3839 {
3840 case NE: return AARCH64_NE;
3841 case EQ: return AARCH64_EQ;
3842 default: return -1;
3843 }
3844 break;
3845
3846 default:
3847 return -1;
3848 break;
3849 }
3850
3851 if (comp_code == NE)
3852 return ne;
3853
3854 if (comp_code == EQ)
3855 return eq;
3856
3857 return -1;
3858 }
3859
3860 bool
3861 aarch64_const_vec_all_same_in_range_p (rtx x,
3862 HOST_WIDE_INT minval,
3863 HOST_WIDE_INT maxval)
3864 {
3865 HOST_WIDE_INT firstval;
3866 int count, i;
3867
3868 if (GET_CODE (x) != CONST_VECTOR
3869 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
3870 return false;
3871
3872 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
3873 if (firstval < minval || firstval > maxval)
3874 return false;
3875
3876 count = CONST_VECTOR_NUNITS (x);
3877 for (i = 1; i < count; i++)
3878 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
3879 return false;
3880
3881 return true;
3882 }
3883
3884 bool
3885 aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
3886 {
3887 return aarch64_const_vec_all_same_in_range_p (x, val, val);
3888 }
3889
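/* Return the number of set bits in VALUE.  Each iteration clears the
   lowest set bit (VALUE &= VALUE - 1), so the loop runs once per set
   bit; e.g. for VALUE == 0xb0 (three bits set) it iterates three times.  */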
3890 static unsigned
3891 bit_count (unsigned HOST_WIDE_INT value)
3892 {
3893 unsigned count = 0;
3894
3895 while (value)
3896 {
3897 count++;
3898 value &= value - 1;
3899 }
3900
3901 return count;
3902 }
3903
3904 /* N Z C V. */
3905 #define AARCH64_CC_V 1
3906 #define AARCH64_CC_C (1 << 1)
3907 #define AARCH64_CC_Z (1 << 2)
3908 #define AARCH64_CC_N (1 << 3)
3909
3910 /* N Z C V flags for ccmp. The first value is for an AND op and the
3911 second is for an IOR op. Indexed by AARCH64_COND_CODE. */
3912 static const int aarch64_nzcv_codes[][2] =
3913 {
3914 {AARCH64_CC_Z, 0}, /* EQ, Z == 1. */
3915 {0, AARCH64_CC_Z}, /* NE, Z == 0. */
3916 {AARCH64_CC_C, 0}, /* CS, C == 1. */
3917 {0, AARCH64_CC_C}, /* CC, C == 0. */
3918 {AARCH64_CC_N, 0}, /* MI, N == 1. */
3919 {0, AARCH64_CC_N}, /* PL, N == 0. */
3920 {AARCH64_CC_V, 0}, /* VS, V == 1. */
3921 {0, AARCH64_CC_V}, /* VC, V == 0. */
3922 {AARCH64_CC_C, 0}, /* HI, C == 1 && Z == 0. */
3923 {0, AARCH64_CC_C}, /* LS, !(C == 1 && Z == 0). */
3924 {0, AARCH64_CC_V}, /* GE, N == V. */
3925 {AARCH64_CC_V, 0}, /* LT, N != V. */
3926 {0, AARCH64_CC_Z}, /* GT, Z == 0 && N == V. */
3927 {AARCH64_CC_Z, 0}, /* LE, !(Z == 0 && N == V). */
3928 {0, 0}, /* AL, Any. */
3929 {0, 0}, /* NV, Any. */
3930 };
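/* For illustration: this table is consumed by the 'K' and 'k' operand
   modifiers below, which print element [0] and element [1] respectively
   for the comparison's AARCH64_COND_CODE.  E.g. for an EQ comparison,
   '%K' prints 4 (AARCH64_CC_Z) and '%k' prints 0; for GE, '%K' prints 0
   and '%k' prints 1 (AARCH64_CC_V).  These values are used as the #nzcv
   immediate of a CCMP instruction.  */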
3931
3932 int
3933 aarch64_ccmp_mode_to_code (enum machine_mode mode)
3934 {
3935 switch (mode)
3936 {
3937 case CC_DNEmode:
3938 return NE;
3939
3940 case CC_DEQmode:
3941 return EQ;
3942
3943 case CC_DLEmode:
3944 return LE;
3945
3946 case CC_DGTmode:
3947 return GT;
3948
3949 case CC_DLTmode:
3950 return LT;
3951
3952 case CC_DGEmode:
3953 return GE;
3954
3955 case CC_DLEUmode:
3956 return LEU;
3957
3958 case CC_DGTUmode:
3959 return GTU;
3960
3961 case CC_DLTUmode:
3962 return LTU;
3963
3964 case CC_DGEUmode:
3965 return GEU;
3966
3967 default:
3968 gcc_unreachable ();
3969 }
3970 }
3971
3972
3973 void
3974 aarch64_print_operand (FILE *f, rtx x, char code)
3975 {
3976 switch (code)
3977 {
3978 /* An integer or symbol address without a preceding # sign. */
3979 case 'c':
3980 switch (GET_CODE (x))
3981 {
3982 case CONST_INT:
3983 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3984 break;
3985
3986 case SYMBOL_REF:
3987 output_addr_const (f, x);
3988 break;
3989
3990 case CONST:
3991 if (GET_CODE (XEXP (x, 0)) == PLUS
3992 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3993 {
3994 output_addr_const (f, x);
3995 break;
3996 }
3997 /* Fall through. */
3998
3999 default:
4000 output_operand_lossage ("unsupported operand for code '%c'", code);
4001 }
4002 break;
4003
4004 case 'e':
4005 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
4006 {
4007 int n;
4008
4009 if (!CONST_INT_P (x)
4010 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
4011 {
4012 output_operand_lossage ("invalid operand for '%%%c'", code);
4013 return;
4014 }
4015
4016 switch (n)
4017 {
4018 case 3:
4019 fputc ('b', f);
4020 break;
4021 case 4:
4022 fputc ('h', f);
4023 break;
4024 case 5:
4025 fputc ('w', f);
4026 break;
4027 default:
4028 output_operand_lossage ("invalid operand for '%%%c'", code);
4029 return;
4030 }
4031 }
4032 break;
4033
4034 case 'p':
4035 {
4036 int n;
4037
4038 /* Print N such that 2^N == X. */
4039 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
4040 {
4041 output_operand_lossage ("invalid operand for '%%%c'", code);
4042 return;
4043 }
4044
4045 asm_fprintf (f, "%d", n);
4046 }
4047 break;
4048
4049 case 'P':
4050 /* Print the number of non-zero bits in X (a const_int). */
4051 if (!CONST_INT_P (x))
4052 {
4053 output_operand_lossage ("invalid operand for '%%%c'", code);
4054 return;
4055 }
4056
4057 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
4058 break;
4059
4060 case 'H':
4061 /* Print the higher numbered register of a pair (TImode) of regs. */
4062 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
4063 {
4064 output_operand_lossage ("invalid operand for '%%%c'", code);
4065 return;
4066 }
4067
4068 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
4069 break;
4070
4071 case 'm':
4072 {
4073 int cond_code;
4074 /* Print a condition (eq, ne, etc). */
4075
4076 /* CONST_TRUE_RTX means always -- that's the default. */
4077 if (x == const_true_rtx)
4078 return;
4079
4080 if (!COMPARISON_P (x))
4081 {
4082 output_operand_lossage ("invalid operand for '%%%c'", code);
4083 return;
4084 }
4085
4086 cond_code = aarch64_get_condition_code (x);
4087 gcc_assert (cond_code >= 0);
4088 fputs (aarch64_condition_codes[cond_code], f);
4089 }
4090 break;
4091
4092 case 'M':
4093 {
4094 int cond_code;
4095 /* Print the inverse of a condition (eq <-> ne, etc). */
4096
4097 /* CONST_TRUE_RTX means never -- that's the default. */
4098 if (x == const_true_rtx)
4099 {
4100 fputs ("nv", f);
4101 return;
4102 }
4103
4104 if (!COMPARISON_P (x))
4105 {
4106 output_operand_lossage ("invalid operand for '%%%c'", code);
4107 return;
4108 }
4109 cond_code = aarch64_get_condition_code (x);
4110 gcc_assert (cond_code >= 0);
4111 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
4112 (cond_code)], f);
4113 }
4114 break;
4115
4116 case 'b':
4117 case 'h':
4118 case 's':
4119 case 'd':
4120 case 'q':
4121 /* Print a scalar FP/SIMD register name. */
4122 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4123 {
4124 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4125 return;
4126 }
4127 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
4128 break;
4129
4130 case 'S':
4131 case 'T':
4132 case 'U':
4133 case 'V':
4134 /* Print the first FP/SIMD register name in a list. */
4135 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
4136 {
4137 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
4138 return;
4139 }
4140 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
4141 break;
4142
4143 case 'X':
4144 /* Print bottom 16 bits of integer constant in hex. */
4145 if (!CONST_INT_P (x))
4146 {
4147 output_operand_lossage ("invalid operand for '%%%c'", code);
4148 return;
4149 }
4150 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
4151 break;
4152
4153 case 'w':
4154 case 'x':
4155 /* Print a general register name or the zero register (32-bit or
4156 64-bit). */
4157 if (x == const0_rtx
4158 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
4159 {
4160 asm_fprintf (f, "%czr", code);
4161 break;
4162 }
4163
4164 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
4165 {
4166 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
4167 break;
4168 }
4169
4170 if (REG_P (x) && REGNO (x) == SP_REGNUM)
4171 {
4172 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
4173 break;
4174 }
4175
4176 /* Fall through. */
4177
4178 case 0:
4179 /* Print a normal operand, if it's a general register, then we
4180 assume DImode. */
4181 if (x == NULL)
4182 {
4183 output_operand_lossage ("missing operand");
4184 return;
4185 }
4186
4187 switch (GET_CODE (x))
4188 {
4189 case REG:
4190 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
4191 break;
4192
4193 case MEM:
4194 aarch64_memory_reference_mode = GET_MODE (x);
4195 output_address (XEXP (x, 0));
4196 break;
4197
4198 case LABEL_REF:
4199 case SYMBOL_REF:
4200 output_addr_const (asm_out_file, x);
4201 break;
4202
4203 case CONST_INT:
4204 asm_fprintf (f, "%wd", INTVAL (x));
4205 break;
4206
4207 case CONST_VECTOR:
4208 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
4209 {
4210 gcc_assert (
4211 aarch64_const_vec_all_same_in_range_p (x,
4212 HOST_WIDE_INT_MIN,
4213 HOST_WIDE_INT_MAX));
4214 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
4215 }
4216 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
4217 {
4218 fputc ('0', f);
4219 }
4220 else
4221 gcc_unreachable ();
4222 break;
4223
4224 case CONST_DOUBLE:
4225 /* CONST_DOUBLE can represent a double-width integer.
4226 In this case, the mode of x is VOIDmode. */
4227 if (GET_MODE (x) == VOIDmode)
4228 ; /* Do Nothing. */
4229 else if (aarch64_float_const_zero_rtx_p (x))
4230 {
4231 fputc ('0', f);
4232 break;
4233 }
4234 else if (aarch64_float_const_representable_p (x))
4235 {
4236 #define buf_size 20
4237 char float_buf[buf_size] = {'\0'};
4238 REAL_VALUE_TYPE r;
4239 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4240 real_to_decimal_for_mode (float_buf, &r,
4241 buf_size, buf_size,
4242 1, GET_MODE (x));
4243 asm_fprintf (asm_out_file, "%s", float_buf);
4244 break;
4245 #undef buf_size
4246 }
4247 output_operand_lossage ("invalid constant");
4248 return;
4249 default:
4250 output_operand_lossage ("invalid operand");
4251 return;
4252 }
4253 break;
4254
4255 case 'A':
4256 if (GET_CODE (x) == HIGH)
4257 x = XEXP (x, 0);
4258
4259 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4260 {
4261 case SYMBOL_SMALL_GOT:
4262 asm_fprintf (asm_out_file, ":got:");
4263 break;
4264
4265 case SYMBOL_SMALL_TLSGD:
4266 asm_fprintf (asm_out_file, ":tlsgd:");
4267 break;
4268
4269 case SYMBOL_SMALL_TLSDESC:
4270 asm_fprintf (asm_out_file, ":tlsdesc:");
4271 break;
4272
4273 case SYMBOL_SMALL_GOTTPREL:
4274 asm_fprintf (asm_out_file, ":gottprel:");
4275 break;
4276
4277 case SYMBOL_SMALL_TPREL:
4278 asm_fprintf (asm_out_file, ":tprel:");
4279 break;
4280
4281 case SYMBOL_TINY_GOT:
4282 gcc_unreachable ();
4283 break;
4284
4285 default:
4286 break;
4287 }
4288 output_addr_const (asm_out_file, x);
4289 break;
4290
4291 case 'L':
4292 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4293 {
4294 case SYMBOL_SMALL_GOT:
4295 asm_fprintf (asm_out_file, ":lo12:");
4296 break;
4297
4298 case SYMBOL_SMALL_TLSGD:
4299 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
4300 break;
4301
4302 case SYMBOL_SMALL_TLSDESC:
4303 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
4304 break;
4305
4306 case SYMBOL_SMALL_GOTTPREL:
4307 asm_fprintf (asm_out_file, ":gottprel_lo12:");
4308 break;
4309
4310 case SYMBOL_SMALL_TPREL:
4311 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
4312 break;
4313
4314 case SYMBOL_TINY_GOT:
4315 asm_fprintf (asm_out_file, ":got:");
4316 break;
4317
4318 default:
4319 break;
4320 }
4321 output_addr_const (asm_out_file, x);
4322 break;
4323
4324 case 'G':
4325
4326 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4327 {
4328 case SYMBOL_SMALL_TPREL:
4329 asm_fprintf (asm_out_file, ":tprel_hi12:");
4330 break;
4331 default:
4332 break;
4333 }
4334 output_addr_const (asm_out_file, x);
4335 break;
4336
4337 case 'K':
4338 {
4339 int cond_code;
4340 /* Print nzcv. */
4341
4342 if (!COMPARISON_P (x))
4343 {
4344 output_operand_lossage ("invalid operand for '%%%c'", code);
4345 return;
4346 }
4347
4348 cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
4349 gcc_assert (cond_code >= 0);
4350 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][0]);
4351 }
4352 break;
4353
4354 case 'k':
4355 {
4356 int cond_code;
4357 /* Print nzcv. */
4358
4359 if (!COMPARISON_P (x))
4360 {
4361 output_operand_lossage ("invalid operand for '%%%c'", code);
4362 return;
4363 }
4364
4365 cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
4366 gcc_assert (cond_code >= 0);
4367 asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][1]);
4368 }
4369 break;
4370
4371 default:
4372 output_operand_lossage ("invalid operand prefix '%%%c'", code);
4373 return;
4374 }
4375 }
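/* As a rough illustration (operand values chosen arbitrarily): '%w' and
   '%x' of the first general register print "w0" and "x0", and print
   "wzr" / "xzr" for a zero constant; '%d' of the first FP/SIMD register
   prints "d0"; '%X' of (const_int 0x12345678) prints "0x5678"; '%p' of
   (const_int 8) prints "3".  */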
4376
4377 void
4378 aarch64_print_operand_address (FILE *f, rtx x)
4379 {
4380 struct aarch64_address_info addr;
4381
4382 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
4383 MEM, true))
4384 switch (addr.type)
4385 {
4386 case ADDRESS_REG_IMM:
4387 if (addr.offset == const0_rtx)
4388 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
4389 else
4390 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
4391 INTVAL (addr.offset));
4392 return;
4393
4394 case ADDRESS_REG_REG:
4395 if (addr.shift == 0)
4396 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
4397 reg_names [REGNO (addr.offset)]);
4398 else
4399 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
4400 reg_names [REGNO (addr.offset)], addr.shift);
4401 return;
4402
4403 case ADDRESS_REG_UXTW:
4404 if (addr.shift == 0)
4405 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
4406 REGNO (addr.offset) - R0_REGNUM);
4407 else
4408 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
4409 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4410 return;
4411
4412 case ADDRESS_REG_SXTW:
4413 if (addr.shift == 0)
4414 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
4415 REGNO (addr.offset) - R0_REGNUM);
4416 else
4417 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
4418 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4419 return;
4420
4421 case ADDRESS_REG_WB:
4422 switch (GET_CODE (x))
4423 {
4424 case PRE_INC:
4425 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
4426 GET_MODE_SIZE (aarch64_memory_reference_mode));
4427 return;
4428 case POST_INC:
4429 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
4430 GET_MODE_SIZE (aarch64_memory_reference_mode));
4431 return;
4432 case PRE_DEC:
4433 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
4434 GET_MODE_SIZE (aarch64_memory_reference_mode));
4435 return;
4436 case POST_DEC:
4437 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
4438 GET_MODE_SIZE (aarch64_memory_reference_mode));
4439 return;
4440 case PRE_MODIFY:
4441 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
4442 INTVAL (addr.offset));
4443 return;
4444 case POST_MODIFY:
4445 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
4446 INTVAL (addr.offset));
4447 return;
4448 default:
4449 break;
4450 }
4451 break;
4452
4453 case ADDRESS_LO_SUM:
4454 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
4455 output_addr_const (f, addr.offset);
4456 asm_fprintf (f, "]");
4457 return;
4458
4459 case ADDRESS_SYMBOLIC:
4460 break;
4461 }
4462
4463 output_addr_const (f, x);
4464 }
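/* For illustration (hypothetical register numbers and symbol), the cases
   above emit address syntax such as: "[x0]" and "[x0, 16]"
   (ADDRESS_REG_IMM), "[x0, x1, lsl 3]" (ADDRESS_REG_REG),
   "[x0, w1, sxtw 2]" (ADDRESS_REG_SXTW), "[x0, 16]!" and "[x0], 16"
   (ADDRESS_REG_WB), and "[x0, #:lo12:sym]" (ADDRESS_LO_SUM).  */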
4465
4466 bool
4467 aarch64_label_mentioned_p (rtx x)
4468 {
4469 const char *fmt;
4470 int i;
4471
4472 if (GET_CODE (x) == LABEL_REF)
4473 return true;
4474
4475 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4476 referencing instruction, but they are constant offsets, not
4477 symbols. */
4478 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4479 return false;
4480
4481 fmt = GET_RTX_FORMAT (GET_CODE (x));
4482 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4483 {
4484 if (fmt[i] == 'E')
4485 {
4486 int j;
4487
4488 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4489 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4490 return true;
4491 }
4492 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4493 return true;
4494 }
4495
4496 return false;
4497 }
4498
4499 /* Implement REGNO_REG_CLASS. */
4500
4501 enum reg_class
4502 aarch64_regno_regclass (unsigned regno)
4503 {
4504 if (GP_REGNUM_P (regno))
4505 return GENERAL_REGS;
4506
4507 if (regno == SP_REGNUM)
4508 return STACK_REG;
4509
4510 if (regno == FRAME_POINTER_REGNUM
4511 || regno == ARG_POINTER_REGNUM)
4512 return POINTER_REGS;
4513
4514 if (FP_REGNUM_P (regno))
4515 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4516
4517 return NO_REGS;
4518 }
4519
4520 static rtx
4521 aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode)
4522 {
4523 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
4524 where mask is selected by alignment and size of the offset.
4525 We try to pick as large a range for the offset as possible to
4526 maximize the chance of a CSE. However, for aligned addresses
4527 we limit the range to 4k so that structures with different sized
4528 elements are likely to use the same base. */
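/* A worked example (illustrative values): for an SImode access to
   (plus X (const_int 0x3004)) the offset is a multiple of the access
   size, so base_offset = 0x3004 & ~0xfff = 0x3000.  We emit X + 0x3000
   into a fresh base register and return (plus base 4), keeping the
   residual offset within the 4k range described above.  */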
4529
4530 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
4531 {
4532 HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
4533 HOST_WIDE_INT base_offset;
4534
4535 /* Does it look like we'll need a load/store-pair operation? */
4536 if (GET_MODE_SIZE (mode) > 16
4537 || mode == TImode)
4538 base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
4539 & ~((128 * GET_MODE_SIZE (mode)) - 1));
4540 /* For offsets that aren't a multiple of the access size, the limit is
4541 -256...255. */
4542 else if (offset & (GET_MODE_SIZE (mode) - 1))
4543 base_offset = (offset + 0x100) & ~0x1ff;
4544 else
4545 base_offset = offset & ~0xfff;
4546
4547 if (base_offset == 0)
4548 return x;
4549
4550 offset -= base_offset;
4551 rtx base_reg = gen_reg_rtx (Pmode);
4552 rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0), base_offset),
4553 NULL_RTX);
4554 emit_move_insn (base_reg, val);
4555 x = plus_constant (Pmode, base_reg, offset);
4556 }
4557
4558 return x;
4559 }
4560
4561 /* Try a machine-dependent way of reloading an illegitimate address
4562 operand. If we find one, push the reload and return the new rtx. */
4563
4564 rtx
4565 aarch64_legitimize_reload_address (rtx *x_p,
4566 machine_mode mode,
4567 int opnum, int type,
4568 int ind_levels ATTRIBUTE_UNUSED)
4569 {
4570 rtx x = *x_p;
4571
4572 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
4573 if (aarch64_vect_struct_mode_p (mode)
4574 && GET_CODE (x) == PLUS
4575 && REG_P (XEXP (x, 0))
4576 && CONST_INT_P (XEXP (x, 1)))
4577 {
4578 rtx orig_rtx = x;
4579 x = copy_rtx (x);
4580 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4581 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4582 opnum, (enum reload_type) type);
4583 return x;
4584 }
4585
4586 /* We must recognize output that we have already generated ourselves. */
4587 if (GET_CODE (x) == PLUS
4588 && GET_CODE (XEXP (x, 0)) == PLUS
4589 && REG_P (XEXP (XEXP (x, 0), 0))
4590 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4591 && CONST_INT_P (XEXP (x, 1)))
4592 {
4593 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4594 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4595 opnum, (enum reload_type) type);
4596 return x;
4597 }
4598
4599 /* We wish to handle large displacements off a base register by splitting
4600 the addend across an add and the mem insn. This can cut the number of
4601 extra insns needed from 3 to 1. It is only useful for load/store of a
4602 single register with 12 bit offset field. */
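/* A worked example (illustrative values): for an SImode access at
   (plus hard-reg (const_int 0x3004)), low = 0x004 and high = 0x3000;
   high is a valid shifted 12-bit immediate, so the address is rewritten
   as (plus (plus reg 0x3000) 4) and the inner PLUS is pushed as a
   reload, leaving the low 12-bit part in the memory reference.  */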
4603 if (GET_CODE (x) == PLUS
4604 && REG_P (XEXP (x, 0))
4605 && CONST_INT_P (XEXP (x, 1))
4606 && HARD_REGISTER_P (XEXP (x, 0))
4607 && mode != TImode
4608 && mode != TFmode
4609 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4610 {
4611 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4612 HOST_WIDE_INT low = val & 0xfff;
4613 HOST_WIDE_INT high = val - low;
4614 HOST_WIDE_INT offs;
4615 rtx cst;
4616 machine_mode xmode = GET_MODE (x);
4617
4618 /* In ILP32, xmode can be either DImode or SImode. */
4619 gcc_assert (xmode == DImode || xmode == SImode);
4620
4621 /* Leave BLKmode (zero-size) offsets to the generic reload code, since
4622 we cannot ascertain BLKmode alignment. */
4623 if (GET_MODE_SIZE (mode) == 0)
4624 return NULL_RTX;
4625
4626 offs = low % GET_MODE_SIZE (mode);
4627
4628 /* Align misaligned offset by adjusting high part to compensate. */
4629 if (offs != 0)
4630 {
4631 if (aarch64_uimm12_shift (high + offs))
4632 {
4633 /* Align down. */
4634 low = low - offs;
4635 high = high + offs;
4636 }
4637 else
4638 {
4639 /* Align up. */
4640 offs = GET_MODE_SIZE (mode) - offs;
4641 low = low + offs;
4642 high = high + (low & 0x1000) - offs;
4643 low &= 0xfff;
4644 }
4645 }
4646
4647 /* Check for overflow. */
4648 if (high + low != val)
4649 return NULL_RTX;
4650
4651 cst = GEN_INT (high);
4652 if (!aarch64_uimm12_shift (high))
4653 cst = force_const_mem (xmode, cst);
4654
4655 /* Reload high part into base reg, leaving the low part
4656 in the mem instruction.
4657 Note that replacing this gen_rtx_PLUS with plus_constant is
4658 wrong in this case because we rely on the
4659 (plus (plus reg c1) c2) structure being preserved so that
4660 XEXP (*p, 0) in push_reload below uses the correct term. */
4661 x = gen_rtx_PLUS (xmode,
4662 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4663 GEN_INT (low));
4664
4665 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4666 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4667 opnum, (enum reload_type) type);
4668 return x;
4669 }
4670
4671 return NULL_RTX;
4672 }
4673
4674
4675 static reg_class_t
4676 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4677 reg_class_t rclass,
4678 machine_mode mode,
4679 secondary_reload_info *sri)
4680 {
4681 /* Without the TARGET_SIMD instructions we cannot move a Q register
4682 to a Q register directly. We need a scratch. */
4683 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4684 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4685 && reg_class_subset_p (rclass, FP_REGS))
4686 {
4687 if (mode == TFmode)
4688 sri->icode = CODE_FOR_aarch64_reload_movtf;
4689 else if (mode == TImode)
4690 sri->icode = CODE_FOR_aarch64_reload_movti;
4691 return NO_REGS;
4692 }
4693
4694 /* A TFmode or TImode memory access should be handled via an FP_REG
4695 because AArch64 has richer addressing modes for LDR/STR instructions
4696 than for LDP/STP instructions. */
4697 if (!TARGET_GENERAL_REGS_ONLY && rclass == GENERAL_REGS
4698 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4699 return FP_REGS;
4700
4701 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P (x))
4702 return GENERAL_REGS;
4703
4704 return NO_REGS;
4705 }
4706
4707 static bool
4708 aarch64_can_eliminate (const int from, const int to)
4709 {
4710 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4711 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4712
4713 if (frame_pointer_needed)
4714 {
4715 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4716 return true;
4717 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4718 return false;
4719 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4720 && !cfun->calls_alloca)
4721 return true;
4722 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4723 return true;
4724
4725 return false;
4726 }
4727 else
4728 {
4729 /* If we decided that we didn't need a leaf frame pointer but then used
4730 LR in the function, then we'll want a frame pointer after all, so
4731 prevent this elimination to ensure a frame pointer is used. */
4732 if (to == STACK_POINTER_REGNUM
4733 && flag_omit_leaf_frame_pointer
4734 && df_regs_ever_live_p (LR_REGNUM))
4735 return false;
4736 }
4737
4738 return true;
4739 }
4740
4741 HOST_WIDE_INT
4742 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4743 {
4744 aarch64_layout_frame ();
4745
4746 if (to == HARD_FRAME_POINTER_REGNUM)
4747 {
4748 if (from == ARG_POINTER_REGNUM)
4749 return cfun->machine->frame.frame_size - crtl->outgoing_args_size;
4750
4751 if (from == FRAME_POINTER_REGNUM)
4752 return (cfun->machine->frame.hard_fp_offset
4753 - cfun->machine->frame.saved_varargs_size);
4754 }
4755
4756 if (to == STACK_POINTER_REGNUM)
4757 {
4758 if (from == FRAME_POINTER_REGNUM)
4759 return (cfun->machine->frame.frame_size
4760 - cfun->machine->frame.saved_varargs_size);
4761 }
4762
4763 return cfun->machine->frame.frame_size;
4764 }
4765
4766 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4767 previous frame. */
4768
4769 rtx
4770 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4771 {
4772 if (count != 0)
4773 return const0_rtx;
4774 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4775 }
4776
4777
4778 static void
4779 aarch64_asm_trampoline_template (FILE *f)
4780 {
4781 if (TARGET_ILP32)
4782 {
4783 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4784 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4785 }
4786 else
4787 {
4788 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4789 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4790 }
4791 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4792 assemble_aligned_integer (4, const0_rtx);
4793 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4794 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4795 }
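/* For reference, with the default register choices (IP1 == x17 and the
   static chain register x18) the LP64 template above assembles to
   roughly:

	ldr	x17, .+16	// load the target function address
	ldr	x18, .+20	// load the static chain value
	br	x17
	.word	0		// pad the code out to 16 bytes
	.xword	0		// overwritten with fnaddr and the chain
	.xword	0		// by aarch64_trampoline_init below

   The first load reads the doubleword at offset 16, the second the one
   at offset 24.  */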
4796
4797 static void
4798 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4799 {
4800 rtx fnaddr, mem, a_tramp;
4801 const int tramp_code_sz = 16;
4802
4803 /* We don't need to copy the trailing D-words; we fill those in below. */
4804 emit_block_move (m_tramp, assemble_trampoline_template (),
4805 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4806 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4807 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4808 if (GET_MODE (fnaddr) != ptr_mode)
4809 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4810 emit_move_insn (mem, fnaddr);
4811
4812 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4813 emit_move_insn (mem, chain_value);
4814
4815 /* XXX We should really define a "clear_cache" pattern and use
4816 gen_clear_cache(). */
4817 a_tramp = XEXP (m_tramp, 0);
4818 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4819 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4820 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4821 ptr_mode);
4822 }
4823
4824 static unsigned char
4825 aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
4826 {
4827 switch (regclass)
4828 {
4829 case CALLER_SAVE_REGS:
4830 case POINTER_REGS:
4831 case GENERAL_REGS:
4832 case ALL_REGS:
4833 case FP_REGS:
4834 case FP_LO_REGS:
4835 return
4836 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4837 (GET_MODE_SIZE (mode) + 7) / 8;
4838 case STACK_REG:
4839 return 1;
4840
4841 case NO_REGS:
4842 return 0;
4843
4844 default:
4845 break;
4846 }
4847 gcc_unreachable ();
4848 }
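/* E.g. DImode needs one register in any of the classes above, TImode
   needs two GENERAL_REGS, and a 128-bit vector mode such as V4SImode
   needs a single FP/SIMD register.  */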
4849
4850 static reg_class_t
4851 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4852 {
4853 if (regclass == POINTER_REGS)
4854 return GENERAL_REGS;
4855
4856 if (regclass == STACK_REG)
4857 {
4858 if (REG_P (x)
4859 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4860 return regclass;
4861
4862 return NO_REGS;
4863 }
4864
4865 /* If it's an integer immediate that MOVI can't handle, then
4866 FP_REGS is not an option, so we return NO_REGS instead. */
4867 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4868 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4869 return NO_REGS;
4870
4871 /* Register elimination can result in a request for
4872 SP+constant->FP_REGS. We cannot support such operations, which
4873 use SP as the source and an FP_REG as the destination, so reject
4874 them right now. */
4875 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4876 {
4877 rtx lhs = XEXP (x, 0);
4878
4879 /* Look through a possible SUBREG introduced by ILP32. */
4880 if (GET_CODE (lhs) == SUBREG)
4881 lhs = SUBREG_REG (lhs);
4882
4883 gcc_assert (REG_P (lhs));
4884 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4885 POINTER_REGS));
4886 return NO_REGS;
4887 }
4888
4889 return regclass;
4890 }
4891
4892 void
4893 aarch64_asm_output_labelref (FILE* f, const char *name)
4894 {
4895 asm_fprintf (f, "%U%s", name);
4896 }
4897
4898 static void
4899 aarch64_elf_asm_constructor (rtx symbol, int priority)
4900 {
4901 if (priority == DEFAULT_INIT_PRIORITY)
4902 default_ctor_section_asm_out_constructor (symbol, priority);
4903 else
4904 {
4905 section *s;
4906 char buf[18];
4907 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4908 s = get_section (buf, SECTION_WRITE, NULL);
4909 switch_to_section (s);
4910 assemble_align (POINTER_SIZE);
4911 assemble_aligned_integer (POINTER_BYTES, symbol);
4912 }
4913 }
4914
4915 static void
4916 aarch64_elf_asm_destructor (rtx symbol, int priority)
4917 {
4918 if (priority == DEFAULT_INIT_PRIORITY)
4919 default_dtor_section_asm_out_destructor (symbol, priority);
4920 else
4921 {
4922 section *s;
4923 char buf[18];
4924 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4925 s = get_section (buf, SECTION_WRITE, NULL);
4926 switch_to_section (s);
4927 assemble_align (POINTER_SIZE);
4928 assemble_aligned_integer (POINTER_BYTES, symbol);
4929 }
4930 }
4931
4932 const char*
4933 aarch64_output_casesi (rtx *operands)
4934 {
4935 char buf[100];
4936 char label[100];
4937 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
4938 int index;
4939 static const char *const patterns[4][2] =
4940 {
4941 {
4942 "ldrb\t%w3, [%0,%w1,uxtw]",
4943 "add\t%3, %4, %w3, sxtb #2"
4944 },
4945 {
4946 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4947 "add\t%3, %4, %w3, sxth #2"
4948 },
4949 {
4950 "ldr\t%w3, [%0,%w1,uxtw #2]",
4951 "add\t%3, %4, %w3, sxtw #2"
4952 },
4953 /* We assume that DImode is only generated when not optimizing and
4954 that we don't really need 64-bit address offsets. That would
4955 imply an object file with 8GB of code in a single function! */
4956 {
4957 "ldr\t%w3, [%0,%w1,uxtw #2]",
4958 "add\t%3, %4, %w3, sxtw #2"
4959 }
4960 };
4961
4962 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4963
4964 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4965
4966 gcc_assert (index >= 0 && index <= 3);
4967
4968 /* Need to implement table size reduction, by changing the code below. */
4969 output_asm_insn (patterns[index][0], operands);
4970 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4971 snprintf (buf, sizeof (buf),
4972 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4973 output_asm_insn (buf, operands);
4974 output_asm_insn (patterns[index][1], operands);
4975 output_asm_insn ("br\t%3", operands);
4976 assemble_label (asm_out_file, label);
4977 return "";
4978 }
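/* For illustration (hypothetical operands: table base in x0, index in
   w1, scratch registers x3 and x4), a HImode dispatch table emits:

	ldrh	w3, [x0,w1,uxtw #1]
	adr	x4, .Lrtx<N>
	add	x3, x4, w3, sxth #2
	br	x3
   .Lrtx<N>:

   i.e. load the table entry, scale it by 4, add it to the anchor label
   and branch.  */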
4979
4980
4981 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4982 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4983 operator. */
4984
4985 int
4986 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4987 {
4988 if (shift >= 0 && shift <= 3)
4989 {
4990 int size;
4991 for (size = 8; size <= 32; size *= 2)
4992 {
4993 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4994 if (mask == bits << shift)
4995 return size;
4996 }
4997 }
4998 return 0;
4999 }
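/* E.g. aarch64_uxt_size (1, 0x1fe) == 8: the mask is 0xff shifted left
   by one, so the operand can use a UXTB extend with LSL #1; whereas
   aarch64_uxt_size (0, 0x1ff) == 0 because 0x1ff is not a byte,
   halfword or word mask.  */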
5000
5001 static bool
5002 aarch64_use_blocks_for_constant_p (machine_mode mode ATTRIBUTE_UNUSED,
5003 const_rtx x ATTRIBUTE_UNUSED)
5004 {
5005 /* We can't use blocks for constants when we're using a per-function
5006 constant pool. */
5007 return false;
5008 }
5009
5010 static section *
5011 aarch64_select_rtx_section (machine_mode mode ATTRIBUTE_UNUSED,
5012 rtx x ATTRIBUTE_UNUSED,
5013 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
5014 {
5015 /* Force all constant pool entries into the current function section. */
5016 return function_section (current_function_decl);
5017 }
5018
5019
5020 /* Costs. */
5021
5022 /* Helper function for rtx cost calculation. Strip a shift expression
5023 from X. Returns the inner operand if successful, or the original
5024 expression on failure. */
5025 static rtx
5026 aarch64_strip_shift (rtx x)
5027 {
5028 rtx op = x;
5029
5030 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
5031 we can convert both to ROR during final output. */
5032 if ((GET_CODE (op) == ASHIFT
5033 || GET_CODE (op) == ASHIFTRT
5034 || GET_CODE (op) == LSHIFTRT
5035 || GET_CODE (op) == ROTATERT
5036 || GET_CODE (op) == ROTATE)
5037 && CONST_INT_P (XEXP (op, 1)))
5038 return XEXP (op, 0);
5039
5040 if (GET_CODE (op) == MULT
5041 && CONST_INT_P (XEXP (op, 1))
5042 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
5043 return XEXP (op, 0);
5044
5045 return x;
5046 }
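/* E.g. (ashift (reg) (const_int 3)) and (mult (reg) (const_int 8)) both
   strip to (reg); the MULT form appears where a shift has been
   canonicalized to a multiply by a power of two.  */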
5047
5048 /* Helper function for rtx cost calculation. Strip an extend
5049 expression from X. Returns the inner operand if successful, or the
5050 original expression on failure. We deal with a number of possible
5051 canonicalization variations here. */
5052 static rtx
5053 aarch64_strip_extend (rtx x)
5054 {
5055 rtx op = x;
5056
5057 /* Zero and sign extraction of a widened value. */
5058 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
5059 && XEXP (op, 2) == const0_rtx
5060 && GET_CODE (XEXP (op, 0)) == MULT
5061 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
5062 XEXP (op, 1)))
5063 return XEXP (XEXP (op, 0), 0);
5064
5065 /* It can also be represented (for zero-extend) as an AND with an
5066 immediate. */
5067 if (GET_CODE (op) == AND
5068 && GET_CODE (XEXP (op, 0)) == MULT
5069 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
5070 && CONST_INT_P (XEXP (op, 1))
5071 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
5072 INTVAL (XEXP (op, 1))) != 0)
5073 return XEXP (XEXP (op, 0), 0);
5074
5075 /* Now handle extended register, as this may also have an optional
5076 left shift by 1..4. */
5077 if (GET_CODE (op) == ASHIFT
5078 && CONST_INT_P (XEXP (op, 1))
5079 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
5080 op = XEXP (op, 0);
5081
5082 if (GET_CODE (op) == ZERO_EXTEND
5083 || GET_CODE (op) == SIGN_EXTEND)
5084 op = XEXP (op, 0);
5085
5086 if (op != x)
5087 return op;
5088
5089 return x;
5090 }
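/* E.g. (zero_extend (reg)) strips to (reg), as does
   (and (mult (reg) (const_int 4)) (const_int 0x3fc)), which is the
   AND-mask form of a zero-extended byte scaled by 4.  */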
5091
5092 /* Helper function for rtx cost calculation. Calculate the cost of
5093 a MULT, which may be part of a multiply-accumulate rtx. Return
5094 the calculated cost of the expression, recursing manually in to
5095 operands where needed. */
5096
5097 static int
5098 aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
5099 {
5100 rtx op0, op1;
5101 const struct cpu_cost_table *extra_cost
5102 = aarch64_tune_params->insn_extra_cost;
5103 int cost = 0;
5104 bool maybe_fma = (outer == PLUS || outer == MINUS);
5105 machine_mode mode = GET_MODE (x);
5106
5107 gcc_checking_assert (code == MULT);
5108
5109 op0 = XEXP (x, 0);
5110 op1 = XEXP (x, 1);
5111
5112 if (VECTOR_MODE_P (mode))
5113 mode = GET_MODE_INNER (mode);
5114
5115 /* Integer multiply/fma. */
5116 if (GET_MODE_CLASS (mode) == MODE_INT)
5117 {
5118 /* The multiply will be canonicalized as a shift; cost it as such. */
5119 if (CONST_INT_P (op1)
5120 && exact_log2 (INTVAL (op1)) > 0)
5121 {
5122 if (speed)
5123 {
5124 if (maybe_fma)
5125 /* ADD (shifted register). */
5126 cost += extra_cost->alu.arith_shift;
5127 else
5128 /* LSL (immediate). */
5129 cost += extra_cost->alu.shift;
5130 }
5131
5132 cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
5133
5134 return cost;
5135 }
5136
5137 /* Integer multiplies or FMAs have zero/sign extending variants. */
5138 if ((GET_CODE (op0) == ZERO_EXTEND
5139 && GET_CODE (op1) == ZERO_EXTEND)
5140 || (GET_CODE (op0) == SIGN_EXTEND
5141 && GET_CODE (op1) == SIGN_EXTEND))
5142 {
5143 cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
5144 + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
5145
5146 if (speed)
5147 {
5148 if (maybe_fma)
5149 /* MADD/SMADDL/UMADDL. */
5150 cost += extra_cost->mult[0].extend_add;
5151 else
5152 /* MUL/SMULL/UMULL. */
5153 cost += extra_cost->mult[0].extend;
5154 }
5155
5156 return cost;
5157 }
5158
5159 /* This is either an integer multiply or an FMA. In both cases
5160 we want to recurse and cost the operands. */
5161 cost += rtx_cost (op0, MULT, 0, speed)
5162 + rtx_cost (op1, MULT, 1, speed);
5163
5164 if (speed)
5165 {
5166 if (maybe_fma)
5167 /* MADD. */
5168 cost += extra_cost->mult[mode == DImode].add;
5169 else
5170 /* MUL. */
5171 cost += extra_cost->mult[mode == DImode].simple;
5172 }
5173
5174 return cost;
5175 }
5176 else
5177 {
5178 if (speed)
5179 {
5180 /* Floating-point FMA/FMUL can also support negations of the
5181 operands. */
5182 if (GET_CODE (op0) == NEG)
5183 op0 = XEXP (op0, 0);
5184 if (GET_CODE (op1) == NEG)
5185 op1 = XEXP (op1, 0);
5186
5187 if (maybe_fma)
5188 /* FMADD/FNMADD/FNMSUB/FMSUB. */
5189 cost += extra_cost->fp[mode == DFmode].fma;
5190 else
5191 /* FMUL/FNMUL. */
5192 cost += extra_cost->fp[mode == DFmode].mult;
5193 }
5194
5195 cost += rtx_cost (op0, MULT, 0, speed)
5196 + rtx_cost (op1, MULT, 1, speed);
5197 return cost;
5198 }
5199 }
5200
5201 static int
5202 aarch64_address_cost (rtx x,
5203 machine_mode mode,
5204 addr_space_t as ATTRIBUTE_UNUSED,
5205 bool speed)
5206 {
5207 enum rtx_code c = GET_CODE (x);
5208 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
5209 struct aarch64_address_info info;
5210 int cost = 0;
5211 info.shift = 0;
5212
5213 if (!aarch64_classify_address (&info, x, mode, c, false))
5214 {
5215 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
5216 {
5217 /* This is a CONST or SYMBOL ref which will be split
5218 in a different way depending on the code model in use.
5219 Cost it through the generic infrastructure. */
5220 int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
5221 /* Divide through by the cost of one instruction to
5222 bring it to the same units as the address costs. */
5223 cost_symbol_ref /= COSTS_N_INSNS (1);
5224 /* The cost is then the cost of preparing the address,
5225 followed by an immediate (possibly 0) offset. */
5226 return cost_symbol_ref + addr_cost->imm_offset;
5227 }
5228 else
5229 {
5230 /* This is most likely a jump table from a case
5231 statement. */
5232 return addr_cost->register_offset;
5233 }
5234 }
5235
5236 switch (info.type)
5237 {
5238 case ADDRESS_LO_SUM:
5239 case ADDRESS_SYMBOLIC:
5240 case ADDRESS_REG_IMM:
5241 cost += addr_cost->imm_offset;
5242 break;
5243
5244 case ADDRESS_REG_WB:
5245 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
5246 cost += addr_cost->pre_modify;
5247 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
5248 cost += addr_cost->post_modify;
5249 else
5250 gcc_unreachable ();
5251
5252 break;
5253
5254 case ADDRESS_REG_REG:
5255 cost += addr_cost->register_offset;
5256 break;
5257
5258 case ADDRESS_REG_UXTW:
5259 case ADDRESS_REG_SXTW:
5260 cost += addr_cost->register_extend;
5261 break;
5262
5263 default:
5264 gcc_unreachable ();
5265 }
5266
5267
5268 if (info.shift > 0)
5269 {
5270 /* For the sake of calculating the cost of the shifted register
5271 component, we can treat same sized modes in the same way. */
5272 switch (GET_MODE_BITSIZE (mode))
5273 {
5274 case 16:
5275 cost += addr_cost->addr_scale_costs.hi;
5276 break;
5277
5278 case 32:
5279 cost += addr_cost->addr_scale_costs.si;
5280 break;
5281
5282 case 64:
5283 cost += addr_cost->addr_scale_costs.di;
5284 break;
5285
5286 /* We can't tell, or this is a 128-bit vector. */
5287 default:
5288 cost += addr_cost->addr_scale_costs.ti;
5289 break;
5290 }
5291 }
5292
5293 return cost;
5294 }
5295
5296 /* Return true if the RTX X in mode MODE is a zero or sign extract
5297 usable in an ADD or SUB (extended register) instruction. */
5298 static bool
5299 aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
5300 {
5301 /* Catch add with a sign extract.
5302 This is add_<optab><mode>_multp2. */
5303 if (GET_CODE (x) == SIGN_EXTRACT
5304 || GET_CODE (x) == ZERO_EXTRACT)
5305 {
5306 rtx op0 = XEXP (x, 0);
5307 rtx op1 = XEXP (x, 1);
5308 rtx op2 = XEXP (x, 2);
5309
5310 if (GET_CODE (op0) == MULT
5311 && CONST_INT_P (op1)
5312 && op2 == const0_rtx
5313 && CONST_INT_P (XEXP (op0, 1))
5314 && aarch64_is_extend_from_extract (mode,
5315 XEXP (op0, 1),
5316 op1))
5317 {
5318 return true;
5319 }
5320 }
5321
5322 return false;
5323 }
5324
5325 static bool
5326 aarch64_frint_unspec_p (unsigned int u)
5327 {
5328 switch (u)
5329 {
5330 case UNSPEC_FRINTZ:
5331 case UNSPEC_FRINTP:
5332 case UNSPEC_FRINTM:
5333 case UNSPEC_FRINTA:
5334 case UNSPEC_FRINTN:
5335 case UNSPEC_FRINTX:
5336 case UNSPEC_FRINTI:
5337 return true;
5338
5339 default:
5340 return false;
5341 }
5342 }
5343
5344 /* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
5345 storing it in *COST. Result is true if the total cost of the operation
5346 has now been calculated. */
5347 static bool
5348 aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
5349 {
5350 rtx inner;
5351 rtx comparator;
5352 enum rtx_code cmpcode;
5353
5354 if (COMPARISON_P (op0))
5355 {
5356 inner = XEXP (op0, 0);
5357 comparator = XEXP (op0, 1);
5358 cmpcode = GET_CODE (op0);
5359 }
5360 else
5361 {
5362 inner = op0;
5363 comparator = const0_rtx;
5364 cmpcode = NE;
5365 }
5366
5367 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
5368 {
5369 /* Conditional branch. */
5370 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
5371 return true;
5372 else
5373 {
5374 if (cmpcode == NE || cmpcode == EQ)
5375 {
5376 if (comparator == const0_rtx)
5377 {
5378 /* TBZ/TBNZ/CBZ/CBNZ. */
5379 if (GET_CODE (inner) == ZERO_EXTRACT)
5380 /* TBZ/TBNZ. */
5381 *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
5382 0, speed);
5383 else
5384 /* CBZ/CBNZ. */
5385 *cost += rtx_cost (inner, cmpcode, 0, speed);
5386
5387 return true;
5388 }
5389 }
5390 else if (cmpcode == LT || cmpcode == GE)
5391 {
5392 /* TBZ/TBNZ. */
5393 if (comparator == const0_rtx)
5394 return true;
5395 }
5396 }
5397 }
5398 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
5399 {
5400 /* It's a conditional operation based on the status flags,
5401 so it must be some flavor of CSEL. */
5402
5403 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
5404 if (GET_CODE (op1) == NEG
5405 || GET_CODE (op1) == NOT
5406 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
5407 op1 = XEXP (op1, 0);
5408
5409 *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
5410 *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
5411 return true;
5412 }
5413
5414 /* We don't know what this is, cost all operands. */
5415 return false;
5416 }
5417
5418 /* Calculate the cost of calculating X, storing it in *COST. Result
5419 is true if the total cost of the operation has now been calculated. */
5420 static bool
5421 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
5422 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
5423 {
5424 rtx op0, op1, op2;
5425 const struct cpu_cost_table *extra_cost
5426 = aarch64_tune_params->insn_extra_cost;
5427 machine_mode mode = GET_MODE (x);
5428
5429 /* By default, assume that everything has equivalent cost to the
5430 cheapest instruction. Any additional costs are applied as a delta
5431 above this default. */
5432 *cost = COSTS_N_INSNS (1);
5433
5434 /* TODO: The cost infrastructure currently does not handle
5435 vector operations. Assume that all vector operations
5436 are equally expensive. */
5437 if (VECTOR_MODE_P (mode))
5438 {
5439 if (speed)
5440 *cost += extra_cost->vect.alu;
5441 return true;
5442 }
5443
5444 switch (code)
5445 {
5446 case SET:
5447 /* The cost depends entirely on the operands to SET. */
5448 *cost = 0;
5449 op0 = SET_DEST (x);
5450 op1 = SET_SRC (x);
5451
5452 switch (GET_CODE (op0))
5453 {
5454 case MEM:
5455 if (speed)
5456 {
5457 rtx address = XEXP (op0, 0);
5458 if (GET_MODE_CLASS (mode) == MODE_INT)
5459 *cost += extra_cost->ldst.store;
5460 else if (mode == SFmode)
5461 *cost += extra_cost->ldst.storef;
5462 else if (mode == DFmode)
5463 *cost += extra_cost->ldst.stored;
5464
5465 *cost +=
5466 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5467 0, speed));
5468 }
5469
5470 *cost += rtx_cost (op1, SET, 1, speed);
5471 return true;
5472
5473 case SUBREG:
5474 if (! REG_P (SUBREG_REG (op0)))
5475 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
5476
5477 /* Fall through. */
5478 case REG:
5479 /* const0_rtx is in general free, but we will use an
5480 instruction to set a register to 0. */
5481 if (REG_P (op1) || op1 == const0_rtx)
5482 {
5483 /* The cost is 1 per register copied. */
5484 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
5485 / UNITS_PER_WORD;
5486 *cost = COSTS_N_INSNS (n_minus_1 + 1);
5487 }
5488 else
5489 /* Cost is just the cost of the RHS of the set. */
5490 *cost += rtx_cost (op1, SET, 1, speed);
5491 return true;
5492
5493 case ZERO_EXTRACT:
5494 case SIGN_EXTRACT:
5495 /* Bit-field insertion. Strip any redundant widening of
5496 the RHS to meet the width of the target. */
5497 if (GET_CODE (op1) == SUBREG)
5498 op1 = SUBREG_REG (op1);
5499 if ((GET_CODE (op1) == ZERO_EXTEND
5500 || GET_CODE (op1) == SIGN_EXTEND)
5501 && CONST_INT_P (XEXP (op0, 1))
5502 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
5503 >= INTVAL (XEXP (op0, 1))))
5504 op1 = XEXP (op1, 0);
5505
5506 if (CONST_INT_P (op1))
5507 {
5508 /* MOV immediate is assumed to always be cheap. */
5509 *cost = COSTS_N_INSNS (1);
5510 }
5511 else
5512 {
5513 /* BFM. */
5514 if (speed)
5515 *cost += extra_cost->alu.bfi;
5516 *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
5517 }
5518
5519 return true;
5520
5521 default:
5522 /* We can't make sense of this, assume default cost. */
5523 *cost = COSTS_N_INSNS (1);
5524 return false;
5525 }
5526 return false;
5527
5528 case CONST_INT:
5529 /* If an instruction can incorporate a constant within the
5530 instruction, the instruction's expression avoids calling
5531 rtx_cost() on the constant. If rtx_cost() is called on a
5532 constant, then it is usually because the constant must be
5533 moved into a register by one or more instructions.
5534
5535 The exception is constant 0, which can be expressed
5536 as XZR/WZR and is therefore free. The exception to this is
5537 if we have (set (reg) (const0_rtx)) in which case we must cost
5538 the move. However, we can catch that when we cost the SET, so
5539 we don't need to consider that here. */
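/* E.g. a constant that needs a MOVZ plus three MOVKs to materialize is
   costed as COSTS_N_INSNS (4), while one that fits a single MOV costs
   COSTS_N_INSNS (1).  */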
5540 if (x == const0_rtx)
5541 *cost = 0;
5542 else
5543 {
5544 /* To an approximation, building any other constant is
5545 proportionally expensive to the number of instructions
5546 required to build that constant. This is true whether we
5547 are compiling for SPEED or otherwise. */
5548 *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
5549 (NULL_RTX, x, false, mode));
5550 }
5551 return true;
5552
5553 case CONST_DOUBLE:
5554 if (speed)
5555 {
5556 /* mov[df,sf]_aarch64. */
5557 if (aarch64_float_const_representable_p (x))
5558 /* FMOV (scalar immediate). */
5559 *cost += extra_cost->fp[mode == DFmode].fpconst;
5560 else if (!aarch64_float_const_zero_rtx_p (x))
5561 {
5562 /* This will be a load from memory. */
5563 if (mode == DFmode)
5564 *cost += extra_cost->ldst.loadd;
5565 else
5566 *cost += extra_cost->ldst.loadf;
5567 }
5568 else
5569 /* Otherwise this is +0.0. We get this using MOVI d0, #0
5570 or MOV v0.s[0], wzr - neither of which is modeled by the
5571 cost tables. Just use the default cost. */
5572 {
5573 }
5574 }
5575
5576 return true;
5577
5578 case MEM:
5579 if (speed)
5580 {
5581 /* For loads we want the base cost of a load, plus an
5582 approximation for the additional cost of the addressing
5583 mode. */
5584 rtx address = XEXP (x, 0);
5585 if (GET_MODE_CLASS (mode) == MODE_INT)
5586 *cost += extra_cost->ldst.load;
5587 else if (mode == SFmode)
5588 *cost += extra_cost->ldst.loadf;
5589 else if (mode == DFmode)
5590 *cost += extra_cost->ldst.loadd;
5591
5592 *cost +=
5593 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5594 0, speed));
5595 }
5596
5597 return true;
5598
5599 case NEG:
5600 op0 = XEXP (x, 0);
5601
5602 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5603 {
5604 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5605 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5606 {
5607 /* CSETM. */
5608 *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
5609 return true;
5610 }
5611
5612 /* Cost this as SUB wzr, X. */
5613 op0 = CONST0_RTX (GET_MODE (x));
5614 op1 = XEXP (x, 0);
5615 goto cost_minus;
5616 }
5617
5618 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
5619 {
5620 /* Support (neg(fma...)) as a single instruction only if
5621 sign of zeros is unimportant. This matches the decision
5622 making in aarch64.md. */
5623 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5624 {
5625 /* FNMADD. */
5626 *cost = rtx_cost (op0, NEG, 0, speed);
5627 return true;
5628 }
5629 if (speed)
5630 /* FNEG. */
5631 *cost += extra_cost->fp[mode == DFmode].neg;
5632 return false;
5633 }
5634
5635 return false;
5636
5637 case CLRSB:
5638 case CLZ:
5639 if (speed)
5640 *cost += extra_cost->alu.clz;
5641
5642 return false;
5643
5644 case COMPARE:
5645 op0 = XEXP (x, 0);
5646 op1 = XEXP (x, 1);
5647
5648 if (op1 == const0_rtx
5649 && GET_CODE (op0) == AND)
5650 {
5651 x = op0;
5652 goto cost_logic;
5653 }
5654
5655 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
5656 {
5657 /* TODO: A write to the CC flags possibly costs extra; this
5658 needs encoding in the cost tables. */
5659
5660 /* CC_ZESWPmode supports zero extend for free. */
5661 if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
5662 op0 = XEXP (op0, 0);
5663
5664 /* ANDS. */
5665 if (GET_CODE (op0) == AND)
5666 {
5667 x = op0;
5668 goto cost_logic;
5669 }
5670
5671 if (GET_CODE (op0) == PLUS)
5672 {
5673 /* ADDS (and CMN alias). */
5674 x = op0;
5675 goto cost_plus;
5676 }
5677
5678 if (GET_CODE (op0) == MINUS)
5679 {
5680 /* SUBS. */
5681 x = op0;
5682 goto cost_minus;
5683 }
5684
5685 if (GET_CODE (op1) == NEG)
5686 {
5687 /* CMN. */
5688 if (speed)
5689 *cost += extra_cost->alu.arith;
5690
5691 *cost += rtx_cost (op0, COMPARE, 0, speed);
5692 *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
5693 return true;
5694 }
5695
5696 /* CMP.
5697
5698 Compare can freely swap the order of operands, and
5699 canonicalization puts the more complex operation first.
5700 But the integer MINUS logic expects the shift/extend
5701 operation in op1. */
5702 if (! (REG_P (op0)
5703 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5704 {
5705 op0 = XEXP (x, 1);
5706 op1 = XEXP (x, 0);
5707 }
5708 goto cost_minus;
5709 }
5710
5711 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5712 {
5713 /* FCMP. */
5714 if (speed)
5715 *cost += extra_cost->fp[mode == DFmode].compare;
5716
5717 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
5718 {
5719 /* FCMP supports constant 0.0 for no extra cost. */
5720 return true;
5721 }
5722 return false;
5723 }
5724
5725 return false;
5726
5727 case MINUS:
5728 {
5729 op0 = XEXP (x, 0);
5730 op1 = XEXP (x, 1);
5731
5732 cost_minus:
5733 /* Detect valid immediates. */
5734 if ((GET_MODE_CLASS (mode) == MODE_INT
5735 || (GET_MODE_CLASS (mode) == MODE_CC
5736 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5737 && CONST_INT_P (op1)
5738 && aarch64_uimm12_shift (INTVAL (op1)))
5739 {
5740 *cost += rtx_cost (op0, MINUS, 0, speed);
5741
5742 if (speed)
5743 /* SUB(S) (immediate). */
5744 *cost += extra_cost->alu.arith;
5745 return true;
5746
5747 }
5748
5749 /* Look for SUB (extended register). */
5750 if (aarch64_rtx_arith_op_extract_p (op1, mode))
5751 {
5752 if (speed)
5753 *cost += extra_cost->alu.arith_shift;
5754
5755 *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
5756 (enum rtx_code) GET_CODE (op1),
5757 0, speed);
5758 return true;
5759 }
5760
5761 rtx new_op1 = aarch64_strip_extend (op1);
5762
5763 /* Cost this as an FMA-alike operation. */
5764 if ((GET_CODE (new_op1) == MULT
5765 || GET_CODE (new_op1) == ASHIFT)
5766 && code != COMPARE)
5767 {
5768 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5769 (enum rtx_code) code,
5770 speed);
5771 *cost += rtx_cost (op0, MINUS, 0, speed);
5772 return true;
5773 }
5774
5775 *cost += rtx_cost (new_op1, MINUS, 1, speed);
5776
5777 if (speed)
5778 {
5779 if (GET_MODE_CLASS (mode) == MODE_INT)
5780 /* SUB(S). */
5781 *cost += extra_cost->alu.arith;
5782 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5783 /* FSUB. */
5784 *cost += extra_cost->fp[mode == DFmode].addsub;
5785 }
5786 return true;
5787 }
5788
5789 case PLUS:
5790 {
5791 rtx new_op0;
5792
5793 op0 = XEXP (x, 0);
5794 op1 = XEXP (x, 1);
5795
5796 cost_plus:
5797 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5798 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5799 {
5800 /* CSINC. */
5801 *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5802 *cost += rtx_cost (op1, PLUS, 1, speed);
5803 return true;
5804 }
5805
5806 if (GET_MODE_CLASS (mode) == MODE_INT
5807 && CONST_INT_P (op1)
5808 && aarch64_uimm12_shift (INTVAL (op1)))
5809 {
5810 *cost += rtx_cost (op0, PLUS, 0, speed);
5811
5812 if (speed)
5813 /* ADD (immediate). */
5814 *cost += extra_cost->alu.arith;
5815 return true;
5816 }
5817
5818 /* Look for ADD (extended register). */
5819 if (aarch64_rtx_arith_op_extract_p (op0, mode))
5820 {
5821 if (speed)
5822 *cost += extra_cost->alu.arith_shift;
5823
5824 *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
5825 (enum rtx_code) GET_CODE (op0),
5826 0, speed);
5827 return true;
5828 }
5829
5830 /* Strip any extend, but leave shifts behind, as we will
5831 cost them through mult_cost. */
5832 new_op0 = aarch64_strip_extend (op0);
5833
5834 if (GET_CODE (new_op0) == MULT
5835 || GET_CODE (new_op0) == ASHIFT)
5836 {
5837 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5838 speed);
5839 *cost += rtx_cost (op1, PLUS, 1, speed);
5840 return true;
5841 }
5842
5843 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5844 + rtx_cost (op1, PLUS, 1, speed));
5845
5846 if (speed)
5847 {
5848 if (GET_MODE_CLASS (mode) == MODE_INT)
5849 /* ADD. */
5850 *cost += extra_cost->alu.arith;
5851 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5852 /* FADD. */
5853 *cost += extra_cost->fp[mode == DFmode].addsub;
5854 }
5855 return true;
5856 }
5857
5858 case BSWAP:
5859 *cost = COSTS_N_INSNS (1);
5860
5861 if (speed)
5862 *cost += extra_cost->alu.rev;
5863
5864 return false;
5865
5866 case IOR:
5867 if (aarch_rev16_p (x))
5868 {
5869 *cost = COSTS_N_INSNS (1);
5870
5871 if (speed)
5872 *cost += extra_cost->alu.rev;
5873
5874 return true;
5875 }
5876 /* Fall through. */
5877 case XOR:
5878 case AND:
5879 cost_logic:
5880 op0 = XEXP (x, 0);
5881 op1 = XEXP (x, 1);
5882
5883 if (code == AND
5884 && GET_CODE (op0) == MULT
5885 && CONST_INT_P (XEXP (op0, 1))
5886 && CONST_INT_P (op1)
5887 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5888 INTVAL (op1)) != 0)
5889 {
5890 /* This is a UBFM/SBFM. */
5891 *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5892 if (speed)
5893 *cost += extra_cost->alu.bfx;
5894 return true;
5895 }
5896
5897 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5898 {
5899 /* We possibly get the immediate for free; this is not
5900 modelled. */
5901 if (CONST_INT_P (op1)
5902 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5903 {
5904 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5905
5906 if (speed)
5907 *cost += extra_cost->alu.logical;
5908
5909 return true;
5910 }
5911 else
5912 {
5913 rtx new_op0 = op0;
5914
5915 /* Handle ORN, EON, or BIC. */
5916 if (GET_CODE (op0) == NOT)
5917 op0 = XEXP (op0, 0);
5918
5919 new_op0 = aarch64_strip_shift (op0);
5920
5921 /* If we had a shift on op0 then this is a logical-shift-
5922 by-register/immediate operation. Otherwise, this is just
5923 a logical operation. */
5924 if (speed)
5925 {
5926 if (new_op0 != op0)
5927 {
5928 /* Shift by immediate. */
5929 if (CONST_INT_P (XEXP (op0, 1)))
5930 *cost += extra_cost->alu.log_shift;
5931 else
5932 *cost += extra_cost->alu.log_shift_reg;
5933 }
5934 else
5935 *cost += extra_cost->alu.logical;
5936 }
5937
5938 /* In both cases we want to cost both operands. */
5939 *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
5940 + rtx_cost (op1, (enum rtx_code) code, 1, speed);
5941
5942 return true;
5943 }
5944 }
5945 return false;
5946
5947 case NOT:
5948 /* MVN. */
5949 if (speed)
5950 *cost += extra_cost->alu.logical;
5951
5952 /* The logical instruction could have the shifted register form,
5953 but the cost is the same if the shift is processed as a separate
5954 instruction, so we don't bother with it here. */
5955 return false;
5956
5957 case ZERO_EXTEND:
5958
5959 op0 = XEXP (x, 0);
5960 /* If a value is written in SI mode, then zero extended to DI
5961 mode, the operation will in general be free as a write to
5962 a 'w' register implicitly zeroes the upper bits of an 'x'
5963 register. However, if this is
5964
5965 (set (reg) (zero_extend (reg)))
5966
5967 we must cost the explicit register move. */
5968 if (mode == DImode
5969 && GET_MODE (op0) == SImode
5970 && outer == SET)
5971 {
5972 int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5973
5974 if (!op_cost && speed)
5975 /* MOV. */
5976 *cost += extra_cost->alu.extend;
5977 else
5978 /* Free, the cost is that of the SI mode operation. */
5979 *cost = op_cost;
5980
5981 return true;
5982 }
5983 else if (MEM_P (XEXP (x, 0)))
5984 {
5985 /* All loads can zero extend to any size for free. */
5986 *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
5987 return true;
5988 }
5989
5990 /* UXTB/UXTH. */
5991 if (speed)
5992 *cost += extra_cost->alu.extend;
5993
5994 return false;
5995
5996 case SIGN_EXTEND:
5997 if (MEM_P (XEXP (x, 0)))
5998 {
5999 /* LDRSH. */
6000 if (speed)
6001 {
6002 rtx address = XEXP (XEXP (x, 0), 0);
6003 *cost += extra_cost->ldst.load_sign_extend;
6004
6005 *cost +=
6006 COSTS_N_INSNS (aarch64_address_cost (address, mode,
6007 0, speed));
6008 }
6009 return true;
6010 }
6011
6012 if (speed)
6013 *cost += extra_cost->alu.extend;
6014 return false;
6015
6016 case ASHIFT:
6017 op0 = XEXP (x, 0);
6018 op1 = XEXP (x, 1);
6019
6020 if (CONST_INT_P (op1))
6021 {
6022 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
6023 aliases. */
6024 if (speed)
6025 *cost += extra_cost->alu.shift;
6026
6027 /* We can incorporate zero/sign extend for free. */
6028 if (GET_CODE (op0) == ZERO_EXTEND
6029 || GET_CODE (op0) == SIGN_EXTEND)
6030 op0 = XEXP (op0, 0);
6031
6032 *cost += rtx_cost (op0, ASHIFT, 0, speed);
6033 return true;
6034 }
6035 else
6036 {
6037 /* LSLV. */
6038 if (speed)
6039 *cost += extra_cost->alu.shift_reg;
6040
6041 return false; /* All arguments need to be in registers. */
6042 }
6043
6044 case ROTATE:
6045 case ROTATERT:
6046 case LSHIFTRT:
6047 case ASHIFTRT:
6048 op0 = XEXP (x, 0);
6049 op1 = XEXP (x, 1);
6050
6051 if (CONST_INT_P (op1))
6052 {
6053 /* ASR (immediate) and friends. */
6054 if (speed)
6055 *cost += extra_cost->alu.shift;
6056
6057 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
6058 return true;
6059 }
6060 else
6061 {
6062
6063 /* ASR (register) and friends. */
6064 if (speed)
6065 *cost += extra_cost->alu.shift_reg;
6066
6067 return false; /* All arguments need to be in registers. */
6068 }
6069
6070 case SYMBOL_REF:
6071
6072 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
6073 {
6074 /* LDR. */
6075 if (speed)
6076 *cost += extra_cost->ldst.load;
6077 }
6078 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
6079 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
6080 {
6081 /* ADRP, followed by ADD. */
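 /* For example, a small-model address is typically formed as
 adrp x0, sym; add x0, x0, :lo12:sym. */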
6082 *cost += COSTS_N_INSNS (1);
6083 if (speed)
6084 *cost += 2 * extra_cost->alu.arith;
6085 }
6086 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
6087 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
6088 {
6089 /* ADR. */
6090 if (speed)
6091 *cost += extra_cost->alu.arith;
6092 }
6093
6094 if (flag_pic)
6095 {
6096 /* One extra load instruction, after accessing the GOT. */
6097 *cost += COSTS_N_INSNS (1);
6098 if (speed)
6099 *cost += extra_cost->ldst.load;
6100 }
6101 return true;
6102
6103 case HIGH:
6104 case LO_SUM:
6105 /* ADRP/ADD (immediate). */
6106 if (speed)
6107 *cost += extra_cost->alu.arith;
6108 return true;
6109
6110 case ZERO_EXTRACT:
6111 case SIGN_EXTRACT:
6112 /* UBFX/SBFX. */
6113 if (speed)
6114 *cost += extra_cost->alu.bfx;
6115
6116 /* We can trust that the immediates used will be correct (there
6117 are no by-register forms), so we need only cost op0. */
6118 *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
6119 return true;
6120
6121 case MULT:
6122 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
6123 /* aarch64_rtx_mult_cost always handles recursion to its
6124 operands. */
6125 return true;
6126
6127 case MOD:
6128 case UMOD:
6129 if (speed)
6130 {
6131 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6132 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
6133 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
6134 else if (GET_MODE (x) == DFmode)
6135 *cost += (extra_cost->fp[1].mult
6136 + extra_cost->fp[1].div);
6137 else if (GET_MODE (x) == SFmode)
6138 *cost += (extra_cost->fp[0].mult
6139 + extra_cost->fp[0].div);
6140 }
6141 return false; /* All arguments need to be in registers. */
6142
6143 case DIV:
6144 case UDIV:
6145 case SQRT:
6146 if (speed)
6147 {
6148 if (GET_MODE_CLASS (mode) == MODE_INT)
6149 /* There is no integer SQRT, so only DIV and UDIV can get
6150 here. */
6151 *cost += extra_cost->mult[mode == DImode].idiv;
6152 else
6153 *cost += extra_cost->fp[mode == DFmode].div;
6154 }
6155 return false; /* All arguments need to be in registers. */
6156
6157 case IF_THEN_ELSE:
6158 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
6159 XEXP (x, 2), cost, speed);
6160
6161 case EQ:
6162 case NE:
6163 case GT:
6164 case GTU:
6165 case LT:
6166 case LTU:
6167 case GE:
6168 case GEU:
6169 case LE:
6170 case LEU:
6171
6172 return false; /* All arguments must be in registers. */
6173
6174 case FMA:
6175 op0 = XEXP (x, 0);
6176 op1 = XEXP (x, 1);
6177 op2 = XEXP (x, 2);
6178
6179 if (speed)
6180 *cost += extra_cost->fp[mode == DFmode].fma;
6181
6182 /* FMSUB, FNMADD, and FNMSUB are free. */
6183 if (GET_CODE (op0) == NEG)
6184 op0 = XEXP (op0, 0);
6185
6186 if (GET_CODE (op2) == NEG)
6187 op2 = XEXP (op2, 0);
6188
6189 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
6190 and the by-element operand as operand 0. */
6191 if (GET_CODE (op1) == NEG)
6192 op1 = XEXP (op1, 0);
6193
6194 /* Catch vector-by-element operations. The by-element operand can
6195 either be (vec_duplicate (vec_select (x))) or just
6196 (vec_select (x)), depending on whether we are multiplying by
6197 a vector or a scalar.
6198
6199 Canonicalization is not very good in these cases: FMA4 will put the
6200 by-element operand as operand 0, while FNMA4 will have it as operand 1. */
6201 if (GET_CODE (op0) == VEC_DUPLICATE)
6202 op0 = XEXP (op0, 0);
6203 else if (GET_CODE (op1) == VEC_DUPLICATE)
6204 op1 = XEXP (op1, 0);
6205
6206 if (GET_CODE (op0) == VEC_SELECT)
6207 op0 = XEXP (op0, 0);
6208 else if (GET_CODE (op1) == VEC_SELECT)
6209 op1 = XEXP (op1, 0);
6210
6211 /* If the remaining parameters are not registers,
6212 get the cost to put them into registers. */
6213 *cost += rtx_cost (op0, FMA, 0, speed);
6214 *cost += rtx_cost (op1, FMA, 1, speed);
6215 *cost += rtx_cost (op2, FMA, 2, speed);
6216 return true;
6217
6218 case FLOAT_EXTEND:
6219 if (speed)
6220 *cost += extra_cost->fp[mode == DFmode].widen;
6221 return false;
6222
6223 case FLOAT_TRUNCATE:
6224 if (speed)
6225 *cost += extra_cost->fp[mode == DFmode].narrow;
6226 return false;
6227
6228 case FIX:
6229 case UNSIGNED_FIX:
6230 x = XEXP (x, 0);
6231 /* Strip the rounding part. They will all be implemented
6232 by the fcvt* family of instructions anyway. */
6233 if (GET_CODE (x) == UNSPEC)
6234 {
6235 unsigned int uns_code = XINT (x, 1);
6236
6237 if (uns_code == UNSPEC_FRINTA
6238 || uns_code == UNSPEC_FRINTM
6239 || uns_code == UNSPEC_FRINTN
6240 || uns_code == UNSPEC_FRINTP
6241 || uns_code == UNSPEC_FRINTZ)
6242 x = XVECEXP (x, 0, 0);
6243 }
6244
6245 if (speed)
6246 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
6247
6248 *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
6249 return true;
6250
6251 case ABS:
6252 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6253 {
6254 /* FABS and FNEG are analogous. */
6255 if (speed)
6256 *cost += extra_cost->fp[mode == DFmode].neg;
6257 }
6258 else
6259 {
6260 /* Integer ABS will either be split to
6261 two arithmetic instructions, or will be an ABS
6262 (scalar), which we don't model. */
6263 *cost = COSTS_N_INSNS (2);
6264 if (speed)
6265 *cost += 2 * extra_cost->alu.arith;
6266 }
6267 return false;
6268
6269 case SMAX:
6270 case SMIN:
6271 if (speed)
6272 {
6273 /* FMAXNM/FMINNM/FMAX/FMIN.
6274 TODO: This may not be accurate for all implementations, but
6275 we do not model this in the cost tables. */
6276 *cost += extra_cost->fp[mode == DFmode].addsub;
6277 }
6278 return false;
6279
6280 case UNSPEC:
6281 /* The floating point round to integer frint* instructions. */
6282 if (aarch64_frint_unspec_p (XINT (x, 1)))
6283 {
6284 if (speed)
6285 *cost += extra_cost->fp[mode == DFmode].roundint;
6286
6287 return false;
6288 }
6289
6290 if (XINT (x, 1) == UNSPEC_RBIT)
6291 {
6292 if (speed)
6293 *cost += extra_cost->alu.rev;
6294
6295 return false;
6296 }
6297 break;
6298
6299 case TRUNCATE:
6300
6301 /* Decompose <su>muldi3_highpart. */
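 /* That is, the high 64 bits of a 64x64->128-bit multiply, which is
 implemented with UMULH/SMULH. The expected RTL shape is spelled
 out in the condition below. */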
6302 if (/* (truncate:DI */
6303 mode == DImode
6304 /* (lshiftrt:TI */
6305 && GET_MODE (XEXP (x, 0)) == TImode
6306 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
6307 /* (mult:TI */
6308 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6309 /* (ANY_EXTEND:TI (reg:DI))
6310 (ANY_EXTEND:TI (reg:DI))) */
6311 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
6312 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
6313 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
6314 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
6315 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
6316 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
6317 /* (const_int 64) */
6318 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
6319 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
6320 {
6321 /* UMULH/SMULH. */
6322 if (speed)
6323 *cost += extra_cost->mult[mode == DImode].extend;
6324 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
6325 MULT, 0, speed);
6326 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
6327 MULT, 1, speed);
6328 return true;
6329 }
6330
6331 /* Fall through. */
6332 default:
6333 break;
6334 }
6335
6336 if (dump_file && (dump_flags & TDF_DETAILS))
6337 fprintf (dump_file,
6338 "\nFailed to cost RTX. Assuming default cost.\n");
6339
6340 return true;
6341 }
6342
6343 /* Wrapper around aarch64_rtx_costs, dumps the partial or total cost
6344 calculated for X. This cost is stored in *COST. Returns true
6345 if the total cost of X was calculated. */
6346 static bool
6347 aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
6348 int param, int *cost, bool speed)
6349 {
6350 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
6351
6352 if (dump_file && (dump_flags & TDF_DETAILS))
6353 {
6354 print_rtl_single (dump_file, x);
6355 fprintf (dump_file, "\n%s cost: %d (%s)\n",
6356 speed ? "Hot" : "Cold",
6357 *cost, result ? "final" : "partial");
6358 }
6359
6360 return result;
6361 }
6362
6363 static int
6364 aarch64_register_move_cost (machine_mode mode,
6365 reg_class_t from_i, reg_class_t to_i)
6366 {
6367 enum reg_class from = (enum reg_class) from_i;
6368 enum reg_class to = (enum reg_class) to_i;
6369 const struct cpu_regmove_cost *regmove_cost
6370 = aarch64_tune_params->regmove_cost;
6371
6372 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
6373 if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
6374 to = GENERAL_REGS;
6375
6376 if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
6377 from = GENERAL_REGS;
6378
6379 /* Moving between GPR and stack cost is the same as GP2GP. */
6380 if ((from == GENERAL_REGS && to == STACK_REG)
6381 || (to == GENERAL_REGS && from == STACK_REG))
6382 return regmove_cost->GP2GP;
6383
6384 /* To/From the stack register, we move via the gprs. */
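 /* For example, an FP_REGS <-> STACK_REG move of a DImode value costs
 FP2GP + GP2GP via the recursion below. */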
6385 if (to == STACK_REG || from == STACK_REG)
6386 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
6387 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
6388
6389 if (GET_MODE_SIZE (mode) == 16)
6390 {
6391 /* 128-bit operations on general registers require 2 instructions. */
6392 if (from == GENERAL_REGS && to == GENERAL_REGS)
6393 return regmove_cost->GP2GP * 2;
6394 else if (from == GENERAL_REGS)
6395 return regmove_cost->GP2FP * 2;
6396 else if (to == GENERAL_REGS)
6397 return regmove_cost->FP2GP * 2;
6398
6399 /* When AdvSIMD instructions are disabled it is not possible to move
6400 a 128-bit value directly between Q registers. This is handled in
6401 secondary reload. A general register is used as a scratch to move
6402 the upper DI value and the lower DI value is moved directly,
6403 hence the cost is the sum of three moves. */
6404 if (! TARGET_SIMD)
6405 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
6406
6407 return regmove_cost->FP2FP;
6408 }
6409
6410 if (from == GENERAL_REGS && to == GENERAL_REGS)
6411 return regmove_cost->GP2GP;
6412 else if (from == GENERAL_REGS)
6413 return regmove_cost->GP2FP;
6414 else if (to == GENERAL_REGS)
6415 return regmove_cost->FP2GP;
6416
6417 return regmove_cost->FP2FP;
6418 }
6419
6420 static int
6421 aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
6422 reg_class_t rclass ATTRIBUTE_UNUSED,
6423 bool in ATTRIBUTE_UNUSED)
6424 {
6425 return aarch64_tune_params->memmov_cost;
6426 }
6427
6428 /* Return the number of instructions that can be issued per cycle. */
6429 static int
6430 aarch64_sched_issue_rate (void)
6431 {
6432 return aarch64_tune_params->issue_rate;
6433 }
6434
6435 /* Vectorizer cost model target hooks. */
6436
6437 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6438 static int
6439 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6440 tree vectype,
6441 int misalign ATTRIBUTE_UNUSED)
6442 {
6443 unsigned elements;
6444
6445 switch (type_of_cost)
6446 {
6447 case scalar_stmt:
6448 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
6449
6450 case scalar_load:
6451 return aarch64_tune_params->vec_costs->scalar_load_cost;
6452
6453 case scalar_store:
6454 return aarch64_tune_params->vec_costs->scalar_store_cost;
6455
6456 case vector_stmt:
6457 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6458
6459 case vector_load:
6460 return aarch64_tune_params->vec_costs->vec_align_load_cost;
6461
6462 case vector_store:
6463 return aarch64_tune_params->vec_costs->vec_store_cost;
6464
6465 case vec_to_scalar:
6466 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
6467
6468 case scalar_to_vec:
6469 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
6470
6471 case unaligned_load:
6472 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
6473
6474 case unaligned_store:
6475 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
6476
6477 case cond_branch_taken:
6478 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
6479
6480 case cond_branch_not_taken:
6481 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
6482
6483 case vec_perm:
6484 case vec_promote_demote:
6485 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6486
6487 case vec_construct:
6488 elements = TYPE_VECTOR_SUBPARTS (vectype);
6489 return elements / 2 + 1;
6490
6491 default:
6492 gcc_unreachable ();
6493 }
6494 }
6495
6496 /* Implement targetm.vectorize.add_stmt_cost. */
6497 static unsigned
6498 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6499 struct _stmt_vec_info *stmt_info, int misalign,
6500 enum vect_cost_model_location where)
6501 {
6502 unsigned *cost = (unsigned *) data;
6503 unsigned retval = 0;
6504
6505 if (flag_vect_cost_model)
6506 {
6507 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6508 int stmt_cost =
6509 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
6510
6511 /* Statements in an inner loop relative to the loop being
6512 vectorized are weighted more heavily. The value here is
6513 a function (linear for now) of the loop nest level. */
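 /* For example, a statement whose containing loop sits at nest
 depth 3 contributes 3 * stmt_cost to the body cost. */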
6514 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6515 {
6516 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6517 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
6518 unsigned nest_level = loop_depth (loop);
6519
6520 count *= nest_level;
6521 }
6522
6523 retval = (unsigned) (count * stmt_cost);
6524 cost[where] += retval;
6525 }
6526
6527 return retval;
6528 }
6529
6530 static void initialize_aarch64_code_model (void);
6531
6532 /* Parse the architecture extension string. */
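 /* For example, "+crc+nocrypto" enables the CRC extension and disables the
 crypto extension: each '+'-separated token is looked up in all_extensions,
 and a leading "no" clears the bits in the extension's flags_off mask
 instead of setting flags_on. */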
6533
6534 static void
6535 aarch64_parse_extension (char *str)
6536 {
6537 /* The extension string is parsed left to right. */
6538 const struct aarch64_option_extension *opt = NULL;
6539
6540 /* Flag to say whether we are adding or removing an extension. */
6541 int adding_ext = -1;
6542
6543 while (str != NULL && *str != 0)
6544 {
6545 char *ext;
6546 size_t len;
6547
6548 str++;
6549 ext = strchr (str, '+');
6550
6551 if (ext != NULL)
6552 len = ext - str;
6553 else
6554 len = strlen (str);
6555
6556 if (len >= 2 && strncmp (str, "no", 2) == 0)
6557 {
6558 adding_ext = 0;
6559 len -= 2;
6560 str += 2;
6561 }
6562 else if (len > 0)
6563 adding_ext = 1;
6564
6565 if (len == 0)
6566 {
6567 error ("missing feature modifier after %qs", adding_ext ? "+"
6568 : "+no");
6569 return;
6570 }
6571
6572 /* Scan over the extensions table trying to find an exact match. */
6573 for (opt = all_extensions; opt->name != NULL; opt++)
6574 {
6575 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
6576 {
6577 /* Add or remove the extension. */
6578 if (adding_ext)
6579 aarch64_isa_flags |= opt->flags_on;
6580 else
6581 aarch64_isa_flags &= ~(opt->flags_off);
6582 break;
6583 }
6584 }
6585
6586 if (opt->name == NULL)
6587 {
6588 /* Extension not found in list. */
6589 error ("unknown feature modifier %qs", str);
6590 return;
6591 }
6592
6593 str = ext;
6594 };
6595
6596 return;
6597 }
6598
6599 /* Parse the ARCH string. */
6600
6601 static void
6602 aarch64_parse_arch (void)
6603 {
6604 char *ext;
6605 const struct processor *arch;
6606 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
6607 size_t len;
6608
6609 strcpy (str, aarch64_arch_string);
6610
6611 ext = strchr (str, '+');
6612
6613 if (ext != NULL)
6614 len = ext - str;
6615 else
6616 len = strlen (str);
6617
6618 if (len == 0)
6619 {
6620 error ("missing arch name in -march=%qs", str);
6621 return;
6622 }
6623
6624 /* Loop through the list of supported ARCHs to find a match. */
6625 for (arch = all_architectures; arch->name != NULL; arch++)
6626 {
6627 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
6628 {
6629 selected_arch = arch;
6630 aarch64_isa_flags = selected_arch->flags;
6631
6632 if (!selected_cpu)
6633 selected_cpu = &all_cores[selected_arch->core];
6634
6635 if (ext != NULL)
6636 {
6637 /* ARCH string contains at least one extension. */
6638 aarch64_parse_extension (ext);
6639 }
6640
6641 if (strcmp (selected_arch->arch, selected_cpu->arch))
6642 {
6643 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6644 selected_cpu->name, selected_arch->name);
6645 }
6646
6647 return;
6648 }
6649 }
6650
6651 /* ARCH name not found in list. */
6652 error ("unknown value %qs for -march", str);
6653 return;
6654 }
6655
6656 /* Parse the CPU string. */
6657
6658 static void
6659 aarch64_parse_cpu (void)
6660 {
6661 char *ext;
6662 const struct processor *cpu;
6663 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
6664 size_t len;
6665
6666 strcpy (str, aarch64_cpu_string);
6667
6668 ext = strchr (str, '+');
6669
6670 if (ext != NULL)
6671 len = ext - str;
6672 else
6673 len = strlen (str);
6674
6675 if (len == 0)
6676 {
6677 error ("missing cpu name in -mcpu=%qs", str);
6678 return;
6679 }
6680
6681 /* Loop through the list of supported CPUs to find a match. */
6682 for (cpu = all_cores; cpu->name != NULL; cpu++)
6683 {
6684 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
6685 {
6686 selected_cpu = cpu;
6687 aarch64_isa_flags = selected_cpu->flags;
6688
6689 if (ext != NULL)
6690 {
6691 /* CPU string contains at least one extension. */
6692 aarch64_parse_extension (ext);
6693 }
6694
6695 return;
6696 }
6697 }
6698
6699 /* CPU name not found in list. */
6700 error ("unknown value %qs for -mcpu", str);
6701 return;
6702 }
6703
6704 /* Parse the TUNE string. */
6705
6706 static void
6707 aarch64_parse_tune (void)
6708 {
6709 const struct processor *cpu;
6710 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
6711 strcpy (str, aarch64_tune_string);
6712
6713 /* Loop through the list of supported CPUs to find a match. */
6714 for (cpu = all_cores; cpu->name != NULL; cpu++)
6715 {
6716 if (strcmp (cpu->name, str) == 0)
6717 {
6718 selected_tune = cpu;
6719 return;
6720 }
6721 }
6722
6723 /* CPU name not found in list. */
6724 error ("unknown value %qs for -mtune", str);
6725 return;
6726 }
6727
6728
6729 /* Implement TARGET_OPTION_OVERRIDE. */
6730
6731 static void
6732 aarch64_override_options (void)
6733 {
6734 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
6735 If either of -march or -mtune is given, they override their
6736 respective component of -mcpu.
6737
6738 So, first parse AARCH64_CPU_STRING, then the others. Be careful
6739 with -march: if -mcpu is not present on the command line, -march
6740 must set a sensible default CPU. */
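 /* For example, -mcpu=cortex-a57 selects both the Cortex-A57 ISA flags and
 Cortex-A57 tuning, while -mcpu=cortex-a57 -mtune=cortex-a53 keeps the
 Cortex-A57 ISA flags but tunes for Cortex-A53. */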
6741 if (aarch64_cpu_string)
6742 {
6743 aarch64_parse_cpu ();
6744 }
6745
6746 if (aarch64_arch_string)
6747 {
6748 aarch64_parse_arch ();
6749 }
6750
6751 if (aarch64_tune_string)
6752 {
6753 aarch64_parse_tune ();
6754 }
6755
6756 #ifndef HAVE_AS_MABI_OPTION
6757 /* The compiler may have been configured with 2.23.* binutils, which does
6758 not have support for ILP32. */
6759 if (TARGET_ILP32)
6760 error ("Assembler does not support -mabi=ilp32");
6761 #endif
6762
6763 initialize_aarch64_code_model ();
6764
6765 aarch64_build_bitmask_table ();
6766
6767 /* This target defaults to strict volatile bitfields. */
6768 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
6769 flag_strict_volatile_bitfields = 1;
6770
6771 /* If the user did not specify a processor, choose the default
6772 one for them. This will be the CPU set during configuration using
6773 --with-cpu, otherwise it is "generic". */
6774 if (!selected_cpu)
6775 {
6776 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
6777 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
6778 }
6779
6780 gcc_assert (selected_cpu);
6781
6782 if (!selected_tune)
6783 selected_tune = selected_cpu;
6784
6785 aarch64_tune_flags = selected_tune->flags;
6786 aarch64_tune = selected_tune->core;
6787 aarch64_tune_params = selected_tune->tune;
6788 aarch64_architecture_version = selected_cpu->architecture_version;
6789
6790 if (aarch64_fix_a53_err835769 == 2)
6791 {
6792 #ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
6793 aarch64_fix_a53_err835769 = 1;
6794 #else
6795 aarch64_fix_a53_err835769 = 0;
6796 #endif
6797 }
6798
6799 /* If not optimizing for size, set the default
6800 alignment to what the target wants. */
6801 if (!optimize_size)
6802 {
6803 if (align_loops <= 0)
6804 align_loops = aarch64_tune_params->loop_align;
6805 if (align_jumps <= 0)
6806 align_jumps = aarch64_tune_params->jump_align;
6807 if (align_functions <= 0)
6808 align_functions = aarch64_tune_params->function_align;
6809 }
6810
6811 aarch64_override_options_after_change ();
6812 }
6813
6814 /* Implement targetm.override_options_after_change. */
6815
6816 static void
6817 aarch64_override_options_after_change (void)
6818 {
6819 if (flag_omit_frame_pointer)
6820 flag_omit_leaf_frame_pointer = false;
6821 else if (flag_omit_leaf_frame_pointer)
6822 flag_omit_frame_pointer = true;
6823 }
6824
6825 static struct machine_function *
6826 aarch64_init_machine_status (void)
6827 {
6828 struct machine_function *machine;
6829 machine = ggc_cleared_alloc<machine_function> ();
6830 return machine;
6831 }
6832
6833 void
6834 aarch64_init_expanders (void)
6835 {
6836 init_machine_status = aarch64_init_machine_status;
6837 }
6838
6839 /* Select the code model, mapping -mcmodel= to its PIC variant when -fpic is in effect. */
6840 static void
6841 initialize_aarch64_code_model (void)
6842 {
6843 if (flag_pic)
6844 {
6845 switch (aarch64_cmodel_var)
6846 {
6847 case AARCH64_CMODEL_TINY:
6848 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
6849 break;
6850 case AARCH64_CMODEL_SMALL:
6851 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
6852 break;
6853 case AARCH64_CMODEL_LARGE:
6854 sorry ("code model %qs with -f%s", "large",
6855 flag_pic > 1 ? "PIC" : "pic");
6856 default:
6857 gcc_unreachable ();
6858 }
6859 }
6860 else
6861 aarch64_cmodel = aarch64_cmodel_var;
6862 }
6863
6864 /* Return true if SYMBOL_REF X binds locally. */
6865
6866 static bool
6867 aarch64_symbol_binds_local_p (const_rtx x)
6868 {
6869 return (SYMBOL_REF_DECL (x)
6870 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
6871 : SYMBOL_REF_LOCAL_P (x));
6872 }
6873
6874 /* Return true if SYMBOL_REF X is thread local. */
6875 static bool
6876 aarch64_tls_symbol_p (rtx x)
6877 {
6878 if (! TARGET_HAVE_TLS)
6879 return false;
6880
6881 if (GET_CODE (x) != SYMBOL_REF)
6882 return false;
6883
6884 return SYMBOL_REF_TLS_MODEL (x) != 0;
6885 }
6886
6887 /* Classify a TLS symbol into one of the TLS kinds. */
6888 enum aarch64_symbol_type
6889 aarch64_classify_tls_symbol (rtx x)
6890 {
6891 enum tls_model tls_kind = tls_symbolic_operand_type (x);
6892
6893 switch (tls_kind)
6894 {
6895 case TLS_MODEL_GLOBAL_DYNAMIC:
6896 case TLS_MODEL_LOCAL_DYNAMIC:
6897 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
6898
6899 case TLS_MODEL_INITIAL_EXEC:
6900 return SYMBOL_SMALL_GOTTPREL;
6901
6902 case TLS_MODEL_LOCAL_EXEC:
6903 return SYMBOL_SMALL_TPREL;
6904
6905 case TLS_MODEL_EMULATED:
6906 case TLS_MODEL_NONE:
6907 return SYMBOL_FORCE_TO_MEM;
6908
6909 default:
6910 gcc_unreachable ();
6911 }
6912 }
6913
6914 /* Return the method that should be used to access SYMBOL_REF or
6915 LABEL_REF X in context CONTEXT. */
6916
6917 enum aarch64_symbol_type
6918 aarch64_classify_symbol (rtx x, rtx offset,
6919 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
6920 {
6921 if (GET_CODE (x) == LABEL_REF)
6922 {
6923 switch (aarch64_cmodel)
6924 {
6925 case AARCH64_CMODEL_LARGE:
6926 return SYMBOL_FORCE_TO_MEM;
6927
6928 case AARCH64_CMODEL_TINY_PIC:
6929 case AARCH64_CMODEL_TINY:
6930 return SYMBOL_TINY_ABSOLUTE;
6931
6932 case AARCH64_CMODEL_SMALL_PIC:
6933 case AARCH64_CMODEL_SMALL:
6934 return SYMBOL_SMALL_ABSOLUTE;
6935
6936 default:
6937 gcc_unreachable ();
6938 }
6939 }
6940
6941 if (GET_CODE (x) == SYMBOL_REF)
6942 {
6943 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
6944 return SYMBOL_FORCE_TO_MEM;
6945
6946 if (aarch64_tls_symbol_p (x))
6947 return aarch64_classify_tls_symbol (x);
6948
6949 switch (aarch64_cmodel)
6950 {
6951 case AARCH64_CMODEL_TINY:
6952 /* When we retrieve a symbol + offset address, we have to make sure
6953 the offset does not cause overflow of the final address. But
6954 we have no way of knowing the address of the symbol at compile time,
6955 so we can't accurately say if the distance between the PC and
6956 symbol + offset is outside the addressable range of +/-1M in the
6957 TINY code model. So we rely on images not being greater than
6958 1M and cap the offset at 1M; anything beyond 1M will have to
6959 be loaded using an alternative mechanism. */
6960 if (SYMBOL_REF_WEAK (x)
6961 || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
6962 return SYMBOL_FORCE_TO_MEM;
6963 return SYMBOL_TINY_ABSOLUTE;
6964
6965 case AARCH64_CMODEL_SMALL:
6966 /* Same reasoning as the tiny code model, but the offset cap here is
6967 4G. */
6968 if (SYMBOL_REF_WEAK (x)
6969 || INTVAL (offset) < (HOST_WIDE_INT) -4294967263
6970 || INTVAL (offset) > (HOST_WIDE_INT) 4294967264)
6971 return SYMBOL_FORCE_TO_MEM;
6972 return SYMBOL_SMALL_ABSOLUTE;
6973
6974 case AARCH64_CMODEL_TINY_PIC:
6975 if (!aarch64_symbol_binds_local_p (x))
6976 return SYMBOL_TINY_GOT;
6977 return SYMBOL_TINY_ABSOLUTE;
6978
6979 case AARCH64_CMODEL_SMALL_PIC:
6980 if (!aarch64_symbol_binds_local_p (x))
6981 return SYMBOL_SMALL_GOT;
6982 return SYMBOL_SMALL_ABSOLUTE;
6983
6984 default:
6985 gcc_unreachable ();
6986 }
6987 }
6988
6989 /* By default push everything into the constant pool. */
6990 return SYMBOL_FORCE_TO_MEM;
6991 }
6992
6993 bool
6994 aarch64_constant_address_p (rtx x)
6995 {
6996 return (CONSTANT_P (x) && memory_address_p (DImode, x));
6997 }
6998
6999 bool
7000 aarch64_legitimate_pic_operand_p (rtx x)
7001 {
7002 if (GET_CODE (x) == SYMBOL_REF
7003 || (GET_CODE (x) == CONST
7004 && GET_CODE (XEXP (x, 0)) == PLUS
7005 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7006 return false;
7007
7008 return true;
7009 }
7010
7011 /* Return true if X holds a floating-point constant that is either
7012 representable as a quarter-precision (FMOV) immediate or is +0.0. */
7013 static bool
7014 aarch64_valid_floating_const (machine_mode mode, rtx x)
7015 {
7016 if (!CONST_DOUBLE_P (x))
7017 return false;
7018
7019 /* TODO: We could handle moving 0.0 to a TFmode register,
7020 but first we would like to refactor the movtf_aarch64
7021 pattern to be more amenable to splitting moves properly and
7022 correctly gating on TARGET_SIMD. For now, reject all
7023 constants that are not for SFmode or DFmode registers. */
7024 if (!(mode == SFmode || mode == DFmode))
7025 return false;
7026
7027 if (aarch64_float_const_zero_rtx_p (x))
7028 return true;
7029 return aarch64_float_const_representable_p (x);
7030 }
7031
7032 static bool
7033 aarch64_legitimate_constant_p (machine_mode mode, rtx x)
7034 {
7035 /* Do not allow vector struct mode constants. We could support
7036 0 and -1 easily, but they need support in aarch64-simd.md. */
7037 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
7038 return false;
7039
7040 /* This could probably go away because
7041 we now decompose CONST_INTs according to expand_mov_immediate. */
7042 if ((GET_CODE (x) == CONST_VECTOR
7043 && aarch64_simd_valid_immediate (x, mode, false, NULL))
7044 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
7045 return !targetm.cannot_force_const_mem (mode, x);
7046
7047 if (GET_CODE (x) == HIGH
7048 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7049 return true;
7050
7051 return aarch64_constant_address_p (x);
7052 }
7053
7054 rtx
7055 aarch64_load_tp (rtx target)
7056 {
7057 if (!target
7058 || GET_MODE (target) != Pmode
7059 || !register_operand (target, Pmode))
7060 target = gen_reg_rtx (Pmode);
7061
7062 /* Can return in any reg. */
7063 emit_insn (gen_aarch64_load_tp_hard (target));
7064 return target;
7065 }
7066
7067 /* On AAPCS systems, this is the "struct __va_list". */
7068 static GTY(()) tree va_list_type;
7069
7070 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
7071 Return the type to use as __builtin_va_list.
7072
7073 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
7074
7075 struct __va_list
7076 {
7077 void *__stack;
7078 void *__gr_top;
7079 void *__vr_top;
7080 int __gr_offs;
7081 int __vr_offs;
7082 }; */
7083
7084 static tree
7085 aarch64_build_builtin_va_list (void)
7086 {
7087 tree va_list_name;
7088 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7089
7090 /* Create the type. */
7091 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
7092 /* Give it the required name. */
7093 va_list_name = build_decl (BUILTINS_LOCATION,
7094 TYPE_DECL,
7095 get_identifier ("__va_list"),
7096 va_list_type);
7097 DECL_ARTIFICIAL (va_list_name) = 1;
7098 TYPE_NAME (va_list_type) = va_list_name;
7099 TYPE_STUB_DECL (va_list_type) = va_list_name;
7100
7101 /* Create the fields. */
7102 f_stack = build_decl (BUILTINS_LOCATION,
7103 FIELD_DECL, get_identifier ("__stack"),
7104 ptr_type_node);
7105 f_grtop = build_decl (BUILTINS_LOCATION,
7106 FIELD_DECL, get_identifier ("__gr_top"),
7107 ptr_type_node);
7108 f_vrtop = build_decl (BUILTINS_LOCATION,
7109 FIELD_DECL, get_identifier ("__vr_top"),
7110 ptr_type_node);
7111 f_groff = build_decl (BUILTINS_LOCATION,
7112 FIELD_DECL, get_identifier ("__gr_offs"),
7113 integer_type_node);
7114 f_vroff = build_decl (BUILTINS_LOCATION,
7115 FIELD_DECL, get_identifier ("__vr_offs"),
7116 integer_type_node);
7117
7118 DECL_ARTIFICIAL (f_stack) = 1;
7119 DECL_ARTIFICIAL (f_grtop) = 1;
7120 DECL_ARTIFICIAL (f_vrtop) = 1;
7121 DECL_ARTIFICIAL (f_groff) = 1;
7122 DECL_ARTIFICIAL (f_vroff) = 1;
7123
7124 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
7125 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
7126 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
7127 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
7128 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
7129
7130 TYPE_FIELDS (va_list_type) = f_stack;
7131 DECL_CHAIN (f_stack) = f_grtop;
7132 DECL_CHAIN (f_grtop) = f_vrtop;
7133 DECL_CHAIN (f_vrtop) = f_groff;
7134 DECL_CHAIN (f_groff) = f_vroff;
7135
7136 /* Compute its layout. */
7137 layout_type (va_list_type);
7138
7139 return va_list_type;
7140 }
7141
7142 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
7143 static void
7144 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
7145 {
7146 const CUMULATIVE_ARGS *cum;
7147 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7148 tree stack, grtop, vrtop, groff, vroff;
7149 tree t;
7150 int gr_save_area_size;
7151 int vr_save_area_size;
7152 int vr_offset;
7153
7154 cum = &crtl->args.info;
7155 gr_save_area_size
7156 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
7157 vr_save_area_size
7158 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
7159
7160 if (TARGET_GENERAL_REGS_ONLY)
7161 {
7162 if (cum->aapcs_nvrn > 0)
7163 sorry ("%qs and floating point or vector arguments",
7164 "-mgeneral-regs-only");
7165 vr_save_area_size = 0;
7166 }
7167
7168 f_stack = TYPE_FIELDS (va_list_type_node);
7169 f_grtop = DECL_CHAIN (f_stack);
7170 f_vrtop = DECL_CHAIN (f_grtop);
7171 f_groff = DECL_CHAIN (f_vrtop);
7172 f_vroff = DECL_CHAIN (f_groff);
7173
7174 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
7175 NULL_TREE);
7176 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
7177 NULL_TREE);
7178 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
7179 NULL_TREE);
7180 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
7181 NULL_TREE);
7182 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
7183 NULL_TREE);
7184
7185 /* Emit code to initialize STACK, which points to the next varargs stack
7186 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
7187 by named arguments. STACK is 8-byte aligned. */
7188 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
7189 if (cum->aapcs_stack_size > 0)
7190 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
7191 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
7192 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7193
7194 /* Emit code to initialize GRTOP, the top of the GR save area.
7195 virtual_incoming_args_rtx should have been 16-byte aligned. */
7196 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
7197 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
7198 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7199
7200 /* Emit code to initialize VRTOP, the top of the VR save area.
7201 This address is gr_save_area_bytes below GRTOP, rounded
7202 down to the next 16-byte boundary. */
7203 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
7204 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
7205 STACK_BOUNDARY / BITS_PER_UNIT);
7206
7207 if (vr_offset)
7208 t = fold_build_pointer_plus_hwi (t, -vr_offset);
7209 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
7210 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7211
7212 /* Emit code to initialize GROFF, the offset from GRTOP of the
7213 next GPR argument. */
7214 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
7215 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
7216 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7217
7218 /* Likewise emit code to initialize VROFF, the offset from VRTOP
7219 of the next VR argument. */
7220 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
7221 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
7222 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7223 }
7224
7225 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
7226
7227 static tree
7228 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7229 gimple_seq *post_p ATTRIBUTE_UNUSED)
7230 {
7231 tree addr;
7232 bool indirect_p;
7233 bool is_ha; /* is HFA or HVA. */
7234 bool dw_align; /* double-word align. */
7235 machine_mode ag_mode = VOIDmode;
7236 int nregs;
7237 machine_mode mode;
7238
7239 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
7240 tree stack, f_top, f_off, off, arg, roundup, on_stack;
7241 HOST_WIDE_INT size, rsize, adjust, align;
7242 tree t, u, cond1, cond2;
7243
7244 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7245 if (indirect_p)
7246 type = build_pointer_type (type);
7247
7248 mode = TYPE_MODE (type);
7249
7250 f_stack = TYPE_FIELDS (va_list_type_node);
7251 f_grtop = DECL_CHAIN (f_stack);
7252 f_vrtop = DECL_CHAIN (f_grtop);
7253 f_groff = DECL_CHAIN (f_vrtop);
7254 f_vroff = DECL_CHAIN (f_groff);
7255
7256 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
7257 f_stack, NULL_TREE);
7258 size = int_size_in_bytes (type);
7259 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
7260
7261 dw_align = false;
7262 adjust = 0;
7263 if (aarch64_vfp_is_call_or_return_candidate (mode,
7264 type,
7265 &ag_mode,
7266 &nregs,
7267 &is_ha))
7268 {
7269 /* TYPE passed in fp/simd registers. */
7270 if (TARGET_GENERAL_REGS_ONLY)
7271 sorry ("%qs and floating point or vector arguments",
7272 "-mgeneral-regs-only");
7273
7274 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
7275 unshare_expr (valist), f_vrtop, NULL_TREE);
7276 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
7277 unshare_expr (valist), f_vroff, NULL_TREE);
7278
7279 rsize = nregs * UNITS_PER_VREG;
7280
7281 if (is_ha)
7282 {
7283 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
7284 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
7285 }
7286 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
7287 && size < UNITS_PER_VREG)
7288 {
7289 adjust = UNITS_PER_VREG - size;
7290 }
7291 }
7292 else
7293 {
7294 /* TYPE passed in general registers. */
7295 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
7296 unshare_expr (valist), f_grtop, NULL_TREE);
7297 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
7298 unshare_expr (valist), f_groff, NULL_TREE);
7299 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7300 nregs = rsize / UNITS_PER_WORD;
7301
7302 if (align > 8)
7303 dw_align = true;
7304
7305 if (BLOCK_REG_PADDING (mode, type, 1) == downward
7306 && size < UNITS_PER_WORD)
7307 {
7308 adjust = UNITS_PER_WORD - size;
7309 }
7310 }
7311
7312 /* Get a local temporary for the field value. */
7313 off = get_initialized_tmp_var (f_off, pre_p, NULL);
7314
7315 /* Emit code to branch if off >= 0. */
7316 t = build2 (GE_EXPR, boolean_type_node, off,
7317 build_int_cst (TREE_TYPE (off), 0));
7318 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
7319
7320 if (dw_align)
7321 {
7322 /* Emit: offs = (offs + 15) & -16. */
7323 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
7324 build_int_cst (TREE_TYPE (off), 15));
7325 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
7326 build_int_cst (TREE_TYPE (off), -16));
7327 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
7328 }
7329 else
7330 roundup = NULL;
7331
7332 /* Update ap.__[g|v]r_offs */
7333 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
7334 build_int_cst (TREE_TYPE (off), rsize));
7335 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
7336
7337 /* String up. */
7338 if (roundup)
7339 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
7340
7341 /* [cond2] if (ap.__[g|v]r_offs > 0) */
7342 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
7343 build_int_cst (TREE_TYPE (f_off), 0));
7344 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
7345
7346 /* String up: make sure the assignment happens before the use. */
7347 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
7348 COND_EXPR_ELSE (cond1) = t;
7349
7350 /* Prepare the trees handling the argument that is passed on the stack;
7351 the top-level node will be stored in ON_STACK. */
7352 arg = get_initialized_tmp_var (stack, pre_p, NULL);
7353 if (align > 8)
7354 {
7355 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
7356 t = fold_convert (intDI_type_node, arg);
7357 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
7358 build_int_cst (TREE_TYPE (t), 15));
7359 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7360 build_int_cst (TREE_TYPE (t), -16));
7361 t = fold_convert (TREE_TYPE (arg), t);
7362 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
7363 }
7364 else
7365 roundup = NULL;
7366 /* Advance ap.__stack */
7367 t = fold_convert (intDI_type_node, arg);
7368 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
7369 build_int_cst (TREE_TYPE (t), size + 7));
7370 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7371 build_int_cst (TREE_TYPE (t), -8));
7372 t = fold_convert (TREE_TYPE (arg), t);
7373 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
7374 /* String up roundup and advance. */
7375 if (roundup)
7376 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
7377 /* String up with arg */
7378 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
7379 /* Big-endianness related address adjustment. */
7380 if (BLOCK_REG_PADDING (mode, type, 1) == downward
7381 && size < UNITS_PER_WORD)
7382 {
7383 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
7384 size_int (UNITS_PER_WORD - size));
7385 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
7386 }
7387
7388 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
7389 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
7390
7391 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
7392 t = off;
7393 if (adjust)
7394 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
7395 build_int_cst (TREE_TYPE (off), adjust));
7396
7397 t = fold_convert (sizetype, t);
7398 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
7399
7400 if (is_ha)
7401 {
7402 /* type ha; // treat as "struct {ftype field[n];}"
7403 ... [computing offs]
7404 for (i = 0; i < nregs; ++i, offs += 16)
7405 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
7406 return ha; */
7407 int i;
7408 tree tmp_ha, field_t, field_ptr_t;
7409
7410 /* Declare a local variable. */
7411 tmp_ha = create_tmp_var_raw (type, "ha");
7412 gimple_add_tmp_var (tmp_ha);
7413
7414 /* Establish the base type. */
7415 switch (ag_mode)
7416 {
7417 case SFmode:
7418 field_t = float_type_node;
7419 field_ptr_t = float_ptr_type_node;
7420 break;
7421 case DFmode:
7422 field_t = double_type_node;
7423 field_ptr_t = double_ptr_type_node;
7424 break;
7425 case TFmode:
7426 field_t = long_double_type_node;
7427 field_ptr_t = long_double_ptr_type_node;
7428 break;
7429 /* Half-precision and quad-precision types are not fully supported yet.
7430 Enable the following code once support is complete and the correct
7431 type node for __fp16 * has been found. */
7432 #if 0
7433 case HFmode:
7434 field_t = float_type_node;
7435 field_ptr_t = float_ptr_type_node;
7436 break;
7437 #endif
7438 case V2SImode:
7439 case V4SImode:
7440 {
7441 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
7442 field_t = build_vector_type_for_mode (innertype, ag_mode);
7443 field_ptr_t = build_pointer_type (field_t);
7444 }
7445 break;
7446 default:
7447 gcc_assert (0);
7448 }
7449
7450 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
7451 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
7452 addr = t;
7453 t = fold_convert (field_ptr_t, addr);
7454 t = build2 (MODIFY_EXPR, field_t,
7455 build1 (INDIRECT_REF, field_t, tmp_ha),
7456 build1 (INDIRECT_REF, field_t, t));
7457
7458 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
7459 for (i = 1; i < nregs; ++i)
7460 {
7461 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
7462 u = fold_convert (field_ptr_t, addr);
7463 u = build2 (MODIFY_EXPR, field_t,
7464 build2 (MEM_REF, field_t, tmp_ha,
7465 build_int_cst (field_ptr_t,
7466 (i *
7467 int_size_in_bytes (field_t)))),
7468 build1 (INDIRECT_REF, field_t, u));
7469 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
7470 }
7471
7472 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
7473 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
7474 }
7475
7476 COND_EXPR_ELSE (cond2) = t;
7477 addr = fold_convert (build_pointer_type (type), cond1);
7478 addr = build_va_arg_indirect_ref (addr);
7479
7480 if (indirect_p)
7481 addr = build_va_arg_indirect_ref (addr);
7482
7483 return addr;
7484 }
7485
7486 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
7487
7488 static void
7489 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
7490 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7491 int no_rtl)
7492 {
7493 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7494 CUMULATIVE_ARGS local_cum;
7495 int gr_saved, vr_saved;
7496
7497 /* The caller has advanced CUM up to, but not beyond, the last named
7498 argument. Advance a local copy of CUM past the last "real" named
7499 argument, to find out how many registers are left over. */
7500 local_cum = *cum;
7501 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
7502
7503 /* Find out how many registers we need to save. */
7504 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
7505 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
7506
7507 if (TARGET_GENERAL_REGS_ONLY)
7508 {
7509 if (local_cum.aapcs_nvrn > 0)
7510 sorry ("%qs and floating point or vector arguments",
7511 "-mgeneral-regs-only");
7512 vr_saved = 0;
7513 }
7514
7515 if (!no_rtl)
7516 {
7517 if (gr_saved > 0)
7518 {
7519 rtx ptr, mem;
7520
7521 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
7522 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
7523 - gr_saved * UNITS_PER_WORD);
7524 mem = gen_frame_mem (BLKmode, ptr);
7525 set_mem_alias_set (mem, get_varargs_alias_set ());
7526
7527 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
7528 mem, gr_saved);
7529 }
7530 if (vr_saved > 0)
7531 {
7532 /* We can't use move_block_from_reg, because it will use
7533 the wrong mode, storing D regs only. */
7534 machine_mode mode = TImode;
7535 int off, i;
7536
7537 /* Set OFF to the offset from virtual_incoming_args_rtx of
7538 the first vector register. The VR save area lies below
7539 the GR one, and is aligned to 16 bytes. */
7540 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7541 STACK_BOUNDARY / BITS_PER_UNIT);
7542 off -= vr_saved * UNITS_PER_VREG;
7543
7544 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
7545 {
7546 rtx ptr, mem;
7547
7548 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
7549 mem = gen_frame_mem (mode, ptr);
7550 set_mem_alias_set (mem, get_varargs_alias_set ());
7551 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
7552 off += UNITS_PER_VREG;
7553 }
7554 }
7555 }
7556
7557 /* We don't save the size into *PRETEND_SIZE because we want to avoid
7558 any complication of having crtl->args.pretend_args_size changed. */
7559 cfun->machine->frame.saved_varargs_size
7560 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7561 STACK_BOUNDARY / BITS_PER_UNIT)
7562 + vr_saved * UNITS_PER_VREG);
7563 }
7564
7565 static void
7566 aarch64_conditional_register_usage (void)
7567 {
7568 int i;
7569 if (!TARGET_FLOAT)
7570 {
7571 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
7572 {
7573 fixed_regs[i] = 1;
7574 call_used_regs[i] = 1;
7575 }
7576 }
7577 }
7578
7579 /* Walk down the type tree of TYPE counting consecutive base elements.
7580 If *MODEP is VOIDmode, then set it to the first valid floating point
7581 type. If a non-floating point type is found, or if a floating point
7582 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
7583 otherwise return the count in the sub-tree. */
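 /* For example, struct { double x, y; } yields 2 with *MODEP set to DFmode,
 while struct { double x; float y; } mixes element types and yields -1. */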
7584 static int
7585 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
7586 {
7587 machine_mode mode;
7588 HOST_WIDE_INT size;
7589
7590 switch (TREE_CODE (type))
7591 {
7592 case REAL_TYPE:
7593 mode = TYPE_MODE (type);
7594 if (mode != DFmode && mode != SFmode && mode != TFmode)
7595 return -1;
7596
7597 if (*modep == VOIDmode)
7598 *modep = mode;
7599
7600 if (*modep == mode)
7601 return 1;
7602
7603 break;
7604
7605 case COMPLEX_TYPE:
7606 mode = TYPE_MODE (TREE_TYPE (type));
7607 if (mode != DFmode && mode != SFmode && mode != TFmode)
7608 return -1;
7609
7610 if (*modep == VOIDmode)
7611 *modep = mode;
7612
7613 if (*modep == mode)
7614 return 2;
7615
7616 break;
7617
7618 case VECTOR_TYPE:
7619 /* Use V2SImode and V4SImode as representatives of all 64-bit
7620 and 128-bit vector types. */
7621 size = int_size_in_bytes (type);
7622 switch (size)
7623 {
7624 case 8:
7625 mode = V2SImode;
7626 break;
7627 case 16:
7628 mode = V4SImode;
7629 break;
7630 default:
7631 return -1;
7632 }
7633
7634 if (*modep == VOIDmode)
7635 *modep = mode;
7636
7637 /* Vector modes are considered to be opaque: two vectors are
7638 equivalent for the purposes of being homogeneous aggregates
7639 if they are the same size. */
7640 if (*modep == mode)
7641 return 1;
7642
7643 break;
7644
7645 case ARRAY_TYPE:
7646 {
7647 int count;
7648 tree index = TYPE_DOMAIN (type);
7649
7650 /* Can't handle incomplete types nor sizes that are not
7651 fixed. */
7652 if (!COMPLETE_TYPE_P (type)
7653 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7654 return -1;
7655
7656 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
7657 if (count == -1
7658 || !index
7659 || !TYPE_MAX_VALUE (index)
7660 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
7661 || !TYPE_MIN_VALUE (index)
7662 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
7663 || count < 0)
7664 return -1;
7665
7666 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
7667 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
7668
7669 /* There must be no padding. */
7670 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7671 return -1;
7672
7673 return count;
7674 }
7675
7676 case RECORD_TYPE:
7677 {
7678 int count = 0;
7679 int sub_count;
7680 tree field;
7681
7682 /* Can't handle incomplete types nor sizes that are not
7683 fixed. */
7684 if (!COMPLETE_TYPE_P (type)
7685 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7686 return -1;
7687
7688 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7689 {
7690 if (TREE_CODE (field) != FIELD_DECL)
7691 continue;
7692
7693 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7694 if (sub_count < 0)
7695 return -1;
7696 count += sub_count;
7697 }
7698
7699 /* There must be no padding. */
7700 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7701 return -1;
7702
7703 return count;
7704 }
7705
7706 case UNION_TYPE:
7707 case QUAL_UNION_TYPE:
7708 {
7709 /* These aren't very interesting except in a degenerate case. */
7710 int count = 0;
7711 int sub_count;
7712 tree field;
7713
7714 /* Can't handle incomplete types nor sizes that are not
7715 fixed. */
7716 if (!COMPLETE_TYPE_P (type)
7717 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7718 return -1;
7719
7720 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7721 {
7722 if (TREE_CODE (field) != FIELD_DECL)
7723 continue;
7724
7725 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7726 if (sub_count < 0)
7727 return -1;
7728 count = count > sub_count ? count : sub_count;
7729 }
7730
7731 /* There must be no padding. */
7732 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7733 return -1;
7734
7735 return count;
7736 }
7737
7738 default:
7739 break;
7740 }
7741
7742 return -1;
7743 }
7744
7745 /* Return true if we use LRA instead of reload pass. */
7746 static bool
7747 aarch64_lra_p (void)
7748 {
7749 return aarch64_lra_flag;
7750 }
7751
7752 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
7753 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
7754 array types. The C99 floating-point complex types are also considered
7755 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
7756 types, which are GCC extensions and out of the scope of AAPCS64, are
7757 treated as composite types here as well.
7758
7759 Note that MODE itself is not sufficient in determining whether a type
7760 is such a composite type or not. This is because
7761 stor-layout.c:compute_record_mode may have already changed the MODE
7762 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
7763 structure with only one field may have its MODE set to the mode of the
7764 field. Also an integer mode whose size matches the size of the
7765 RECORD_TYPE type may be used to substitute the original mode
7766 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
7767 solely relied on. */
7768
7769 static bool
7770 aarch64_composite_type_p (const_tree type,
7771 machine_mode mode)
7772 {
7773 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
7774 return true;
7775
7776 if (mode == BLKmode
7777 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7778 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
7779 return true;
7780
7781 return false;
7782 }
7783
7784 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
7785 type as described in AAPCS64 \S 4.1.2.
7786
7787 See the comment above aarch64_composite_type_p for the notes on MODE. */
7788
7789 static bool
7790 aarch64_short_vector_p (const_tree type,
7791 machine_mode mode)
7792 {
7793 HOST_WIDE_INT size = -1;
7794
7795 if (type && TREE_CODE (type) == VECTOR_TYPE)
7796 size = int_size_in_bytes (type);
7797 else if (!aarch64_composite_type_p (type, mode)
7798 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7799 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
7800 size = GET_MODE_SIZE (mode);
7801
7802 return (size == 8 || size == 16);
7803 }
7804
7805 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
7806 shall be passed or returned in simd/fp register(s) (providing these
7807 parameter passing registers are available).
7808
7809 Upon successful return, *COUNT returns the number of needed registers,
7810 *BASE_MODE returns the mode of the individual register and, when IS_HA
7811 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
7812 floating-point aggregate or a homogeneous short-vector aggregate. */
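 /* For example, struct { float a, b, c; } is a homogeneous floating-point
 aggregate: *COUNT is set to 3, *BASE_MODE to SFmode and *IS_HA to true. */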
7813
7814 static bool
7815 aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
7816 const_tree type,
7817 machine_mode *base_mode,
7818 int *count,
7819 bool *is_ha)
7820 {
7821 machine_mode new_mode = VOIDmode;
7822 bool composite_p = aarch64_composite_type_p (type, mode);
7823
7824 if (is_ha != NULL) *is_ha = false;
7825
7826 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
7827 || aarch64_short_vector_p (type, mode))
7828 {
7829 *count = 1;
7830 new_mode = mode;
7831 }
7832 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7833 {
7834 if (is_ha != NULL) *is_ha = true;
7835 *count = 2;
7836 new_mode = GET_MODE_INNER (mode);
7837 }
7838 else if (type && composite_p)
7839 {
7840 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
7841
7842 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
7843 {
7844 if (is_ha != NULL) *is_ha = true;
7845 *count = ag_count;
7846 }
7847 else
7848 return false;
7849 }
7850 else
7851 return false;
7852
7853 *base_mode = new_mode;
7854 return true;
7855 }
7856
7857 /* Implement TARGET_STRUCT_VALUE_RTX. */
7858
7859 static rtx
7860 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
7861 int incoming ATTRIBUTE_UNUSED)
7862 {
7863 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
7864 }
7865
7866 /* Implements target hook vector_mode_supported_p. */
7867 static bool
7868 aarch64_vector_mode_supported_p (machine_mode mode)
7869 {
7870 if (TARGET_SIMD
7871 && (mode == V4SImode || mode == V8HImode
7872 || mode == V16QImode || mode == V2DImode
7873 || mode == V2SImode || mode == V4HImode
7874 || mode == V8QImode || mode == V2SFmode
7875 || mode == V4SFmode || mode == V2DFmode
7876 || mode == V1DFmode))
7877 return true;
7878
7879 return false;
7880 }
7881
7882 /* Return appropriate SIMD container
7883 for MODE within a vector of WIDTH bits. */
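 /* For example, SImode maps to V4SImode for a 128-bit container and to
 V2SImode for a 64-bit one; unsupported modes fall back to word_mode. */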
7884 static machine_mode
7885 aarch64_simd_container_mode (machine_mode mode, unsigned width)
7886 {
7887 gcc_assert (width == 64 || width == 128);
7888 if (TARGET_SIMD)
7889 {
7890 if (width == 128)
7891 switch (mode)
7892 {
7893 case DFmode:
7894 return V2DFmode;
7895 case SFmode:
7896 return V4SFmode;
7897 case SImode:
7898 return V4SImode;
7899 case HImode:
7900 return V8HImode;
7901 case QImode:
7902 return V16QImode;
7903 case DImode:
7904 return V2DImode;
7905 default:
7906 break;
7907 }
7908 else
7909 switch (mode)
7910 {
7911 case SFmode:
7912 return V2SFmode;
7913 case SImode:
7914 return V2SImode;
7915 case HImode:
7916 return V4HImode;
7917 case QImode:
7918 return V8QImode;
7919 default:
7920 break;
7921 }
7922 }
7923 return word_mode;
7924 }
7925
7926 /* Return 128-bit container as the preferred SIMD mode for MODE. */
7927 static machine_mode
7928 aarch64_preferred_simd_mode (machine_mode mode)
7929 {
7930 return aarch64_simd_container_mode (mode, 128);
7931 }
7932
7933 /* Return the bitmask of possible vector sizes for the vectorizer
7934 to iterate over. */
7935 static unsigned int
7936 aarch64_autovectorize_vector_sizes (void)
7937 {
7938 return (16 | 8);
7939 }
7940
7941 /* Implement TARGET_MANGLE_TYPE. */
7942
7943 static const char *
7944 aarch64_mangle_type (const_tree type)
7945 {
7946 /* The AArch64 ABI documents say that "__va_list" has to be
7947 mangled as if it is in the "std" namespace. */
7948 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
7949 return "St9__va_list";
7950
7951 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
7952 builtin types. */
7953 if (TYPE_NAME (type) != NULL)
7954 return aarch64_mangle_builtin_type (type);
7955
7956 /* Use the default mangling. */
7957 return NULL;
7958 }
7959
7960
7961 /* Return true if the rtx_insn contains a MEM RTX somewhere
7962 in it. */
7963
7964 static bool
7965 has_memory_op (rtx_insn *mem_insn)
7966 {
7967 subrtx_iterator::array_type array;
7968 FOR_EACH_SUBRTX (iter, array, PATTERN (mem_insn), ALL)
7969 if (MEM_P (*iter))
7970 return true;
7971
7972 return false;
7973 }
7974
7975 /* Find the first rtx_insn before insn that will generate an assembly
7976 instruction. */
7977
7978 static rtx_insn *
7979 aarch64_prev_real_insn (rtx_insn *insn)
7980 {
7981 if (!insn)
7982 return NULL;
7983
7984 do
7985 {
7986 insn = prev_real_insn (insn);
7987 }
7988 while (insn && recog_memoized (insn) < 0);
7989
7990 return insn;
7991 }
7992
7993 static bool
7994 is_madd_op (enum attr_type t1)
7995 {
7996 unsigned int i;
7997 /* A number of these may be AArch32 only. */
7998 enum attr_type mlatypes[] = {
7999 TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
8000 TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
8001 TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
8002 };
8003
8004 for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
8005 {
8006 if (t1 == mlatypes[i])
8007 return true;
8008 }
8009
8010 return false;
8011 }
8012
8013 /* Check if there is a register dependency between a load and the insn
8014 for which we hold recog_data. */
8015
8016 static bool
8017 dep_between_memop_and_curr (rtx memop)
8018 {
8019 rtx load_reg;
8020 int opno;
8021
8022 gcc_assert (GET_CODE (memop) == SET);
8023
8024 if (!REG_P (SET_DEST (memop)))
8025 return false;
8026
8027 load_reg = SET_DEST (memop);
8028 for (opno = 1; opno < recog_data.n_operands; opno++)
8029 {
8030 rtx operand = recog_data.operand[opno];
8031 if (REG_P (operand)
8032 && reg_overlap_mentioned_p (load_reg, operand))
8033 return true;
8034
8035 }
8036 return false;
8037 }
8038
8039
8040 /* When working around the Cortex-A53 erratum 835769,
8041 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
8042 instruction and has a preceding memory instruction such that a NOP
8043 should be inserted between them. */
8044
8045 bool
8046 aarch64_madd_needs_nop (rtx_insn* insn)
8047 {
8048 enum attr_type attr_type;
8049 rtx_insn *prev;
8050 rtx body;
8051
8052 if (!aarch64_fix_a53_err835769)
8053 return false;
8054
8055 if (recog_memoized (insn) < 0)
8056 return false;
8057
8058 attr_type = get_attr_type (insn);
8059 if (!is_madd_op (attr_type))
8060 return false;
8061
8062 prev = aarch64_prev_real_insn (insn);
8063 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
8064 Restore recog state to INSN to avoid state corruption. */
8065 extract_constrain_insn_cached (insn);
8066
8067 if (!prev || !has_memory_op (prev))
8068 return false;
8069
8070 body = single_set (prev);
8071
8072 /* If the previous insn is a memory op and there is no dependency between
8073 it and the DImode madd, emit a NOP between them. If body is NULL then we
8074 have a complex memory operation, probably a load/store pair.
8075 Be conservative for now and emit a NOP. */
8076 if (GET_MODE (recog_data.operand[0]) == DImode
8077 && (!body || !dep_between_memop_and_curr (body)))
8078 return true;
8079
8080 return false;
8081
8082 }
8083
8084
8085 /* Implement FINAL_PRESCAN_INSN. */
8086
8087 void
8088 aarch64_final_prescan_insn (rtx_insn *insn)
8089 {
8090 if (aarch64_madd_needs_nop (insn))
8091 fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
8092 }
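
/* As a concrete, illustrative example of the sequence being guarded
   against: on an affected Cortex-A53, code such as

       ldr   x1, [x2]
       madd  x3, x4, x5, x6

   can trigger erratum 835769, so when -mfix-cortex-a53-835769 is in
   effect the prescan above outputs

       ldr   x1, [x2]
       nop                     // between mem op and mult-accumulate
       madd  x3, x4, x5, x6

   (register numbers are illustrative only).  */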
8093
8094
8095 /* Return the equivalent letter for size. */
8096 static char
8097 sizetochar (int size)
8098 {
8099 switch (size)
8100 {
8101 case 64: return 'd';
8102 case 32: return 's';
8103 case 16: return 'h';
8104 case 8 : return 'b';
8105 default: gcc_unreachable ();
8106 }
8107 }
8108
8109 /* Return true iff x is a uniform vector of floating-point
8110 constants, and the constant can be represented in
8111 quarter-precision form. Note, as aarch64_float_const_representable_p
8112 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
8113 static bool
8114 aarch64_vect_float_const_representable_p (rtx x)
8115 {
8116 int i = 0;
8117 REAL_VALUE_TYPE r0, ri;
8118 rtx x0, xi;
8119
8120 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
8121 return false;
8122
8123 x0 = CONST_VECTOR_ELT (x, 0);
8124 if (!CONST_DOUBLE_P (x0))
8125 return false;
8126
8127 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
8128
8129 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
8130 {
8131 xi = CONST_VECTOR_ELT (x, i);
8132 if (!CONST_DOUBLE_P (xi))
8133 return false;
8134
8135 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
8136 if (!REAL_VALUES_EQUAL (r0, ri))
8137 return false;
8138 }
8139
8140 return aarch64_float_const_representable_p (x0);
8141 }
8142
8143 /* Return true for valid and false for invalid. */
8144 bool
8145 aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
8146 struct simd_immediate_info *info)
8147 {
8148 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
8149 matches = 1; \
8150 for (i = 0; i < idx; i += (STRIDE)) \
8151 if (!(TEST)) \
8152 matches = 0; \
8153 if (matches) \
8154 { \
8155 immtype = (CLASS); \
8156 elsize = (ELSIZE); \
8157 eshift = (SHIFT); \
8158 emvn = (NEG); \
8159 break; \
8160 }
8161
8162 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
8163 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
8164 unsigned char bytes[16];
8165 int immtype = -1, matches;
8166 unsigned int invmask = inverse ? 0xff : 0;
8167 int eshift, emvn;
8168
8169 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
8170 {
8171 if (! (aarch64_simd_imm_zero_p (op, mode)
8172 || aarch64_vect_float_const_representable_p (op)))
8173 return false;
8174
8175 if (info)
8176 {
8177 info->value = CONST_VECTOR_ELT (op, 0);
8178 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
8179 info->mvn = false;
8180 info->shift = 0;
8181 }
8182
8183 return true;
8184 }
8185
8186 /* Splat vector constant out into a byte vector. */
8187 for (i = 0; i < n_elts; i++)
8188 {
8189 /* The vector is provided in GCC endian-neutral fashion. For aarch64_be,
8190 it must be laid out in the vector register in reverse order. */
8191 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
8192 unsigned HOST_WIDE_INT elpart;
8193 unsigned int part, parts;
8194
8195 if (CONST_INT_P (el))
8196 {
8197 elpart = INTVAL (el);
8198 parts = 1;
8199 }
8200 else if (GET_CODE (el) == CONST_DOUBLE)
8201 {
8202 elpart = CONST_DOUBLE_LOW (el);
8203 parts = 2;
8204 }
8205 else
8206 gcc_unreachable ();
8207
8208 for (part = 0; part < parts; part++)
8209 {
8210 unsigned int byte;
8211 for (byte = 0; byte < innersize; byte++)
8212 {
8213 bytes[idx++] = (elpart & 0xff) ^ invmask;
8214 elpart >>= BITS_PER_UNIT;
8215 }
8216 if (GET_CODE (el) == CONST_DOUBLE)
8217 elpart = CONST_DOUBLE_HIGH (el);
8218 }
8219 }
8220
8221 /* Sanity check. */
8222 gcc_assert (idx == GET_MODE_SIZE (mode));
8223
8224 do
8225 {
8226 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
8227 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
8228
8229 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8230 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
8231
8232 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
8233 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
8234
8235 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
8236 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
8237
8238 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
8239
8240 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
8241
8242 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
8243 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
8244
8245 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8246 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
8247
8248 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
8249 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
8250
8251 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
8252 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
8253
8254 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
8255
8256 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
8257
8258 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
8259 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
8260
8261 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
8262 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
8263
8264 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
8265 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
8266
8267 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
8268 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
8269
8270 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
8271
8272 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
8273 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
8274 }
8275 while (0);
8276
8277 if (immtype == -1)
8278 return false;
8279
8280 if (info)
8281 {
8282 info->element_width = elsize;
8283 info->mvn = emvn != 0;
8284 info->shift = eshift;
8285
8286 unsigned HOST_WIDE_INT imm = 0;
8287
8288 if (immtype >= 12 && immtype <= 15)
8289 info->msl = true;
8290
8291 /* Un-invert bytes of recognized vector, if necessary. */
8292 if (invmask != 0)
8293 for (i = 0; i < idx; i++)
8294 bytes[i] ^= invmask;
8295
8296 if (immtype == 17)
8297 {
8298 /* FIXME: Broken on 32-bit H_W_I hosts. */
8299 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
8300
8301 for (i = 0; i < 8; i++)
8302 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
8303 << (i * BITS_PER_UNIT);
8304
8305
8306 info->value = GEN_INT (imm);
8307 }
8308 else
8309 {
8310 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
8311 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
8312
8313 /* Construct 'abcdefgh' because the assembler cannot handle
8314 generic constants. */
8315 if (info->mvn)
8316 imm = ~imm;
8317 imm = (imm >> info->shift) & 0xff;
8318 info->value = GEN_INT (imm);
8319 }
8320 }
8321
8322 return true;
8323 #undef CHECK
8324 }
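
/* A worked example of the recognition above: for the V4SImode constant
   vector { 0xff0000, 0xff0000, 0xff0000, 0xff0000 } the byte splat is
   { 0x00, 0x00, 0xff, 0x00, ... }, which matches the CHECK entry with
   CLASS 2 (ELSIZE 32, SHIFT 16, no negation).  INFO then describes the
   immediate as 0xff shifted left by 16, i.e. something like
   "movi v0.4s, 0xff, lsl 16" once printed (destination register
   illustrative).  */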
8325
8326 /* Check if immediate shift constants are within range. */
8327 bool
8328 aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
8329 {
8330 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
8331 if (left)
8332 return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
8333 else
8334 return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
8335 }
8336
8337 /* Return true if X is a uniform vector where all elements
8338 are either the floating-point constant 0.0 or the
8339 integer constant 0. */
8340 bool
8341 aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
8342 {
8343 return x == CONST0_RTX (mode);
8344 }
8345
8346 bool
8347 aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
8348 {
8349 HOST_WIDE_INT imm = INTVAL (x);
8350 int i;
8351
8352 for (i = 0; i < 8; i++)
8353 {
8354 unsigned int byte = imm & 0xff;
8355 if (byte != 0xff && byte != 0)
8356 return false;
8357 imm >>= 8;
8358 }
8359
8360 return true;
8361 }
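
/* For instance, 0xff00ff00ff00ff00 passes the check above (every byte
   is either 0x00 or 0xff) and so is a valid 64-bit bytemask immediate
   for MOVI, whereas 0x1234 is rejected because its bytes 0x34 and 0x12
   are neither all-zeros nor all-ones.  */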
8362
8363 bool
8364 aarch64_mov_operand_p (rtx x,
8365 enum aarch64_symbol_context context,
8366 machine_mode mode)
8367 {
8368 if (GET_CODE (x) == HIGH
8369 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
8370 return true;
8371
8372 if (CONST_INT_P (x))
8373 return true;
8374
8375 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
8376 return true;
8377
8378 return aarch64_classify_symbolic_expression (x, context)
8379 == SYMBOL_TINY_ABSOLUTE;
8380 }
8381
8382 /* Return a const_int vector of VAL. */
8383 rtx
8384 aarch64_simd_gen_const_vector_dup (machine_mode mode, int val)
8385 {
8386 int nunits = GET_MODE_NUNITS (mode);
8387 rtvec v = rtvec_alloc (nunits);
8388 int i;
8389
8390 for (i = 0; i < nunits; i++)
8391 RTVEC_ELT (v, i) = GEN_INT (val);
8392
8393 return gen_rtx_CONST_VECTOR (mode, v);
8394 }
8395
8396 /* Check OP is a legal scalar immediate for the MOVI instruction. */
8397
8398 bool
8399 aarch64_simd_scalar_immediate_valid_for_move (rtx op, machine_mode mode)
8400 {
8401 machine_mode vmode;
8402
8403 gcc_assert (!VECTOR_MODE_P (mode));
8404 vmode = aarch64_preferred_simd_mode (mode);
8405 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
8406 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
8407 }
8408
8409 /* Construct and return a PARALLEL RTX vector with elements numbering the
8410 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
8411 the vector - from the perspective of the architecture. This does not
8412 line up with GCC's perspective on lane numbers, so we end up with
8413 different masks depending on our target endian-ness. The diagram
8414 below may help. We must draw the distinction when building masks
8415 which select one half of the vector. An instruction selecting
8416 architectural low-lanes for a big-endian target, must be described using
8417 a mask selecting GCC high-lanes.
8418
8419 Big-Endian Little-Endian
8420
8421 GCC 0 1 2 3 3 2 1 0
8422 | x | x | x | x | | x | x | x | x |
8423 Architecture 3 2 1 0 3 2 1 0
8424
8425 Low Mask: { 2, 3 } { 0, 1 }
8426 High Mask: { 0, 1 } { 2, 3 }
8427 */
8428
8429 rtx
8430 aarch64_simd_vect_par_cnst_half (machine_mode mode, bool high)
8431 {
8432 int nunits = GET_MODE_NUNITS (mode);
8433 rtvec v = rtvec_alloc (nunits / 2);
8434 int high_base = nunits / 2;
8435 int low_base = 0;
8436 int base;
8437 rtx t1;
8438 int i;
8439
8440 if (BYTES_BIG_ENDIAN)
8441 base = high ? low_base : high_base;
8442 else
8443 base = high ? high_base : low_base;
8444
8445 for (i = 0; i < nunits / 2; i++)
8446 RTVEC_ELT (v, i) = GEN_INT (base + i);
8447
8448 t1 = gen_rtx_PARALLEL (mode, v);
8449 return t1;
8450 }
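
/* For example, for V4SImode on a little-endian target,
   aarch64_simd_vect_par_cnst_half (V4SImode, true) produces
   (parallel [(const_int 2) (const_int 3)]) -- the architectural high
   half -- while on big-endian the same request produces
   (parallel [(const_int 0) (const_int 1)]), as per the diagram above.  */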
8451
8452 /* Check OP for validity as a PARALLEL RTX vector with elements
8453 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
8454 from the perspective of the architecture. See the diagram above
8455 aarch64_simd_vect_par_cnst_half for more details. */
8456
8457 bool
8458 aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
8459 bool high)
8460 {
8461 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
8462 HOST_WIDE_INT count_op = XVECLEN (op, 0);
8463 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
8464 int i = 0;
8465
8466 if (!VECTOR_MODE_P (mode))
8467 return false;
8468
8469 if (count_op != count_ideal)
8470 return false;
8471
8472 for (i = 0; i < count_ideal; i++)
8473 {
8474 rtx elt_op = XVECEXP (op, 0, i);
8475 rtx elt_ideal = XVECEXP (ideal, 0, i);
8476
8477 if (!CONST_INT_P (elt_op)
8478 || INTVAL (elt_ideal) != INTVAL (elt_op))
8479 return false;
8480 }
8481 return true;
8482 }
8483
8484 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
8485 HIGH (exclusive). */
8486 void
8487 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
8488 const_tree exp)
8489 {
8490 HOST_WIDE_INT lane;
8491 gcc_assert (CONST_INT_P (operand));
8492 lane = INTVAL (operand);
8493
8494 if (lane < low || lane >= high)
8495 {
8496 if (exp)
8497 error ("%Klane %ld out of range %ld - %ld", exp, lane, low, high - 1);
8498 else
8499 error ("lane %ld out of range %ld - %ld", lane, low, high - 1);
8500 }
8501 }
8502
8503 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
8504 registers). */
8505 void
8506 aarch64_simd_emit_pair_result_insn (machine_mode mode,
8507 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
8508 rtx op1)
8509 {
8510 rtx mem = gen_rtx_MEM (mode, destaddr);
8511 rtx tmp1 = gen_reg_rtx (mode);
8512 rtx tmp2 = gen_reg_rtx (mode);
8513
8514 emit_insn (intfn (tmp1, op1, tmp2));
8515
8516 emit_move_insn (mem, tmp1);
8517 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
8518 emit_move_insn (mem, tmp2);
8519 }
8520
8521 /* Return TRUE if OP is a valid vector addressing mode. */
8522 bool
8523 aarch64_simd_mem_operand_p (rtx op)
8524 {
8525 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
8526 || REG_P (XEXP (op, 0)));
8527 }
8528
8529 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
8530 not to early-clobber SRC registers in the process.
8531
8532 We assume that the operands described by SRC and DEST represent a
8533 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
8534 number of components into which the copy has been decomposed. */
8535 void
8536 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
8537 rtx *src, unsigned int count)
8538 {
8539 unsigned int i;
8540
8541 if (!reg_overlap_mentioned_p (operands[0], operands[1])
8542 || REGNO (operands[0]) < REGNO (operands[1]))
8543 {
8544 for (i = 0; i < count; i++)
8545 {
8546 operands[2 * i] = dest[i];
8547 operands[2 * i + 1] = src[i];
8548 }
8549 }
8550 else
8551 {
8552 for (i = 0; i < count; i++)
8553 {
8554 operands[2 * i] = dest[count - i - 1];
8555 operands[2 * i + 1] = src[count - i - 1];
8556 }
8557 }
8558 }
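
/* As an illustration: copying the register pair {q1, q2} into {q2, q3}
   overlaps, and the destination REGNO is higher than the source REGNO,
   so the loop above orders the component moves in reverse (q3 <- q2
   first, then q2 <- q1); emitting them forwards would overwrite q2
   before it had been read as a source.  */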
8559
8560 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
8561 one of VSTRUCT modes: OI, CI or XI. */
8562 int
8563 aarch64_simd_attr_length_move (rtx_insn *insn)
8564 {
8565 machine_mode mode;
8566
8567 extract_insn_cached (insn);
8568
8569 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
8570 {
8571 mode = GET_MODE (recog_data.operand[0]);
8572 switch (mode)
8573 {
8574 case OImode:
8575 return 8;
8576 case CImode:
8577 return 12;
8578 case XImode:
8579 return 16;
8580 default:
8581 gcc_unreachable ();
8582 }
8583 }
8584 return 4;
8585 }
8586
8587 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
8588 alignment of a vector to 128 bits. */
8589 static HOST_WIDE_INT
8590 aarch64_simd_vector_alignment (const_tree type)
8591 {
8592 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
8593 return MIN (align, 128);
8594 }
8595
8596 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
8597 static bool
8598 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
8599 {
8600 if (is_packed)
8601 return false;
8602
8603 /* We guarantee alignment for vectors up to 128 bits. */
8604 if (tree_int_cst_compare (TYPE_SIZE (type),
8605 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
8606 return false;
8607
8608 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
8609 return true;
8610 }
8611
8612 /* If VALS is a vector constant that can be loaded into a register
8613 using DUP, generate instructions to do so and return an RTX to
8614 assign to the register. Otherwise return NULL_RTX. */
8615 static rtx
8616 aarch64_simd_dup_constant (rtx vals)
8617 {
8618 machine_mode mode = GET_MODE (vals);
8619 machine_mode inner_mode = GET_MODE_INNER (mode);
8620 int n_elts = GET_MODE_NUNITS (mode);
8621 bool all_same = true;
8622 rtx x;
8623 int i;
8624
8625 if (GET_CODE (vals) != CONST_VECTOR)
8626 return NULL_RTX;
8627
8628 for (i = 1; i < n_elts; ++i)
8629 {
8630 x = CONST_VECTOR_ELT (vals, i);
8631 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
8632 all_same = false;
8633 }
8634
8635 if (!all_same)
8636 return NULL_RTX;
8637
8638 /* We can load this constant by using DUP and a constant in a
8639 single ARM register. This will be cheaper than a vector
8640 load. */
8641 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
8642 return gen_rtx_VEC_DUPLICATE (mode, x);
8643 }
8644
8645
8646 /* Generate code to load VALS, which is a PARALLEL containing only
8647 constants (for vec_init) or CONST_VECTOR, efficiently into a
8648 register. Returns an RTX to copy into the register, or NULL_RTX
8649 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8650 static rtx
8651 aarch64_simd_make_constant (rtx vals)
8652 {
8653 machine_mode mode = GET_MODE (vals);
8654 rtx const_dup;
8655 rtx const_vec = NULL_RTX;
8656 int n_elts = GET_MODE_NUNITS (mode);
8657 int n_const = 0;
8658 int i;
8659
8660 if (GET_CODE (vals) == CONST_VECTOR)
8661 const_vec = vals;
8662 else if (GET_CODE (vals) == PARALLEL)
8663 {
8664 /* A CONST_VECTOR must contain only CONST_INTs and
8665 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8666 Only store valid constants in a CONST_VECTOR. */
8667 for (i = 0; i < n_elts; ++i)
8668 {
8669 rtx x = XVECEXP (vals, 0, i);
8670 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
8671 n_const++;
8672 }
8673 if (n_const == n_elts)
8674 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8675 }
8676 else
8677 gcc_unreachable ();
8678
8679 if (const_vec != NULL_RTX
8680 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
8681 /* Load using MOVI/MVNI. */
8682 return const_vec;
8683 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
8684 /* Loaded using DUP. */
8685 return const_dup;
8686 else if (const_vec != NULL_RTX)
8687 /* Load from constant pool. We can not take advantage of single-cycle
8688 LD1 because we need a PC-relative addressing mode. */
8689 return const_vec;
8690 else
8691 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8692 We can not construct an initializer. */
8693 return NULL_RTX;
8694 }
8695
8696 void
8697 aarch64_expand_vector_init (rtx target, rtx vals)
8698 {
8699 machine_mode mode = GET_MODE (target);
8700 machine_mode inner_mode = GET_MODE_INNER (mode);
8701 int n_elts = GET_MODE_NUNITS (mode);
8702 int n_var = 0, one_var = -1;
8703 bool all_same = true;
8704 rtx x, mem;
8705 int i;
8706
8707 x = XVECEXP (vals, 0, 0);
8708 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8709 n_var = 1, one_var = 0;
8710
8711 for (i = 1; i < n_elts; ++i)
8712 {
8713 x = XVECEXP (vals, 0, i);
8714 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8715 ++n_var, one_var = i;
8716
8717 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8718 all_same = false;
8719 }
8720
8721 if (n_var == 0)
8722 {
8723 rtx constant = aarch64_simd_make_constant (vals);
8724 if (constant != NULL_RTX)
8725 {
8726 emit_move_insn (target, constant);
8727 return;
8728 }
8729 }
8730
8731 /* Splat a single non-constant element if we can. */
8732 if (all_same)
8733 {
8734 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8735 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
8736 return;
8737 }
8738
8739 /* One field is non-constant. Load constant then overwrite varying
8740 field. This is more efficient than using the stack. */
8741 if (n_var == 1)
8742 {
8743 rtx copy = copy_rtx (vals);
8744 rtx index = GEN_INT (one_var);
8745 enum insn_code icode;
8746
8747 /* Load constant part of vector, substitute neighboring value for
8748 varying element. */
8749 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
8750 aarch64_expand_vector_init (target, copy);
8751
8752 /* Insert variable. */
8753 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8754 icode = optab_handler (vec_set_optab, mode);
8755 gcc_assert (icode != CODE_FOR_nothing);
8756 emit_insn (GEN_FCN (icode) (target, x, index));
8757 return;
8758 }
8759
8760 /* Construct the vector in memory one field at a time
8761 and load the whole vector. */
8762 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
8763 for (i = 0; i < n_elts; i++)
8764 emit_move_insn (adjust_address_nv (mem, inner_mode,
8765 i * GET_MODE_SIZE (inner_mode)),
8766 XVECEXP (vals, 0, i));
8767 emit_move_insn (target, mem);
8768
8769 }
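
/* To illustrate the n_var == 1 path above: initialising a V4SImode
   vector from { x, 1, 2, 3 }, where only x is non-constant, first
   materialises the constant vector { 1, 1, 2, 3 } (the varying slot
   borrows its neighbour's value) and then uses the vec_set pattern to
   insert x into lane 0, avoiding a round trip through the stack.  */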
8770
8771 static unsigned HOST_WIDE_INT
8772 aarch64_shift_truncation_mask (machine_mode mode)
8773 {
8774 return
8775 (aarch64_vector_mode_supported_p (mode)
8776 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
8777 }
8778
8779 #ifndef TLS_SECTION_ASM_FLAG
8780 #define TLS_SECTION_ASM_FLAG 'T'
8781 #endif
8782
8783 void
8784 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
8785 tree decl ATTRIBUTE_UNUSED)
8786 {
8787 char flagchars[10], *f = flagchars;
8788
8789 /* If we have already declared this section, we can use an
8790 abbreviated form to switch back to it -- unless this section is
8791 part of a COMDAT groups, in which case GAS requires the full
8792 declaration every time. */
8793 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8794 && (flags & SECTION_DECLARED))
8795 {
8796 fprintf (asm_out_file, "\t.section\t%s\n", name);
8797 return;
8798 }
8799
8800 if (!(flags & SECTION_DEBUG))
8801 *f++ = 'a';
8802 if (flags & SECTION_WRITE)
8803 *f++ = 'w';
8804 if (flags & SECTION_CODE)
8805 *f++ = 'x';
8806 if (flags & SECTION_SMALL)
8807 *f++ = 's';
8808 if (flags & SECTION_MERGE)
8809 *f++ = 'M';
8810 if (flags & SECTION_STRINGS)
8811 *f++ = 'S';
8812 if (flags & SECTION_TLS)
8813 *f++ = TLS_SECTION_ASM_FLAG;
8814 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8815 *f++ = 'G';
8816 *f = '\0';
8817
8818 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
8819
8820 if (!(flags & SECTION_NOTYPE))
8821 {
8822 const char *type;
8823 const char *format;
8824
8825 if (flags & SECTION_BSS)
8826 type = "nobits";
8827 else
8828 type = "progbits";
8829
8830 #ifdef TYPE_OPERAND_FMT
8831 format = "," TYPE_OPERAND_FMT;
8832 #else
8833 format = ",@%s";
8834 #endif
8835
8836 fprintf (asm_out_file, format, type);
8837
8838 if (flags & SECTION_ENTSIZE)
8839 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
8840 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8841 {
8842 if (TREE_CODE (decl) == IDENTIFIER_NODE)
8843 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
8844 else
8845 fprintf (asm_out_file, ",%s,comdat",
8846 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
8847 }
8848 }
8849
8850 putc ('\n', asm_out_file);
8851 }
8852
8853 /* Select a format to encode pointers in exception handling data. */
8854 int
8855 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
8856 {
8857 int type;
8858 switch (aarch64_cmodel)
8859 {
8860 case AARCH64_CMODEL_TINY:
8861 case AARCH64_CMODEL_TINY_PIC:
8862 case AARCH64_CMODEL_SMALL:
8863 case AARCH64_CMODEL_SMALL_PIC:
8864 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
8865 for everything. */
8866 type = DW_EH_PE_sdata4;
8867 break;
8868 default:
8869 /* No assumptions here. 8-byte relocs required. */
8870 type = DW_EH_PE_sdata8;
8871 break;
8872 }
8873 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
8874 }
8875
8876 /* Emit load exclusive. */
8877
8878 static void
8879 aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
8880 rtx mem, rtx model_rtx)
8881 {
8882 rtx (*gen) (rtx, rtx, rtx);
8883
8884 switch (mode)
8885 {
8886 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
8887 case HImode: gen = gen_aarch64_load_exclusivehi; break;
8888 case SImode: gen = gen_aarch64_load_exclusivesi; break;
8889 case DImode: gen = gen_aarch64_load_exclusivedi; break;
8890 default:
8891 gcc_unreachable ();
8892 }
8893
8894 emit_insn (gen (rval, mem, model_rtx));
8895 }
8896
8897 /* Emit store exclusive. */
8898
8899 static void
8900 aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
8901 rtx rval, rtx mem, rtx model_rtx)
8902 {
8903 rtx (*gen) (rtx, rtx, rtx, rtx);
8904
8905 switch (mode)
8906 {
8907 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
8908 case HImode: gen = gen_aarch64_store_exclusivehi; break;
8909 case SImode: gen = gen_aarch64_store_exclusivesi; break;
8910 case DImode: gen = gen_aarch64_store_exclusivedi; break;
8911 default:
8912 gcc_unreachable ();
8913 }
8914
8915 emit_insn (gen (bval, rval, mem, model_rtx));
8916 }
8917
8918 /* Mark the previous jump instruction as unlikely. */
8919
8920 static void
8921 aarch64_emit_unlikely_jump (rtx insn)
8922 {
8923 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
8924
8925 insn = emit_jump_insn (insn);
8926 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
8927 }
8928
8929 /* Expand a compare and swap pattern. */
8930
8931 void
8932 aarch64_expand_compare_and_swap (rtx operands[])
8933 {
8934 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
8935 machine_mode mode, cmp_mode;
8936 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
8937
8938 bval = operands[0];
8939 rval = operands[1];
8940 mem = operands[2];
8941 oldval = operands[3];
8942 newval = operands[4];
8943 is_weak = operands[5];
8944 mod_s = operands[6];
8945 mod_f = operands[7];
8946 mode = GET_MODE (mem);
8947 cmp_mode = mode;
8948
8949 /* Normally the succ memory model must be stronger than fail, but in the
8950 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
8951 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
8952
8953 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
8954 && INTVAL (mod_s) == MEMMODEL_RELEASE)
8955 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
8956
8957 switch (mode)
8958 {
8959 case QImode:
8960 case HImode:
8961 /* For short modes, we're going to perform the comparison in SImode,
8962 so do the zero-extension now. */
8963 cmp_mode = SImode;
8964 rval = gen_reg_rtx (SImode);
8965 oldval = convert_modes (SImode, mode, oldval, true);
8966 /* Fall through. */
8967
8968 case SImode:
8969 case DImode:
8970 /* Force the value into a register if needed. */
8971 if (!aarch64_plus_operand (oldval, mode))
8972 oldval = force_reg (cmp_mode, oldval);
8973 break;
8974
8975 default:
8976 gcc_unreachable ();
8977 }
8978
8979 switch (mode)
8980 {
8981 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
8982 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
8983 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
8984 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
8985 default:
8986 gcc_unreachable ();
8987 }
8988
8989 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
8990
8991 if (mode == QImode || mode == HImode)
8992 emit_move_insn (operands[1], gen_lowpart (mode, rval));
8993
8994 x = gen_rtx_REG (CCmode, CC_REGNUM);
8995 x = gen_rtx_EQ (SImode, x, const0_rtx);
8996 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
8997 }
8998
8999 /* Split a compare and swap pattern. */
9000
9001 void
9002 aarch64_split_compare_and_swap (rtx operands[])
9003 {
9004 rtx rval, mem, oldval, newval, scratch;
9005 machine_mode mode;
9006 bool is_weak;
9007 rtx_code_label *label1, *label2;
9008 rtx x, cond;
9009
9010 rval = operands[0];
9011 mem = operands[1];
9012 oldval = operands[2];
9013 newval = operands[3];
9014 is_weak = (operands[4] != const0_rtx);
9015 scratch = operands[7];
9016 mode = GET_MODE (mem);
9017
9018 label1 = NULL;
9019 if (!is_weak)
9020 {
9021 label1 = gen_label_rtx ();
9022 emit_label (label1);
9023 }
9024 label2 = gen_label_rtx ();
9025
9026 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
9027
9028 cond = aarch64_gen_compare_reg (NE, rval, oldval);
9029 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
9030 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9031 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
9032 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
9033
9034 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
9035
9036 if (!is_weak)
9037 {
9038 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
9039 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9040 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
9041 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
9042 }
9043 else
9044 {
9045 cond = gen_rtx_REG (CCmode, CC_REGNUM);
9046 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
9047 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
9048 }
9049
9050 emit_label (label2);
9051 }
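
/* The split above yields, for a strong DImode compare-and-swap with
   acquire-release semantics, a loop of roughly this shape (labels and
   register numbers are illustrative):

     .L1: ldaxr  x0, [x1]         // load-acquire exclusive
          cmp    x0, x2           // compare with expected value
          bne    .L2              // mismatch: fail
          stlxr  w3, x4, [x1]     // store-release exclusive of new value
          cbnz   w3, .L1          // exclusivity lost: retry
     .L2:

   For a weak compare-and-swap the retry branch is omitted and the
   exclusive-store status is instead compared against zero.  */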
9052
9053 /* Split an atomic operation. */
9054
9055 void
9056 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
9057 rtx value, rtx model_rtx, rtx cond)
9058 {
9059 machine_mode mode = GET_MODE (mem);
9060 machine_mode wmode = (mode == DImode ? DImode : SImode);
9061 rtx_code_label *label;
9062 rtx x;
9063
9064 label = gen_label_rtx ();
9065 emit_label (label);
9066
9067 if (new_out)
9068 new_out = gen_lowpart (wmode, new_out);
9069 if (old_out)
9070 old_out = gen_lowpart (wmode, old_out);
9071 else
9072 old_out = new_out;
9073 value = simplify_gen_subreg (wmode, value, mode, 0);
9074
9075 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
9076
9077 switch (code)
9078 {
9079 case SET:
9080 new_out = value;
9081 break;
9082
9083 case NOT:
9084 x = gen_rtx_AND (wmode, old_out, value);
9085 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
9086 x = gen_rtx_NOT (wmode, new_out);
9087 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
9088 break;
9089
9090 case MINUS:
9091 if (CONST_INT_P (value))
9092 {
9093 value = GEN_INT (-INTVAL (value));
9094 code = PLUS;
9095 }
9096 /* Fall through. */
9097
9098 default:
9099 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
9100 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
9101 break;
9102 }
9103
9104 aarch64_emit_store_exclusive (mode, cond, mem,
9105 gen_lowpart (mode, new_out), model_rtx);
9106
9107 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
9108 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
9109 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
9110 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
9111 }
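
/* For example, a relaxed atomic fetch-and-add on a DImode location is
   split into a loop of roughly this form (registers illustrative):

     .L1: ldxr   x0, [x2]         // old value
          add    x1, x0, x3       // apply the operation
          stxr   w4, x1, [x2]     // try to store the result
          cbnz   w4, .L1          // retry if the exclusive store failed  */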
9112
9113 static void
9114 aarch64_print_extension (void)
9115 {
9116 const struct aarch64_option_extension *opt = NULL;
9117
9118 for (opt = all_extensions; opt->name != NULL; opt++)
9119 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
9120 asm_fprintf (asm_out_file, "+%s", opt->name);
9121
9122 asm_fprintf (asm_out_file, "\n");
9123 }
9124
9125 static void
9126 aarch64_start_file (void)
9127 {
9128 if (selected_arch)
9129 {
9130 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
9131 aarch64_print_extension ();
9132 }
9133 else if (selected_cpu)
9134 {
9135 const char *truncated_name
9136 = aarch64_rewrite_selected_cpu (selected_cpu->name);
9137 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
9138 aarch64_print_extension ();
9139 }
9140 default_file_start ();
9141 }
9142
9143 /* Target hook for c_mode_for_suffix. */
9144 static machine_mode
9145 aarch64_c_mode_for_suffix (char suffix)
9146 {
9147 if (suffix == 'q')
9148 return TFmode;
9149
9150 return VOIDmode;
9151 }
9152
9153 /* We can only represent floating point constants which will fit in
9154 "quarter-precision" values. These values are characterised by
9155 a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
9156 by:
9157
9158 (-1)^s * (n/16) * 2^r
9159
9160 Where:
9161 's' is the sign bit.
9162 'n' is an integer in the range 16 <= n <= 31.
9163 'r' is an integer in the range -3 <= r <= 4. */
9164
9165 /* Return true iff X can be represented by a quarter-precision
9166 floating point immediate operand. Note, we cannot represent 0.0. */
9167 bool
9168 aarch64_float_const_representable_p (rtx x)
9169 {
9170 /* This represents our current view of how many bits
9171 make up the mantissa. */
9172 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
9173 int exponent;
9174 unsigned HOST_WIDE_INT mantissa, mask;
9175 REAL_VALUE_TYPE r, m;
9176 bool fail;
9177
9178 if (!CONST_DOUBLE_P (x))
9179 return false;
9180
9181 if (GET_MODE (x) == VOIDmode)
9182 return false;
9183
9184 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9185
9186 /* We cannot represent infinities, NaNs or +/-zero. We won't
9187 know if we have +zero until we analyse the mantissa, but we
9188 can reject the other invalid values. */
9189 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
9190 || REAL_VALUE_MINUS_ZERO (r))
9191 return false;
9192
9193 /* Extract exponent. */
9194 r = real_value_abs (&r);
9195 exponent = REAL_EXP (&r);
9196
9197 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
9198 highest (sign) bit, with a fixed binary point at bit point_pos.
9199 The low element of W holds the low part of the mantissa, the high
9199 element the high part.
9200 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
9201 bits for the mantissa, this can fail (low bits will be lost). */
9202 real_ldexp (&m, &r, point_pos - exponent);
9203 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
9204
9205 /* If the low part of the mantissa has bits set we cannot represent
9206 the value. */
9207 if (w.elt (0) != 0)
9208 return false;
9209 /* We have rejected the lower HOST_WIDE_INT, so update our
9210 understanding of how many bits lie in the mantissa and
9211 look only at the high HOST_WIDE_INT. */
9212 mantissa = w.elt (1);
9213 point_pos -= HOST_BITS_PER_WIDE_INT;
9214
9215 /* We can only represent values with a mantissa of the form 1.xxxx. */
9216 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
9217 if ((mantissa & mask) != 0)
9218 return false;
9219
9220 /* Having filtered unrepresentable values, we may now remove all
9221 but the highest 5 bits. */
9222 mantissa >>= point_pos - 5;
9223
9224 /* We cannot represent the value 0.0, so reject it. This is handled
9225 elsewhere. */
9226 if (mantissa == 0)
9227 return false;
9228
9229 /* Then, as bit 4 is always set, we can mask it off, leaving
9230 the mantissa in the range [0, 15]. */
9231 mantissa &= ~(1 << 4);
9232 gcc_assert (mantissa <= 15);
9233
9234 /* GCC internally does not use IEEE754-like encoding (where normalized
9235 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
9236 Our mantissa values are shifted 4 places to the left relative to
9237 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
9238 by 5 places to correct for GCC's representation. */
9239 exponent = 5 - exponent;
9240
9241 return (exponent >= 0 && exponent <= 7);
9242 }
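
/* Worked examples of the encoding described above: 0.5 is representable
   because 0.5 = (16/16) * 2^-1 with n = 16 and r = -1, and 1.5 is
   representable as (24/16) * 2^0; 0.1, by contrast, has no exact
   (n/16) * 2^r form, and 0.0 is rejected explicitly, so both must be
   materialised some other way.  */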
9243
9244 char*
9245 aarch64_output_simd_mov_immediate (rtx const_vector,
9246 machine_mode mode,
9247 unsigned width)
9248 {
9249 bool is_valid;
9250 static char templ[40];
9251 const char *mnemonic;
9252 const char *shift_op;
9253 unsigned int lane_count = 0;
9254 char element_char;
9255
9256 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
9257
9258 /* This will return true to show const_vector is legal for use as either
9259 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
9260 also update INFO to show how the immediate should be generated. */
9261 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
9262 gcc_assert (is_valid);
9263
9264 element_char = sizetochar (info.element_width);
9265 lane_count = width / info.element_width;
9266
9267 mode = GET_MODE_INNER (mode);
9268 if (mode == SFmode || mode == DFmode)
9269 {
9270 gcc_assert (info.shift == 0 && ! info.mvn);
9271 if (aarch64_float_const_zero_rtx_p (info.value))
9272 info.value = GEN_INT (0);
9273 else
9274 {
9275 #define buf_size 20
9276 REAL_VALUE_TYPE r;
9277 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
9278 char float_buf[buf_size] = {'\0'};
9279 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
9280 #undef buf_size
9281
9282 if (lane_count == 1)
9283 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
9284 else
9285 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
9286 lane_count, element_char, float_buf);
9287 return templ;
9288 }
9289 }
9290
9291 mnemonic = info.mvn ? "mvni" : "movi";
9292 shift_op = info.msl ? "msl" : "lsl";
9293
9294 if (lane_count == 1)
9295 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
9296 mnemonic, UINTVAL (info.value));
9297 else if (info.shift)
9298 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
9299 ", %s %d", mnemonic, lane_count, element_char,
9300 UINTVAL (info.value), shift_op, info.shift);
9301 else
9302 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
9303 mnemonic, lane_count, element_char, UINTVAL (info.value));
9304 return templ;
9305 }
9306
9307 char*
9308 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
9309 machine_mode mode)
9310 {
9311 machine_mode vmode;
9312
9313 gcc_assert (!VECTOR_MODE_P (mode));
9314 vmode = aarch64_simd_container_mode (mode, 64);
9315 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
9316 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
9317 }
9318
9319 /* Split operands into moves from op[1] + op[2] into op[0]. */
9320
9321 void
9322 aarch64_split_combinev16qi (rtx operands[3])
9323 {
9324 unsigned int dest = REGNO (operands[0]);
9325 unsigned int src1 = REGNO (operands[1]);
9326 unsigned int src2 = REGNO (operands[2]);
9327 machine_mode halfmode = GET_MODE (operands[1]);
9328 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
9329 rtx destlo, desthi;
9330
9331 gcc_assert (halfmode == V16QImode);
9332
9333 if (src1 == dest && src2 == dest + halfregs)
9334 {
9335 /* No-op move. Can't split to nothing; emit something. */
9336 emit_note (NOTE_INSN_DELETED);
9337 return;
9338 }
9339
9340 /* Preserve register attributes for variable tracking. */
9341 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
9342 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
9343 GET_MODE_SIZE (halfmode));
9344
9345 /* Special case of reversed high/low parts. */
9346 if (reg_overlap_mentioned_p (operands[2], destlo)
9347 && reg_overlap_mentioned_p (operands[1], desthi))
9348 {
9349 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
9350 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
9351 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
9352 }
9353 else if (!reg_overlap_mentioned_p (operands[2], destlo))
9354 {
9355 /* Try to avoid unnecessary moves if part of the result
9356 is in the right place already. */
9357 if (src1 != dest)
9358 emit_move_insn (destlo, operands[1]);
9359 if (src2 != dest + halfregs)
9360 emit_move_insn (desthi, operands[2]);
9361 }
9362 else
9363 {
9364 if (src2 != dest + halfregs)
9365 emit_move_insn (desthi, operands[2]);
9366 if (src1 != dest)
9367 emit_move_insn (destlo, operands[1]);
9368 }
9369 }
9370
9371 /* vec_perm support. */
9372
9373 #define MAX_VECT_LEN 16
9374
9375 struct expand_vec_perm_d
9376 {
9377 rtx target, op0, op1;
9378 unsigned char perm[MAX_VECT_LEN];
9379 machine_mode vmode;
9380 unsigned char nelt;
9381 bool one_vector_p;
9382 bool testing_p;
9383 };
9384
9385 /* Generate a variable permutation. */
9386
9387 static void
9388 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
9389 {
9390 machine_mode vmode = GET_MODE (target);
9391 bool one_vector_p = rtx_equal_p (op0, op1);
9392
9393 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
9394 gcc_checking_assert (GET_MODE (op0) == vmode);
9395 gcc_checking_assert (GET_MODE (op1) == vmode);
9396 gcc_checking_assert (GET_MODE (sel) == vmode);
9397 gcc_checking_assert (TARGET_SIMD);
9398
9399 if (one_vector_p)
9400 {
9401 if (vmode == V8QImode)
9402 {
9403 /* Expand the argument to a V16QI mode by duplicating it. */
9404 rtx pair = gen_reg_rtx (V16QImode);
9405 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
9406 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
9407 }
9408 else
9409 {
9410 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
9411 }
9412 }
9413 else
9414 {
9415 rtx pair;
9416
9417 if (vmode == V8QImode)
9418 {
9419 pair = gen_reg_rtx (V16QImode);
9420 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
9421 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
9422 }
9423 else
9424 {
9425 pair = gen_reg_rtx (OImode);
9426 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
9427 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
9428 }
9429 }
9430 }
9431
9432 void
9433 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
9434 {
9435 machine_mode vmode = GET_MODE (target);
9436 unsigned int nelt = GET_MODE_NUNITS (vmode);
9437 bool one_vector_p = rtx_equal_p (op0, op1);
9438 rtx mask;
9439
9440 /* The TBL instruction does not use a modulo index, so we must take care
9441 of that ourselves. */
9442 mask = aarch64_simd_gen_const_vector_dup (vmode,
9443 one_vector_p ? nelt - 1 : 2 * nelt - 1);
9444 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
9445
9446 /* For big-endian, we also need to reverse the index within the vector
9447 (but not which vector). */
9448 if (BYTES_BIG_ENDIAN)
9449 {
9450 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
9451 if (!one_vector_p)
9452 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
9453 sel = expand_simple_binop (vmode, XOR, sel, mask,
9454 NULL, 0, OPTAB_LIB_WIDEN);
9455 }
9456 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
9457 }
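
/* For instance, with two V16QImode inputs a selector element of 35 is
   reduced to 3 by the AND with 2 * nelt - 1 = 31 above, giving the
   modulo behaviour vec_perm requires; TBL would otherwise return zero
   for an out-of-range index rather than wrapping.  */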
9458
9459 /* Recognize patterns suitable for the TRN instructions. */
9460 static bool
9461 aarch64_evpc_trn (struct expand_vec_perm_d *d)
9462 {
9463 unsigned int i, odd, mask, nelt = d->nelt;
9464 rtx out, in0, in1, x;
9465 rtx (*gen) (rtx, rtx, rtx);
9466 machine_mode vmode = d->vmode;
9467
9468 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9469 return false;
9470
9471 /* Note that these are little-endian tests.
9472 We correct for big-endian later. */
9473 if (d->perm[0] == 0)
9474 odd = 0;
9475 else if (d->perm[0] == 1)
9476 odd = 1;
9477 else
9478 return false;
9479 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9480
9481 for (i = 0; i < nelt; i += 2)
9482 {
9483 if (d->perm[i] != i + odd)
9484 return false;
9485 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
9486 return false;
9487 }
9488
9489 /* Success! */
9490 if (d->testing_p)
9491 return true;
9492
9493 in0 = d->op0;
9494 in1 = d->op1;
9495 if (BYTES_BIG_ENDIAN)
9496 {
9497 x = in0, in0 = in1, in1 = x;
9498 odd = !odd;
9499 }
9500 out = d->target;
9501
9502 if (odd)
9503 {
9504 switch (vmode)
9505 {
9506 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
9507 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
9508 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
9509 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
9510 case V4SImode: gen = gen_aarch64_trn2v4si; break;
9511 case V2SImode: gen = gen_aarch64_trn2v2si; break;
9512 case V2DImode: gen = gen_aarch64_trn2v2di; break;
9513 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
9514 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
9515 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
9516 default:
9517 return false;
9518 }
9519 }
9520 else
9521 {
9522 switch (vmode)
9523 {
9524 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
9525 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
9526 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
9527 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
9528 case V4SImode: gen = gen_aarch64_trn1v4si; break;
9529 case V2SImode: gen = gen_aarch64_trn1v2si; break;
9530 case V2DImode: gen = gen_aarch64_trn1v2di; break;
9531 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
9532 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
9533 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
9534 default:
9535 return false;
9536 }
9537 }
9538
9539 emit_insn (gen (out, in0, in1));
9540 return true;
9541 }
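
/* For example, a two-operand V4SImode permutation with indices
   { 0, 4, 2, 6 } (odd == 0) interleaves the even-numbered lanes of the
   two inputs, which is exactly TRN1; { 1, 5, 3, 7 } interleaves the
   odd-numbered lanes and maps to TRN2.  */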
9542
9543 /* Recognize patterns suitable for the UZP instructions. */
9544 static bool
9545 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
9546 {
9547 unsigned int i, odd, mask, nelt = d->nelt;
9548 rtx out, in0, in1, x;
9549 rtx (*gen) (rtx, rtx, rtx);
9550 machine_mode vmode = d->vmode;
9551
9552 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9553 return false;
9554
9555 /* Note that these are little-endian tests.
9556 We correct for big-endian later. */
9557 if (d->perm[0] == 0)
9558 odd = 0;
9559 else if (d->perm[0] == 1)
9560 odd = 1;
9561 else
9562 return false;
9563 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9564
9565 for (i = 0; i < nelt; i++)
9566 {
9567 unsigned elt = (i * 2 + odd) & mask;
9568 if (d->perm[i] != elt)
9569 return false;
9570 }
9571
9572 /* Success! */
9573 if (d->testing_p)
9574 return true;
9575
9576 in0 = d->op0;
9577 in1 = d->op1;
9578 if (BYTES_BIG_ENDIAN)
9579 {
9580 x = in0, in0 = in1, in1 = x;
9581 odd = !odd;
9582 }
9583 out = d->target;
9584
9585 if (odd)
9586 {
9587 switch (vmode)
9588 {
9589 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
9590 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
9591 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
9592 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
9593 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
9594 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
9595 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
9596 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
9597 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
9598 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
9599 default:
9600 return false;
9601 }
9602 }
9603 else
9604 {
9605 switch (vmode)
9606 {
9607 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
9608 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
9609 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
9610 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
9611 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
9612 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
9613 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
9614 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
9615 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
9616 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
9617 default:
9618 return false;
9619 }
9620 }
9621
9622 emit_insn (gen (out, in0, in1));
9623 return true;
9624 }
9625
9626 /* Recognize patterns suitable for the ZIP instructions. */
9627 static bool
9628 aarch64_evpc_zip (struct expand_vec_perm_d *d)
9629 {
9630 unsigned int i, high, mask, nelt = d->nelt;
9631 rtx out, in0, in1, x;
9632 rtx (*gen) (rtx, rtx, rtx);
9633 machine_mode vmode = d->vmode;
9634
9635 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9636 return false;
9637
9638 /* Note that these are little-endian tests.
9639 We correct for big-endian later. */
9640 high = nelt / 2;
9641 if (d->perm[0] == high)
9642 /* Do Nothing. */
9643 ;
9644 else if (d->perm[0] == 0)
9645 high = 0;
9646 else
9647 return false;
9648 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9649
9650 for (i = 0; i < nelt / 2; i++)
9651 {
9652 unsigned elt = (i + high) & mask;
9653 if (d->perm[i * 2] != elt)
9654 return false;
9655 elt = (elt + nelt) & mask;
9656 if (d->perm[i * 2 + 1] != elt)
9657 return false;
9658 }
9659
9660 /* Success! */
9661 if (d->testing_p)
9662 return true;
9663
9664 in0 = d->op0;
9665 in1 = d->op1;
9666 if (BYTES_BIG_ENDIAN)
9667 {
9668 x = in0, in0 = in1, in1 = x;
9669 high = !high;
9670 }
9671 out = d->target;
9672
9673 if (high)
9674 {
9675 switch (vmode)
9676 {
9677 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
9678 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
9679 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
9680 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
9681 case V4SImode: gen = gen_aarch64_zip2v4si; break;
9682 case V2SImode: gen = gen_aarch64_zip2v2si; break;
9683 case V2DImode: gen = gen_aarch64_zip2v2di; break;
9684 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
9685 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
9686 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
9687 default:
9688 return false;
9689 }
9690 }
9691 else
9692 {
9693 switch (vmode)
9694 {
9695 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
9696 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
9697 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
9698 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
9699 case V4SImode: gen = gen_aarch64_zip1v4si; break;
9700 case V2SImode: gen = gen_aarch64_zip1v2si; break;
9701 case V2DImode: gen = gen_aarch64_zip1v2di; break;
9702 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
9703 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
9704 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
9705 default:
9706 return false;
9707 }
9708 }
9709
9710 emit_insn (gen (out, in0, in1));
9711 return true;
9712 }
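
/* For example, a two-operand V4SImode permutation with indices
   { 0, 4, 1, 5 } interleaves the low halves of the two inputs and is
   matched as ZIP1, while { 2, 6, 3, 7 } interleaves the high halves
   and is matched as ZIP2.  */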
9713
9714 /* Recognize patterns for the EXT insn. */
9715
9716 static bool
9717 aarch64_evpc_ext (struct expand_vec_perm_d *d)
9718 {
9719 unsigned int i, nelt = d->nelt;
9720 rtx (*gen) (rtx, rtx, rtx, rtx);
9721 rtx offset;
9722
9723 unsigned int location = d->perm[0]; /* Always < nelt. */
9724
9725 /* Check if the extracted indices are increasing by one. */
9726 for (i = 1; i < nelt; i++)
9727 {
9728 unsigned int required = location + i;
9729 if (d->one_vector_p)
9730 {
9731 /* We'll pass the same vector in twice, so allow indices to wrap. */
9732 required &= (nelt - 1);
9733 }
9734 if (d->perm[i] != required)
9735 return false;
9736 }
9737
9738 switch (d->vmode)
9739 {
9740 case V16QImode: gen = gen_aarch64_extv16qi; break;
9741 case V8QImode: gen = gen_aarch64_extv8qi; break;
9742 case V4HImode: gen = gen_aarch64_extv4hi; break;
9743 case V8HImode: gen = gen_aarch64_extv8hi; break;
9744 case V2SImode: gen = gen_aarch64_extv2si; break;
9745 case V4SImode: gen = gen_aarch64_extv4si; break;
9746 case V2SFmode: gen = gen_aarch64_extv2sf; break;
9747 case V4SFmode: gen = gen_aarch64_extv4sf; break;
9748 case V2DImode: gen = gen_aarch64_extv2di; break;
9749 case V2DFmode: gen = gen_aarch64_extv2df; break;
9750 default:
9751 return false;
9752 }
9753
9754 /* Success! */
9755 if (d->testing_p)
9756 return true;
9757
9758 /* The case where (location == 0) is a no-op for both big- and little-endian,
9759 and is removed by the mid-end at optimization levels -O1 and higher. */
9760
9761 if (BYTES_BIG_ENDIAN && (location != 0))
9762 {
9763 /* After setup, we want the high elements of the first vector (stored
9764 at the LSB end of the register), and the low elements of the second
9765 vector (stored at the MSB end of the register). So swap. */
9766 rtx temp = d->op0;
9767 d->op0 = d->op1;
9768 d->op1 = temp;
9769 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
9770 location = nelt - location;
9771 }
9772
9773 offset = GEN_INT (location);
9774 emit_insn (gen (d->target, d->op0, d->op1, offset));
9775 return true;
9776 }
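
/* As an example, the single-operand V4SImode rotation { 1, 2, 3, 0 }
   has consecutive (wrapping) indices starting at location 1, so it is
   implemented as an EXT of the vector with itself at an offset of one
   32-bit element; on big-endian the operands are swapped and the
   offset becomes nelt - location, as handled above.  */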
9777
9778 /* Recognize patterns for the REV insns. */
9779
9780 static bool
9781 aarch64_evpc_rev (struct expand_vec_perm_d *d)
9782 {
9783 unsigned int i, j, diff, nelt = d->nelt;
9784 rtx (*gen) (rtx, rtx);
9785
9786 if (!d->one_vector_p)
9787 return false;
9788
9789 diff = d->perm[0];
9790 switch (diff)
9791 {
9792 case 7:
9793 switch (d->vmode)
9794 {
9795 case V16QImode: gen = gen_aarch64_rev64v16qi; break;
9796 case V8QImode: gen = gen_aarch64_rev64v8qi; break;
9797 default:
9798 return false;
9799 }
9800 break;
9801 case 3:
9802 switch (d->vmode)
9803 {
9804 case V16QImode: gen = gen_aarch64_rev32v16qi; break;
9805 case V8QImode: gen = gen_aarch64_rev32v8qi; break;
9806 case V8HImode: gen = gen_aarch64_rev64v8hi; break;
9807 case V4HImode: gen = gen_aarch64_rev64v4hi; break;
9808 default:
9809 return false;
9810 }
9811 break;
9812 case 1:
9813 switch (d->vmode)
9814 {
9815 case V16QImode: gen = gen_aarch64_rev16v16qi; break;
9816 case V8QImode: gen = gen_aarch64_rev16v8qi; break;
9817 case V8HImode: gen = gen_aarch64_rev32v8hi; break;
9818 case V4HImode: gen = gen_aarch64_rev32v4hi; break;
9819 case V4SImode: gen = gen_aarch64_rev64v4si; break;
9820 case V2SImode: gen = gen_aarch64_rev64v2si; break;
9821 case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
9822 case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
9823 default:
9824 return false;
9825 }
9826 break;
9827 default:
9828 return false;
9829 }
9830
9831 for (i = 0; i < nelt ; i += diff + 1)
9832 for (j = 0; j <= diff; j += 1)
9833 {
9834 /* This is guaranteed to be true as the value of diff
9835 is 7, 3, 1 and we should have enough elements in the
9836 queue to generate this. Getting a vector mask with a
9837 value of diff other than these values implies that
9838 something is wrong by the time we get here. */
9839 gcc_assert (i + j < nelt);
9840 if (d->perm[i + j] != i + diff - j)
9841 return false;
9842 }
9843
9844 /* Success! */
9845 if (d->testing_p)
9846 return true;
9847
9848 emit_insn (gen (d->target, d->op0));
9849 return true;
9850 }
9851
9852 static bool
9853 aarch64_evpc_dup (struct expand_vec_perm_d *d)
9854 {
9855 rtx (*gen) (rtx, rtx, rtx);
9856 rtx out = d->target;
9857 rtx in0;
9858 machine_mode vmode = d->vmode;
9859 unsigned int i, elt, nelt = d->nelt;
9860 rtx lane;
9861
9862 elt = d->perm[0];
9863 for (i = 1; i < nelt; i++)
9864 {
9865 if (elt != d->perm[i])
9866 return false;
9867 }
9868
9869 /* The generic preparation in aarch64_expand_vec_perm_const_1
9870 swaps the operand order and the permute indices if it finds
9871 d->perm[0] to be in the second operand. Thus, we can always
9872 use d->op0 and need not do any extra arithmetic to get the
9873 correct lane number. */
9874 in0 = d->op0;
9875 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
9876
9877 switch (vmode)
9878 {
9879 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
9880 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
9881 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
9882 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
9883 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
9884 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
9885 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
9886 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
9887 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
9888 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
9889 default:
9890 return false;
9891 }
9892
9893 emit_insn (gen (out, in0, lane));
9894 return true;
9895 }
9896
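/* Fall back to a generic TBL-based permute: the selector is materialised
   as a constant vector, forced into a register and handed to
   aarch64_expand_vec_perm_1, which emits the TBL sequence.  Roughly, an
   arbitrary V16QImode shuffle ends up as a tbl instruction indexed by
   that selector register. */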
9897 static bool
9898 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
9899 {
9900 rtx rperm[MAX_VECT_LEN], sel;
9901 machine_mode vmode = d->vmode;
9902 unsigned int i, nelt = d->nelt;
9903
9904 if (d->testing_p)
9905 return true;
9906
9907 /* Generic code will try constant permutation twice. Once with the
9908 original mode and again with the elements lowered to QImode.
9909 So wait and don't do the selector expansion ourselves. */
9910 if (vmode != V8QImode && vmode != V16QImode)
9911 return false;
9912
9913 for (i = 0; i < nelt; ++i)
9914 {
9915 int nunits = GET_MODE_NUNITS (vmode);
9916
9917 /* If big-endian and two vectors we end up with a weird mixed-endian
9918 mode on NEON. Reverse the index within each word but not the word
9919 itself. */
9920 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
9921 : d->perm[i]);
9922 }
9923 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
9924 sel = force_reg (vmode, sel);
9925
9926 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
9927 return true;
9928 }
9929
9930 static bool
9931 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
9932 {
9933 /* The pattern matching functions above are written to look for a small
9934 number to begin the sequence (0, 1, N/2). If we begin with an index
9935 from the second operand, we can swap the operands. */
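/* For instance, with nelt == 4 a selector such as { 5, 1, 6, 2 } (whose
   first index refers to the second operand) is rewritten below as
   { 1, 5, 2, 6 } applied to the exchanged operands. */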
9936 if (d->perm[0] >= d->nelt)
9937 {
9938 unsigned i, nelt = d->nelt;
9939 rtx x;
9940
9941 gcc_assert (nelt == (nelt & -nelt));
9942 for (i = 0; i < nelt; ++i)
9943 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
9944
9945 x = d->op0;
9946 d->op0 = d->op1;
9947 d->op1 = x;
9948 }
9949
9950 if (TARGET_SIMD)
9951 {
9952 if (aarch64_evpc_rev (d))
9953 return true;
9954 else if (aarch64_evpc_ext (d))
9955 return true;
9956 else if (aarch64_evpc_dup (d))
9957 return true;
9958 else if (aarch64_evpc_zip (d))
9959 return true;
9960 else if (aarch64_evpc_uzp (d))
9961 return true;
9962 else if (aarch64_evpc_trn (d))
9963 return true;
9964 return aarch64_evpc_tbl (d);
9965 }
9966 return false;
9967 }
9968
9969 /* Expand a vec_perm_const pattern. */
9970
9971 bool
9972 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
9973 {
9974 struct expand_vec_perm_d d;
9975 int i, nelt, which;
9976
9977 d.target = target;
9978 d.op0 = op0;
9979 d.op1 = op1;
9980
9981 d.vmode = GET_MODE (target);
9982 gcc_assert (VECTOR_MODE_P (d.vmode));
9983 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9984 d.testing_p = false;
9985
9986 for (i = which = 0; i < nelt; ++i)
9987 {
9988 rtx e = XVECEXP (sel, 0, i);
9989 int ei = INTVAL (e) & (2 * nelt - 1);
9990 which |= (ei < nelt ? 1 : 2);
9991 d.perm[i] = ei;
9992 }
9993
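/* WHICH now has bit 0 set if any index selects from OP0 and bit 1 set
   if any index selects from OP1, hence the three cases below. */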
9994 switch (which)
9995 {
9996 default:
9997 gcc_unreachable ();
9998
9999 case 3:
10000 d.one_vector_p = false;
10001 if (!rtx_equal_p (op0, op1))
10002 break;
10003
10004 /* The elements of PERM do not suggest that only the first operand
10005 is used, but both operands are identical. Allow easier matching
10006 of the permutation by folding the permutation into the single
10007 input vector. */
10008 /* Fall Through. */
10009 case 2:
10010 for (i = 0; i < nelt; ++i)
10011 d.perm[i] &= nelt - 1;
10012 d.op0 = op1;
10013 d.one_vector_p = true;
10014 break;
10015
10016 case 1:
10017 d.op1 = op0;
10018 d.one_vector_p = true;
10019 break;
10020 }
10021
10022 return aarch64_expand_vec_perm_const_1 (&d);
10023 }
10024
10025 static bool
10026 aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
10027 const unsigned char *sel)
10028 {
10029 struct expand_vec_perm_d d;
10030 unsigned int i, nelt, which;
10031 bool ret;
10032
10033 d.vmode = vmode;
10034 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
10035 d.testing_p = true;
10036 memcpy (d.perm, sel, nelt);
10037
10038 /* Calculate whether all elements are in one vector. */
10039 for (i = which = 0; i < nelt; ++i)
10040 {
10041 unsigned char e = d.perm[i];
10042 gcc_assert (e < 2 * nelt);
10043 which |= (e < nelt ? 1 : 2);
10044 }
10045
10046 /* If all elements are from the second vector, reindex as if from the
10047 first vector. */
10048 if (which == 2)
10049 for (i = 0; i < nelt; ++i)
10050 d.perm[i] -= nelt;
10051
10052 /* Check whether the mask can be applied to a single vector. */
10053 d.one_vector_p = (which != 3);
10054
10055 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
10056 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
10057 if (!d.one_vector_p)
10058 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
10059
10060 start_sequence ();
10061 ret = aarch64_expand_vec_perm_const_1 (&d);
10062 end_sequence ();
10063
10064 return ret;
10065 }
10066
10067 /* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
10068 bool
10069 aarch64_cannot_change_mode_class (machine_mode from,
10070 machine_mode to,
10071 enum reg_class rclass)
10072 {
10073 /* Full-reg subregs are allowed on general regs or any class if they are
10074 the same size. */
10075 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
10076 || !reg_classes_intersect_p (FP_REGS, rclass))
10077 return false;
10078
10079 /* Limited combinations of subregs are safe on FPREGs. In particular,
10080 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
10081 2. Scalar to Scalar for integer modes or same size float modes.
10082 3. Vector to Vector modes.
10083 4. On little-endian only, Vector-Structure to Vector modes. */
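/* For instance, taking a DImode subreg of a V2DImode value falls under
   case 1 above and is allowed, since exactly one 64-bit unit of the
   vector is accessed. */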
10084 if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
10085 {
10086 if (aarch64_vector_mode_supported_p (from)
10087 && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
10088 return false;
10089
10090 if (GET_MODE_NUNITS (from) == 1
10091 && GET_MODE_NUNITS (to) == 1
10092 && (GET_MODE_CLASS (from) == MODE_INT
10093 || from == to))
10094 return false;
10095
10096 if (aarch64_vector_mode_supported_p (from)
10097 && aarch64_vector_mode_supported_p (to))
10098 return false;
10099
10100 /* Within a vector structure straddling multiple vector registers
10101 we are in a mixed-endian representation. As such, we can't
10102 easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can
10103 switch between vectors and vector structures cheaply. */
10104 if (!BYTES_BIG_ENDIAN)
10105 if ((aarch64_vector_mode_supported_p (from)
10106 && aarch64_vect_struct_mode_p (to))
10107 || (aarch64_vector_mode_supported_p (to)
10108 && aarch64_vect_struct_mode_p (from)))
10109 return false;
10110 }
10111
10112 return true;
10113 }
10114
10115 /* Implement MODES_TIEABLE_P. */
10116
10117 bool
10118 aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10119 {
10120 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
10121 return true;
10122
10123 /* We specifically want to allow elements of "structure" modes to
10124 be tieable to the structure. This more general condition allows
10125 other rarer situations too. */
10126 if (TARGET_SIMD
10127 && aarch64_vector_mode_p (mode1)
10128 && aarch64_vector_mode_p (mode2))
10129 return true;
10130
10131 return false;
10132 }
10133
10134 /* Return a new RTX holding the result of moving POINTER forward by
10135 AMOUNT bytes. */
10136
10137 static rtx
10138 aarch64_move_pointer (rtx pointer, int amount)
10139 {
10140 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
10141
10142 return adjust_automodify_address (pointer, GET_MODE (pointer),
10143 next, amount);
10144 }
10145
10146 /* Return a new RTX holding the result of moving POINTER forward by the
10147 size of the mode it points to. */
10148
10149 static rtx
10150 aarch64_progress_pointer (rtx pointer)
10151 {
10152 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
10153
10154 return aarch64_move_pointer (pointer, amount);
10155 }
10156
10157 /* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
10158 MODE bytes. */
10159
10160 static void
10161 aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
10162 machine_mode mode)
10163 {
10164 rtx reg = gen_reg_rtx (mode);
10165
10166 /* "Cast" the pointers to the correct mode. */
10167 *src = adjust_address (*src, mode, 0);
10168 *dst = adjust_address (*dst, mode, 0);
10169 /* Emit the memcpy. */
10170 emit_move_insn (reg, *src);
10171 emit_move_insn (*dst, reg);
10172 /* Move the pointers forward. */
10173 *src = aarch64_progress_pointer (*src);
10174 *dst = aarch64_progress_pointer (*dst);
10175 }
10176
10177 /* Expand movmem, as if from a __builtin_memcpy. Return true if
10178 we succeed, otherwise return false. */
10179
10180 bool
10181 aarch64_expand_movmem (rtx *operands)
10182 {
10183 unsigned int n;
10184 rtx dst = operands[0];
10185 rtx src = operands[1];
10186 rtx base;
10187 bool speed_p = !optimize_function_for_size_p (cfun);
10188
10189 /* When optimizing for size, give a better estimate of the length of a
10190 memcpy call, but use the default otherwise. */
10191 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
10192
10193 /* We can't do anything smart if the amount to copy is not constant. */
10194 if (!CONST_INT_P (operands[2]))
10195 return false;
10196
10197 n = UINTVAL (operands[2]);
10198
10199 /* Try to keep the number of instructions low. For cases below 16 bytes we
10200 need to make at most two moves. For cases above 16 bytes it will be one
10201 move for each 16 byte chunk, then at most two additional moves. */
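/* For instance, a 15-byte copy is emitted as an 8-byte chunk followed by
   an overlapping 8-byte chunk, and a 35-byte copy as two 16-byte chunks
   followed by an overlapping 4-byte chunk. */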
10202 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
10203 return false;
10204
10205 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10206 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
10207
10208 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
10209 src = adjust_automodify_address (src, VOIDmode, base, 0);
10210
10211 /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
10212 1-byte chunk. */
10213 if (n < 4)
10214 {
10215 if (n >= 2)
10216 {
10217 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
10218 n -= 2;
10219 }
10220
10221 if (n == 1)
10222 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
10223
10224 return true;
10225 }
10226
10227 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
10228 4-byte chunk, partially overlapping with the previously copied chunk. */
10229 if (n < 8)
10230 {
10231 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10232 n -= 4;
10233 if (n > 0)
10234 {
10235 int move = n - 4;
10236
10237 src = aarch64_move_pointer (src, move);
10238 dst = aarch64_move_pointer (dst, move);
10239 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10240 }
10241 return true;
10242 }
10243
10244 /* Copy 8 bytes or more. Copy chunks of 16 bytes until we run out of
10245 them, then (if applicable) an 8-byte chunk. */
10246 while (n >= 8)
10247 {
10248 if (n / 16)
10249 {
10250 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
10251 n -= 16;
10252 }
10253 else
10254 {
10255 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
10256 n -= 8;
10257 }
10258 }
10259
10260 /* Finish the final bytes of the copy. We can always do this in one
10261 instruction. We either copy the exact amount we need, or partially
10262 overlap with the previous chunk we copied and copy 8 bytes. */
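/* E.g. if three bytes remain, the pointers are moved back one byte and a
   single 4-byte copy rewrites one already-copied byte together with the
   remaining three. */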
10263 if (n == 0)
10264 return true;
10265 else if (n == 1)
10266 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
10267 else if (n == 2)
10268 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
10269 else if (n == 4)
10270 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10271 else
10272 {
10273 if (n == 3)
10274 {
10275 src = aarch64_move_pointer (src, -1);
10276 dst = aarch64_move_pointer (dst, -1);
10277 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
10278 }
10279 else
10280 {
10281 int move = n - 8;
10282
10283 src = aarch64_move_pointer (src, move);
10284 dst = aarch64_move_pointer (dst, move);
10285 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
10286 }
10287 }
10288
10289 return true;
10290 }
10291
10292 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
10293
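/* AddressSanitizer forms shadow addresses as
   (address >> ASAN_SHADOW_SHIFT) + this offset; 1 << 36 is a large offset
   so that the shadow region stays clear of typical AArch64 application
   mappings. */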
10294 static unsigned HOST_WIDE_INT
10295 aarch64_asan_shadow_offset (void)
10296 {
10297 return (HOST_WIDE_INT_1 << 36);
10298 }
10299
10300 static bool
10301 aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
10302 unsigned int align,
10303 enum by_pieces_operation op,
10304 bool speed_p)
10305 {
10306 /* STORE_BY_PIECES can be used when copying a constant string, but
10307 in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
10308 For now we always fail this and let the move_by_pieces code copy
10309 the string from read-only memory. */
10310 if (op == STORE_BY_PIECES)
10311 return false;
10312
10313 return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
10314 }
10315
10316 /* Implement TARGET_SCHED_MACRO_FUSION_P. Return true if target supports
10317 instruction fusion of some sort. */
10318
10319 static bool
10320 aarch64_macro_fusion_p (void)
10321 {
10322 return aarch64_tune_params->fuseable_ops != AARCH64_FUSE_NOTHING;
10323 }
10324
10325
10326 /* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
10327 should be kept together during scheduling. */
10328
10329 static bool
10330 aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
10331 {
10332 rtx set_dest;
10333 rtx prev_set = single_set (prev);
10334 rtx curr_set = single_set (curr);
10335 /* prev and curr are simple SET insns, i.e. no flag setting or branching. */
10336 bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
10337
10338 if (!aarch64_macro_fusion_p ())
10339 return false;
10340
10341 if (simple_sets_p
10342 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOV_MOVK))
10343 {
10344 /* We are trying to match:
10345 prev (mov) == (set (reg r0) (const_int imm16))
10346 curr (movk) == (set (zero_extract (reg r0)
10347 (const_int 16)
10348 (const_int 16))
10349 (const_int imm16_1)) */
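/* In assembly this corresponds roughly to fusing, e.g.,
     mov  x0, #0x1234
     movk x0, #0x5678, lsl #16.  */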
10350
10351 set_dest = SET_DEST (curr_set);
10352
10353 if (GET_CODE (set_dest) == ZERO_EXTRACT
10354 && CONST_INT_P (SET_SRC (curr_set))
10355 && CONST_INT_P (SET_SRC (prev_set))
10356 && CONST_INT_P (XEXP (set_dest, 2))
10357 && INTVAL (XEXP (set_dest, 2)) == 16
10358 && REG_P (XEXP (set_dest, 0))
10359 && REG_P (SET_DEST (prev_set))
10360 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
10361 {
10362 return true;
10363 }
10364 }
10365
10366 if (simple_sets_p
10367 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_ADD))
10368 {
10369
10370 /* We're trying to match:
10371 prev (adrp) == (set (reg r1)
10372 (high (symbol_ref ("SYM"))))
10373 curr (add) == (set (reg r0)
10374 (lo_sum (reg r1)
10375 (symbol_ref ("SYM"))))
10376 Note that r0 need not necessarily be the same as r1, especially
10377 during pre-regalloc scheduling. */
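/* In assembly this is roughly the pair
     adrp x1, sym
     add  x0, x1, :lo12:sym.  */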
10378
10379 if (satisfies_constraint_Ush (SET_SRC (prev_set))
10380 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
10381 {
10382 if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
10383 && REG_P (XEXP (SET_SRC (curr_set), 0))
10384 && REGNO (XEXP (SET_SRC (curr_set), 0))
10385 == REGNO (SET_DEST (prev_set))
10386 && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
10387 XEXP (SET_SRC (curr_set), 1)))
10388 return true;
10389 }
10390 }
10391
10392 if (simple_sets_p
10393 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOVK_MOVK))
10394 {
10395
10396 /* We're trying to match:
10397 prev (movk) == (set (zero_extract (reg r0)
10398 (const_int 16)
10399 (const_int 32))
10400 (const_int imm16_1))
10401 curr (movk) == (set (zero_extract (reg r0)
10402 (const_int 16)
10403 (const_int 48))
10404 (const_int imm16_2)) */
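/* In assembly this is roughly the pair
     movk x0, #0xdead, lsl #32
     movk x0, #0xbeef, lsl #48.  */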
10405
10406 if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
10407 && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
10408 && REG_P (XEXP (SET_DEST (prev_set), 0))
10409 && REG_P (XEXP (SET_DEST (curr_set), 0))
10410 && REGNO (XEXP (SET_DEST (prev_set), 0))
10411 == REGNO (XEXP (SET_DEST (curr_set), 0))
10412 && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
10413 && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
10414 && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
10415 && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
10416 && CONST_INT_P (SET_SRC (prev_set))
10417 && CONST_INT_P (SET_SRC (curr_set)))
10418 return true;
10419
10420 }
10421 if (simple_sets_p
10422 && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_LDR))
10423 {
10424 /* We're trying to match:
10425 prev (adrp) == (set (reg r0)
10426 (high (symbol_ref ("SYM"))))
10427 curr (ldr) == (set (reg r1)
10428 (mem (lo_sum (reg r0)
10429 (symbol_ref ("SYM")))))
10430 or
10431 curr (ldr) == (set (reg r1)
10432 (zero_extend (mem
10433 (lo_sum (reg r0)
10434 (symbol_ref ("SYM")))))) */
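/* In assembly this is roughly the pair
     adrp x0, sym
     ldr  x1, [x0, :lo12:sym].  */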
10435 if (satisfies_constraint_Ush (SET_SRC (prev_set))
10436 && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
10437 {
10438 rtx curr_src = SET_SRC (curr_set);
10439
10440 if (GET_CODE (curr_src) == ZERO_EXTEND)
10441 curr_src = XEXP (curr_src, 0);
10442
10443 if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
10444 && REG_P (XEXP (XEXP (curr_src, 0), 0))
10445 && REGNO (XEXP (XEXP (curr_src, 0), 0))
10446 == REGNO (SET_DEST (prev_set))
10447 && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
10448 XEXP (SET_SRC (prev_set), 0)))
10449 return true;
10450 }
10451 }
10452
10453 if ((aarch64_tune_params->fuseable_ops & AARCH64_FUSE_CMP_BRANCH)
10454 && any_condjump_p (curr))
10455 {
10456 enum attr_type prev_type = get_attr_type (prev);
10457
10458 /* FIXME: this misses some instructions which are considered simple
10459 arithmetic for ThunderX; simple shifts are among those missed here. */
10460 if (prev_type == TYPE_ALUS_SREG
10461 || prev_type == TYPE_ALUS_IMM
10462 || prev_type == TYPE_LOGICS_REG
10463 || prev_type == TYPE_LOGICS_IMM)
10464 return true;
10465 }
10466
10467 return false;
10468 }
10469
10470 /* If MEM is in the form of [base+offset], extract the two parts
10471 of the address and store them in BASE and OFFSET; otherwise return
10472 false after clearing BASE and OFFSET. */
10473
10474 bool
10475 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
10476 {
10477 rtx addr;
10478
10479 gcc_assert (MEM_P (mem));
10480
10481 addr = XEXP (mem, 0);
10482
10483 if (REG_P (addr))
10484 {
10485 *base = addr;
10486 *offset = const0_rtx;
10487 return true;
10488 }
10489
10490 if (GET_CODE (addr) == PLUS
10491 && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
10492 {
10493 *base = XEXP (addr, 0);
10494 *offset = XEXP (addr, 1);
10495 return true;
10496 }
10497
10498 *base = NULL_RTX;
10499 *offset = NULL_RTX;
10500
10501 return false;
10502 }
10503
10504 /* Types for scheduling fusion. */
10505 enum sched_fusion_type
10506 {
10507 SCHED_FUSION_NONE = 0,
10508 SCHED_FUSION_LD_SIGN_EXTEND,
10509 SCHED_FUSION_LD_ZERO_EXTEND,
10510 SCHED_FUSION_LD,
10511 SCHED_FUSION_ST,
10512 SCHED_FUSION_NUM
10513 };
10514
10515 /* If INSN is a load or store with an address in the form of [base+offset],
10516 extract the two parts and store them in BASE and OFFSET. Return the
10517 scheduling fusion type of this INSN. */
10518
10519 static enum sched_fusion_type
10520 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
10521 {
10522 rtx x, dest, src;
10523 enum sched_fusion_type fusion = SCHED_FUSION_LD;
10524
10525 gcc_assert (INSN_P (insn));
10526 x = PATTERN (insn);
10527 if (GET_CODE (x) != SET)
10528 return SCHED_FUSION_NONE;
10529
10530 src = SET_SRC (x);
10531 dest = SET_DEST (x);
10532
10533 if (GET_MODE (dest) != SImode && GET_MODE (dest) != DImode
10534 && GET_MODE (dest) != SFmode && GET_MODE (dest) != DFmode)
10535 return SCHED_FUSION_NONE;
10536
10537 if (GET_CODE (src) == SIGN_EXTEND)
10538 {
10539 fusion = SCHED_FUSION_LD_SIGN_EXTEND;
10540 src = XEXP (src, 0);
10541 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
10542 return SCHED_FUSION_NONE;
10543 }
10544 else if (GET_CODE (src) == ZERO_EXTEND)
10545 {
10546 fusion = SCHED_FUSION_LD_ZERO_EXTEND;
10547 src = XEXP (src, 0);
10548 if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
10549 return SCHED_FUSION_NONE;
10550 }
10551
10552 if (GET_CODE (src) == MEM && REG_P (dest))
10553 extract_base_offset_in_addr (src, base, offset);
10554 else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
10555 {
10556 fusion = SCHED_FUSION_ST;
10557 extract_base_offset_in_addr (dest, base, offset);
10558 }
10559 else
10560 return SCHED_FUSION_NONE;
10561
10562 if (*base == NULL_RTX || *offset == NULL_RTX)
10563 fusion = SCHED_FUSION_NONE;
10564
10565 return fusion;
10566 }
10567
10568 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
10569
10570 Currently we only support fusing ldr and str instructions, so FUSION_PRI
10571 and PRI are only calculated for these instructions. For other instructions,
10572 FUSION_PRI and PRI are simply set to MAX_PRI - 1. In the future, other
10573 types of instruction fusion can be added by returning different priorities.
10574
10575 It's important that irrelevant instructions get the largest FUSION_PRI. */
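/* For instance, two loads ldr w1, [x3, 4] and ldr w2, [x3, 8] get the
   same FUSION_PRI (same fusion type and base register) and PRI values
   that differ only by their offsets, so the access at offset 4 is
   preferred first. */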
10576
10577 static void
10578 aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
10579 int *fusion_pri, int *pri)
10580 {
10581 int tmp, off_val;
10582 rtx base, offset;
10583 enum sched_fusion_type fusion;
10584
10585 gcc_assert (INSN_P (insn));
10586
10587 tmp = max_pri - 1;
10588 fusion = fusion_load_store (insn, &base, &offset);
10589 if (fusion == SCHED_FUSION_NONE)
10590 {
10591 *pri = tmp;
10592 *fusion_pri = tmp;
10593 return;
10594 }
10595
10596 /* Set FUSION_PRI according to fusion type and base register. */
10597 *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
10598
10599 /* Calculate PRI. */
10600 tmp /= 2;
10601
10602 /* INSN with smaller offset goes first. */
10603 off_val = (int)(INTVAL (offset));
10604 if (off_val >= 0)
10605 tmp -= (off_val & 0xfffff);
10606 else
10607 tmp += ((- off_val) & 0xfffff);
10608
10609 *pri = tmp;
10610 return;
10611 }
10612
10613 /* Given OPERANDS of consecutive load/store, check if we can merge
10614 them into ldp/stp. LOAD is true if they are load instructions.
10615 MODE is the mode of memory operands. */
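/* For instance, ldr w0, [x2] and ldr w1, [x2, 4] satisfy these checks
   and can be merged by the peepholes into ldp w0, w1, [x2]. */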
10616
10617 bool
10618 aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
10619 enum machine_mode mode)
10620 {
10621 HOST_WIDE_INT offval_1, offval_2, msize;
10622 enum reg_class rclass_1, rclass_2;
10623 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
10624
10625 if (load)
10626 {
10627 mem_1 = operands[1];
10628 mem_2 = operands[3];
10629 reg_1 = operands[0];
10630 reg_2 = operands[2];
10631 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
10632 if (REGNO (reg_1) == REGNO (reg_2))
10633 return false;
10634 }
10635 else
10636 {
10637 mem_1 = operands[0];
10638 mem_2 = operands[2];
10639 reg_1 = operands[1];
10640 reg_2 = operands[3];
10641 }
10642
10643 /* The mems cannot be volatile. */
10644 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
10645 return false;
10646
10647 /* Check if the addresses are in the form of [base+offset]. */
10648 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
10649 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
10650 return false;
10651 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
10652 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
10653 return false;
10654
10655 /* Check if the bases are the same. */
10656 if (!rtx_equal_p (base_1, base_2))
10657 return false;
10658
10659 offval_1 = INTVAL (offset_1);
10660 offval_2 = INTVAL (offset_2);
10661 msize = GET_MODE_SIZE (mode);
10662 /* Check if the offsets are consecutive. */
10663 if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
10664 return false;
10665
10666 /* Check if the addresses are clobbered by load. */
10667 if (load)
10668 {
10669 if (reg_mentioned_p (reg_1, mem_1))
10670 return false;
10671
10672 /* In increasing order, the last load can clobber the address. */
10673 if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
10674 return false;
10675 }
10676
10677 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
10678 rclass_1 = FP_REGS;
10679 else
10680 rclass_1 = GENERAL_REGS;
10681
10682 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
10683 rclass_2 = FP_REGS;
10684 else
10685 rclass_2 = GENERAL_REGS;
10686
10687 /* Check if the registers are of the same class. */
10688 if (rclass_1 != rclass_2)
10689 return false;
10690
10691 return true;
10692 }
10693
10694 /* Given OPERANDS of consecutive load/store, check if we can merge
10695 them into ldp/stp by adjusting the offset. LOAD is true if they
10696 are load instructions. MODE is the mode of memory operands.
10697
10698 Given consecutive stores such as:
10699
10700 str w1, [xb, 0x100]
10701 str w1, [xb, 0x104]
10702 str w1, [xb, 0x108]
10703 str w1, [xb, 0x10c]
10704
10705 Though the offsets are out of the range supported by stp, we can
10706 still pair them after adjusting the offset, like:
10707
10708 add scratch, xb, 0x100
10709 stp w1, w1, [scratch]
10710 stp w1, w1, [scratch, 0x8]
10711
10712 The peephole patterns detecting this opportunity should guarantee
10713 the scratch register is available. */
10714
10715 bool
10716 aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
10717 enum machine_mode mode)
10718 {
10719 enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
10720 HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
10721 rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
10722 rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;
10723
10724 if (load)
10725 {
10726 reg_1 = operands[0];
10727 mem_1 = operands[1];
10728 reg_2 = operands[2];
10729 mem_2 = operands[3];
10730 reg_3 = operands[4];
10731 mem_3 = operands[5];
10732 reg_4 = operands[6];
10733 mem_4 = operands[7];
10734 gcc_assert (REG_P (reg_1) && REG_P (reg_2)
10735 && REG_P (reg_3) && REG_P (reg_4));
10736 if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
10737 return false;
10738 }
10739 else
10740 {
10741 mem_1 = operands[0];
10742 reg_1 = operands[1];
10743 mem_2 = operands[2];
10744 reg_2 = operands[3];
10745 mem_3 = operands[4];
10746 reg_3 = operands[5];
10747 mem_4 = operands[6];
10748 reg_4 = operands[7];
10749 }
10750 /* Skip if the memory operand is by itself valid for ldp/stp. */
10751 if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
10752 return false;
10753
10754 /* The mems cannot be volatile. */
10755 if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
10756 || MEM_VOLATILE_P (mem_3) || MEM_VOLATILE_P (mem_4))
10757 return false;
10758
10759 /* Check if the addresses are in the form of [base+offset]. */
10760 extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
10761 if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
10762 return false;
10763 extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
10764 if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
10765 return false;
10766 extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
10767 if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
10768 return false;
10769 extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
10770 if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
10771 return false;
10772
10773 /* Check if the bases are the same. */
10774 if (!rtx_equal_p (base_1, base_2)
10775 || !rtx_equal_p (base_2, base_3)
10776 || !rtx_equal_p (base_3, base_4))
10777 return false;
10778
10779 offval_1 = INTVAL (offset_1);
10780 offval_2 = INTVAL (offset_2);
10781 offval_3 = INTVAL (offset_3);
10782 offval_4 = INTVAL (offset_4);
10783 msize = GET_MODE_SIZE (mode);
10784 /* Check if the offsets are consecutive. */
10785 if ((offval_1 != (offval_2 + msize)
10786 || offval_1 != (offval_3 + msize * 2)
10787 || offval_1 != (offval_4 + msize * 3))
10788 && (offval_4 != (offval_3 + msize)
10789 || offval_4 != (offval_2 + msize * 2)
10790 || offval_4 != (offval_1 + msize * 3)))
10791 return false;
10792
10793 /* Check if the addresses are clobbered by load. */
10794 if (load)
10795 {
10796 if (reg_mentioned_p (reg_1, mem_1)
10797 || reg_mentioned_p (reg_2, mem_2)
10798 || reg_mentioned_p (reg_3, mem_3))
10799 return false;
10800
10801 /* In increasing order, the last load can clobber the address. */
10802 if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
10803 return false;
10804 }
10805
10806 if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
10807 rclass_1 = FP_REGS;
10808 else
10809 rclass_1 = GENERAL_REGS;
10810
10811 if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
10812 rclass_2 = FP_REGS;
10813 else
10814 rclass_2 = GENERAL_REGS;
10815
10816 if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
10817 rclass_3 = FP_REGS;
10818 else
10819 rclass_3 = GENERAL_REGS;
10820
10821 if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
10822 rclass_4 = FP_REGS;
10823 else
10824 rclass_4 = GENERAL_REGS;
10825
10826 /* Check if the registers are of the same class. */
10827 if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
10828 return false;
10829
10830 return true;
10831 }
10832
10833 /* Given OPERANDS of consecutive load/store, this function pairs them
10834 into ldp/stp after adjusting the offset. It depends on the fact
10835 that addresses of load/store instructions are in increasing order.
10836 MODE is the mode of memory operands. CODE is the rtl operator
10837 which should be applied to all memory operands; it is SIGN_EXTEND,
10838 ZERO_EXTEND or UNKNOWN. */
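/* As a worked example (SImode, so MSIZE == 4 and the limit below is
   0x100): stores at BASE + 0x21c .. BASE + 0x228 give ADJ_OFF == 0x200
   and NEW_OFF == 0x1c, so we emit roughly
     add  scratch, base, #0x200
     stp  w1, w2, [scratch, #0x1c]
     stp  w3, w4, [scratch, #0x24]. */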
10839
10840 bool
10841 aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
10842 enum machine_mode mode, RTX_CODE code)
10843 {
10844 rtx base, offset, t1, t2;
10845 rtx mem_1, mem_2, mem_3, mem_4;
10846 HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;
10847
10848 if (load)
10849 {
10850 mem_1 = operands[1];
10851 mem_2 = operands[3];
10852 mem_3 = operands[5];
10853 mem_4 = operands[7];
10854 }
10855 else
10856 {
10857 mem_1 = operands[0];
10858 mem_2 = operands[2];
10859 mem_3 = operands[4];
10860 mem_4 = operands[6];
10861 gcc_assert (code == UNKNOWN);
10862 }
10863
10864 extract_base_offset_in_addr (mem_1, &base, &offset);
10865 gcc_assert (base != NULL_RTX && offset != NULL_RTX);
10866
10867 /* Adjust the offset so it can fit in an ldp/stp instruction. */
10868 msize = GET_MODE_SIZE (mode);
10869 stp_off_limit = msize * 0x40;
10870 off_val = INTVAL (offset);
10871 abs_off = (off_val < 0) ? -off_val : off_val;
10872 new_off = abs_off % stp_off_limit;
10873 adj_off = abs_off - new_off;
10874
10875 /* Further adjust to make sure all offsets are OK. */
10876 if ((new_off + msize * 2) >= stp_off_limit)
10877 {
10878 adj_off += stp_off_limit;
10879 new_off -= stp_off_limit;
10880 }
10881
10882 /* Make sure the adjustment can be done with ADD/SUB instructions. */
10883 if (adj_off >= 0x1000)
10884 return false;
10885
10886 if (off_val < 0)
10887 {
10888 adj_off = -adj_off;
10889 new_off = -new_off;
10890 }
10891
10892 /* Create new memory references. */
10893 mem_1 = change_address (mem_1, VOIDmode,
10894 plus_constant (DImode, operands[8], new_off));
10895
10896 /* Check if the adjusted address is OK for ldp/stp. */
10897 if (!aarch64_mem_pair_operand (mem_1, mode))
10898 return false;
10899
10900 msize = GET_MODE_SIZE (mode);
10901 mem_2 = change_address (mem_2, VOIDmode,
10902 plus_constant (DImode,
10903 operands[8],
10904 new_off + msize));
10905 mem_3 = change_address (mem_3, VOIDmode,
10906 plus_constant (DImode,
10907 operands[8],
10908 new_off + msize * 2));
10909 mem_4 = change_address (mem_4, VOIDmode,
10910 plus_constant (DImode,
10911 operands[8],
10912 new_off + msize * 3));
10913
10914 if (code == ZERO_EXTEND)
10915 {
10916 mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
10917 mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
10918 mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
10919 mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
10920 }
10921 else if (code == SIGN_EXTEND)
10922 {
10923 mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
10924 mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
10925 mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
10926 mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
10927 }
10928
10929 if (load)
10930 {
10931 operands[1] = mem_1;
10932 operands[3] = mem_2;
10933 operands[5] = mem_3;
10934 operands[7] = mem_4;
10935 }
10936 else
10937 {
10938 operands[0] = mem_1;
10939 operands[2] = mem_2;
10940 operands[4] = mem_3;
10941 operands[6] = mem_4;
10942 }
10943
10944 /* Emit adjusting instruction. */
10945 emit_insn (gen_rtx_SET (VOIDmode, operands[8],
10946 plus_constant (DImode, base, adj_off)));
10947 /* Emit ldp/stp instructions. */
10948 t1 = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
10949 t2 = gen_rtx_SET (VOIDmode, operands[2], operands[3]);
10950 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
10951 t1 = gen_rtx_SET (VOIDmode, operands[4], operands[5]);
10952 t2 = gen_rtx_SET (VOIDmode, operands[6], operands[7]);
10953 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
10954 return true;
10955 }
10956
10957 #undef TARGET_ADDRESS_COST
10958 #define TARGET_ADDRESS_COST aarch64_address_cost
10959
10960 /* This hook determines whether unnamed bitfields affect the alignment
10961 of the containing structure. The hook returns true if the structure
10962 should inherit the alignment requirements of an unnamed bitfield's
10963 type. */
10964 #undef TARGET_ALIGN_ANON_BITFIELD
10965 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
10966
10967 #undef TARGET_ASM_ALIGNED_DI_OP
10968 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
10969
10970 #undef TARGET_ASM_ALIGNED_HI_OP
10971 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
10972
10973 #undef TARGET_ASM_ALIGNED_SI_OP
10974 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
10975
10976 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
10977 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
10978 hook_bool_const_tree_hwi_hwi_const_tree_true
10979
10980 #undef TARGET_ASM_FILE_START
10981 #define TARGET_ASM_FILE_START aarch64_start_file
10982
10983 #undef TARGET_ASM_OUTPUT_MI_THUNK
10984 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
10985
10986 #undef TARGET_ASM_SELECT_RTX_SECTION
10987 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
10988
10989 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
10990 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
10991
10992 #undef TARGET_BUILD_BUILTIN_VA_LIST
10993 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
10994
10995 #undef TARGET_CALLEE_COPIES
10996 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
10997
10998 #undef TARGET_CAN_ELIMINATE
10999 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
11000
11001 #undef TARGET_CANNOT_FORCE_CONST_MEM
11002 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
11003
11004 #undef TARGET_CONDITIONAL_REGISTER_USAGE
11005 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
11006
11007 /* Only the least significant bit is used for initialization guard
11008 variables. */
11009 #undef TARGET_CXX_GUARD_MASK_BIT
11010 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
11011
11012 #undef TARGET_C_MODE_FOR_SUFFIX
11013 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
11014
11015 #ifdef TARGET_BIG_ENDIAN_DEFAULT
11016 #undef TARGET_DEFAULT_TARGET_FLAGS
11017 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
11018 #endif
11019
11020 #undef TARGET_CLASS_MAX_NREGS
11021 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
11022
11023 #undef TARGET_BUILTIN_DECL
11024 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
11025
11026 #undef TARGET_EXPAND_BUILTIN
11027 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
11028
11029 #undef TARGET_EXPAND_BUILTIN_VA_START
11030 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
11031
11032 #undef TARGET_FOLD_BUILTIN
11033 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
11034
11035 #undef TARGET_FUNCTION_ARG
11036 #define TARGET_FUNCTION_ARG aarch64_function_arg
11037
11038 #undef TARGET_FUNCTION_ARG_ADVANCE
11039 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
11040
11041 #undef TARGET_FUNCTION_ARG_BOUNDARY
11042 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
11043
11044 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
11045 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
11046
11047 #undef TARGET_FUNCTION_VALUE
11048 #define TARGET_FUNCTION_VALUE aarch64_function_value
11049
11050 #undef TARGET_FUNCTION_VALUE_REGNO_P
11051 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
11052
11053 #undef TARGET_FRAME_POINTER_REQUIRED
11054 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
11055
11056 #undef TARGET_GIMPLE_FOLD_BUILTIN
11057 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
11058
11059 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
11060 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
11061
11062 #undef TARGET_INIT_BUILTINS
11063 #define TARGET_INIT_BUILTINS aarch64_init_builtins
11064
11065 #undef TARGET_LEGITIMATE_ADDRESS_P
11066 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
11067
11068 #undef TARGET_LEGITIMATE_CONSTANT_P
11069 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
11070
11071 #undef TARGET_LIBGCC_CMP_RETURN_MODE
11072 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
11073
11074 #undef TARGET_LRA_P
11075 #define TARGET_LRA_P aarch64_lra_p
11076
11077 #undef TARGET_MANGLE_TYPE
11078 #define TARGET_MANGLE_TYPE aarch64_mangle_type
11079
11080 #undef TARGET_MEMORY_MOVE_COST
11081 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
11082
11083 #undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
11084 #define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
11085
11086 #undef TARGET_MUST_PASS_IN_STACK
11087 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
11088
11089 /* This target hook should return true if accesses to volatile bitfields
11090 should use the narrowest mode possible. It should return false if these
11091 accesses should use the bitfield container type. */
11092 #undef TARGET_NARROW_VOLATILE_BITFIELD
11093 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
11094
11095 #undef TARGET_OPTION_OVERRIDE
11096 #define TARGET_OPTION_OVERRIDE aarch64_override_options
11097
11098 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
11099 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
11100 aarch64_override_options_after_change
11101
11102 #undef TARGET_PASS_BY_REFERENCE
11103 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
11104
11105 #undef TARGET_PREFERRED_RELOAD_CLASS
11106 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
11107
11108 #undef TARGET_SCHED_REASSOCIATION_WIDTH
11109 #define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
11110
11111 #undef TARGET_SECONDARY_RELOAD
11112 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
11113
11114 #undef TARGET_SHIFT_TRUNCATION_MASK
11115 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
11116
11117 #undef TARGET_SETUP_INCOMING_VARARGS
11118 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
11119
11120 #undef TARGET_STRUCT_VALUE_RTX
11121 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
11122
11123 #undef TARGET_REGISTER_MOVE_COST
11124 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
11125
11126 #undef TARGET_RETURN_IN_MEMORY
11127 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
11128
11129 #undef TARGET_RETURN_IN_MSB
11130 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
11131
11132 #undef TARGET_RTX_COSTS
11133 #define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
11134
11135 #undef TARGET_SCHED_ISSUE_RATE
11136 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
11137
11138 #undef TARGET_TRAMPOLINE_INIT
11139 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
11140
11141 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
11142 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
11143
11144 #undef TARGET_VECTOR_MODE_SUPPORTED_P
11145 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
11146
11147 #undef TARGET_ARRAY_MODE_SUPPORTED_P
11148 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
11149
11150 #undef TARGET_VECTORIZE_ADD_STMT_COST
11151 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
11152
11153 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
11154 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
11155 aarch64_builtin_vectorization_cost
11156
11157 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
11158 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
11159
11160 #undef TARGET_VECTORIZE_BUILTINS
11161 #define TARGET_VECTORIZE_BUILTINS
11162
11163 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
11164 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
11165 aarch64_builtin_vectorized_function
11166
11167 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
11168 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
11169 aarch64_autovectorize_vector_sizes
11170
11171 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
11172 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
11173 aarch64_atomic_assign_expand_fenv
11174
11175 /* Section anchor support. */
11176
11177 #undef TARGET_MIN_ANCHOR_OFFSET
11178 #define TARGET_MIN_ANCHOR_OFFSET -256
11179
11180 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
11181 byte offset; we can do much more for larger data types, but have no way
11182 to determine the size of the access. We assume accesses are aligned. */
11183 #undef TARGET_MAX_ANCHOR_OFFSET
11184 #define TARGET_MAX_ANCHOR_OFFSET 4095
11185
11186 #undef TARGET_VECTOR_ALIGNMENT
11187 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
11188
11189 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
11190 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
11191 aarch64_simd_vector_alignment_reachable
11192
11193 /* vec_perm support. */
11194
11195 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
11196 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
11197 aarch64_vectorize_vec_perm_const_ok
11198
11199
11200 #undef TARGET_FIXED_CONDITION_CODE_REGS
11201 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
11202
11203 #undef TARGET_FLAGS_REGNUM
11204 #define TARGET_FLAGS_REGNUM CC_REGNUM
11205
11206 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
11207 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
11208
11209 #undef TARGET_ASAN_SHADOW_OFFSET
11210 #define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
11211
11212 #undef TARGET_LEGITIMIZE_ADDRESS
11213 #define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address
11214
11215 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
11216 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
11217 aarch64_use_by_pieces_infrastructure_p
11218
11219 #undef TARGET_CAN_USE_DOLOOP_P
11220 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
11221
11222 #undef TARGET_SCHED_MACRO_FUSION_P
11223 #define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
11224
11225 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
11226 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
11227
11228 #undef TARGET_SCHED_FUSION_PRIORITY
11229 #define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
11230
11231 struct gcc_target targetm = TARGET_INITIALIZER;
11232
11233 #include "gt-aarch64.h"