1 /* Copyright (C) 2016-2021 Free Software Foundation, Inc.
2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
16
17 /* {{{ Includes. */
18
19 /* We want GET_MODE_SIZE et al to return integers, please. */
20 #define IN_TARGET_CODE 1
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "memmodel.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "df.h"
31 #include "tm_p.h"
32 #include "stringpool.h"
33 #include "optabs.h"
34 #include "regs.h"
35 #include "emit-rtl.h"
36 #include "recog.h"
37 #include "diagnostic-core.h"
38 #include "insn-attr.h"
39 #include "fold-const.h"
40 #include "calls.h"
41 #include "explow.h"
42 #include "expr.h"
43 #include "output.h"
44 #include "cfgrtl.h"
45 #include "langhooks.h"
46 #include "builtins.h"
47 #include "omp-general.h"
48 #include "print-rtl.h"
49 #include "attribs.h"
50 #include "varasm.h"
51 #include "intl.h"
52 #include "rtl-iter.h"
53
54 /* This file should be included last. */
55 #include "target-def.h"
56
57 /* }}} */
58 /* {{{ Global variables. */
59
60 /* Constants used by FP instructions. */
61
62 static REAL_VALUE_TYPE dconst4, dconst1over2pi;
63 static bool ext_gcn_constants_init = 0;
64
65 /* Holds the ISA variant, derived from the command line parameters. */
66
67 int gcn_isa = 3; /* Default to GCN3. */
68
69 /* Reserve this much space for LDS (for propagating variables from
70 worker-single mode to worker-partitioned mode), per workgroup. Global
71 analysis could calculate an exact bound, but we don't do that yet.
72
73 We want to permit full occupancy, so size accordingly. */
74
75 #define OMP_LDS_SIZE 0x600 /* 0x600 is 1/40 total, rounded down. */
76 #define ACC_LDS_SIZE 32768 /* Half of the total should be fine. */
77 #define OTHER_LDS_SIZE 65536 /* If in doubt, reserve all of it. */
78
79 #define LDS_SIZE (flag_openacc ? ACC_LDS_SIZE \
80 : flag_openmp ? OMP_LDS_SIZE \
81 : OTHER_LDS_SIZE)
82
83 /* The number of registers usable by normal non-kernel functions.
84 The SGPR count includes any special extra registers such as VCC. */
85
86 #define MAX_NORMAL_SGPR_COUNT 62 // i.e. 64 with VCC
87 #define MAX_NORMAL_VGPR_COUNT 24
88
89 /* }}} */
90 /* {{{ Initialization and options. */
91
92 /* Initialize machine_function. */
93
94 static struct machine_function *
95 gcn_init_machine_status (void)
96 {
97 struct machine_function *f;
98
99 f = ggc_cleared_alloc<machine_function> ();
100
101 /* Set up LDS allocation for broadcasting for this function. */
102 f->lds_allocated = 32;
103 f->lds_allocs = hash_map<tree, int>::create_ggc (64);
104
105 /* And LDS temporary decls for worker reductions. */
106 vec_alloc (f->reduc_decls, 0);
107
108 if (TARGET_GCN3)
109 f->use_flat_addressing = true;
110
111 return f;
112 }
113
114 /* Implement TARGET_OPTION_OVERRIDE.
115
116 Override option settings where defaults are variable, or we have specific
117 needs to consider. */
118
119 static void
120 gcn_option_override (void)
121 {
122 init_machine_status = gcn_init_machine_status;
123
124 /* The HSA runtime does not respect ELF load addresses, so force PIE. */
125 if (!flag_pie)
126 flag_pie = 2;
127 if (!flag_pic)
128 flag_pic = flag_pie;
129
130 gcn_isa = gcn_arch == PROCESSOR_FIJI ? 3 : 5;
131
132 /* The default stack size needs to be small for offload kernels because
133 there may be many, many threads. Also, a smaller stack gives a
134 measurable performance boost. But a small stack is insufficient
135 for running the testsuite, so we use a larger default for the
136 stand-alone case. */
137 if (stack_size_opt == -1)
138 {
139 if (flag_openacc || flag_openmp)
140 /* 512 bytes per work item = 32kB total. */
141 stack_size_opt = 512 * 64;
142 else
143 /* 1MB total. */
144 stack_size_opt = 1048576;
145 }
146 }
147
148 /* }}} */
149 /* {{{ Attributes. */
150
151 /* This table defines the arguments that are permitted in
152 __attribute__ ((amdgpu_hsa_kernel (...))).
153
154 The names and values correspond to the HSA metadata that is encoded
155 into the assembler file and binary. */
156
157 static const struct gcn_kernel_arg_type
158 {
159 const char *name;
160 const char *header_pseudo;
161 machine_mode mode;
162
163 /* This should be set to -1 or -2 for a dynamically allocated register
164 number. Use -1 if this argument contributes to the user_sgpr_count,
165 -2 otherwise. */
166 int fixed_regno;
167 } gcn_kernel_arg_types[] = {
168 {"exec", NULL, DImode, EXEC_REG},
169 #define PRIVATE_SEGMENT_BUFFER_ARG 1
170 {"private_segment_buffer",
171 ".amdhsa_user_sgpr_private_segment_buffer", TImode, -1},
172 #define DISPATCH_PTR_ARG 2
173 {"dispatch_ptr", ".amdhsa_user_sgpr_dispatch_ptr", DImode, -1},
174 #define QUEUE_PTR_ARG 3
175 {"queue_ptr", ".amdhsa_user_sgpr_queue_ptr", DImode, -1},
176 #define KERNARG_SEGMENT_PTR_ARG 4
177 {"kernarg_segment_ptr", ".amdhsa_user_sgpr_kernarg_segment_ptr", DImode, -1},
178 {"dispatch_id", ".amdhsa_user_sgpr_dispatch_id", DImode, -1},
179 #define FLAT_SCRATCH_INIT_ARG 6
180 {"flat_scratch_init", ".amdhsa_user_sgpr_flat_scratch_init", DImode, -1},
181 #define FLAT_SCRATCH_SEGMENT_SIZE_ARG 7
182 {"private_segment_size", ".amdhsa_user_sgpr_private_segment_size", SImode, -1},
183 #define WORKGROUP_ID_X_ARG 8
184 {"workgroup_id_X", ".amdhsa_system_sgpr_workgroup_id_x", SImode, -2},
185 {"workgroup_id_Y", ".amdhsa_system_sgpr_workgroup_id_y", SImode, -2},
186 {"workgroup_id_Z", ".amdhsa_system_sgpr_workgroup_id_z", SImode, -2},
187 {"workgroup_info", ".amdhsa_system_sgpr_workgroup_info", SImode, -1},
188 #define PRIVATE_SEGMENT_WAVE_OFFSET_ARG 12
189 {"private_segment_wave_offset",
190 ".amdhsa_system_sgpr_private_segment_wavefront_offset", SImode, -2},
191 #define WORK_ITEM_ID_X_ARG 13
192 {"work_item_id_X", NULL, V64SImode, FIRST_VGPR_REG},
193 #define WORK_ITEM_ID_Y_ARG 14
194 {"work_item_id_Y", NULL, V64SImode, FIRST_VGPR_REG + 1},
195 #define WORK_ITEM_ID_Z_ARG 15
196 {"work_item_id_Z", NULL, V64SImode, FIRST_VGPR_REG + 2}
197 };
198
199 static const long default_requested_args
200 = (1 << PRIVATE_SEGMENT_BUFFER_ARG)
201 | (1 << DISPATCH_PTR_ARG)
202 | (1 << QUEUE_PTR_ARG)
203 | (1 << KERNARG_SEGMENT_PTR_ARG)
204 | (1 << PRIVATE_SEGMENT_WAVE_OFFSET_ARG)
205 | (1 << WORKGROUP_ID_X_ARG)
206 | (1 << WORK_ITEM_ID_X_ARG)
207 | (1 << WORK_ITEM_ID_Y_ARG)
208 | (1 << WORK_ITEM_ID_Z_ARG);
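/* A sketch of how the attribute is written in user code (the strings must
   name entries of gcn_kernel_arg_types; the function name here is purely
   illustrative):

     void kern (void)
       __attribute__((amdgpu_hsa_kernel ("flat_scratch_init",
                                         "workgroup_id_Y")));

   Arguments not named explicitly are still requested via
   default_requested_args above; naming one of the defaults again is
   rejected as a duplicate.  */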
209
210 /* Extract parameter settings from __attribute__((amdgpu_hsa_kernel ())).
211 This function also sets the default values for some arguments.
212
213 Return true if an unknown or duplicated argument specifier is seen; ARGS is populated as the list is parsed. */
214
215 static bool
216 gcn_parse_amdgpu_hsa_kernel_attribute (struct gcn_kernel_args *args,
217 tree list)
218 {
219 bool err = false;
220 args->requested = default_requested_args;
221 args->nargs = 0;
222
223 for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
224 args->reg[a] = -1;
225
226 for (; list; list = TREE_CHAIN (list))
227 {
228 const char *str;
229 if (TREE_CODE (TREE_VALUE (list)) != STRING_CST)
230 {
231 error ("amdgpu_hsa_kernel attribute requires string constant "
232 "arguments");
233 break;
234 }
235 str = TREE_STRING_POINTER (TREE_VALUE (list));
236 int a;
237 for (a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
238 {
239 if (!strcmp (str, gcn_kernel_arg_types[a].name))
240 break;
241 }
242 if (a == GCN_KERNEL_ARG_TYPES)
243 {
244 error ("unknown specifier %s in amdgpu_hsa_kernel attribute", str);
245 err = true;
246 break;
247 }
248 if (args->requested & (1 << a))
249 {
250 error ("duplicated parameter specifier %s in amdgpu_hsa_kernel "
251 "attribute", str);
252 err = true;
253 break;
254 }
255 args->requested |= (1 << a);
256 args->order[args->nargs++] = a;
257 }
258
259 /* Requesting WORK_ITEM_ID_Z_ARG implies requesting WORK_ITEM_ID_X_ARG and
260 WORK_ITEM_ID_Y_ARG. Similarly, requesting WORK_ITEM_ID_Y_ARG implies
261 requesting WORK_ITEM_ID_X_ARG. */
262 if (args->requested & (1 << WORK_ITEM_ID_Z_ARG))
263 args->requested |= (1 << WORK_ITEM_ID_Y_ARG);
264 if (args->requested & (1 << WORK_ITEM_ID_Y_ARG))
265 args->requested |= (1 << WORK_ITEM_ID_X_ARG);
266
267 int sgpr_regno = FIRST_SGPR_REG;
268 args->nsgprs = 0;
269 for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
270 {
271 if (!(args->requested & (1 << a)))
272 continue;
273
274 if (gcn_kernel_arg_types[a].fixed_regno >= 0)
275 args->reg[a] = gcn_kernel_arg_types[a].fixed_regno;
276 else
277 {
278 int reg_count;
279
280 switch (gcn_kernel_arg_types[a].mode)
281 {
282 case E_SImode:
283 reg_count = 1;
284 break;
285 case E_DImode:
286 reg_count = 2;
287 break;
288 case E_TImode:
289 reg_count = 4;
290 break;
291 default:
292 gcc_unreachable ();
293 }
294 args->reg[a] = sgpr_regno;
295 sgpr_regno += reg_count;
296 if (gcn_kernel_arg_types[a].fixed_regno == -1)
297 args->nsgprs += reg_count;
298 }
299 }
300 if (sgpr_regno > FIRST_SGPR_REG + 16)
301 {
302 error ("too many arguments passed in sgpr registers");
303 }
304 return err;
305 }
306
307 /* Referenced by TARGET_ATTRIBUTE_TABLE.
308
309 Validates target specific attributes. */
310
311 static tree
312 gcn_handle_amdgpu_hsa_kernel_attribute (tree *node, tree name,
313 tree args, int, bool *no_add_attrs)
314 {
315 if (!FUNC_OR_METHOD_TYPE_P (*node))
316 {
317 warning (OPT_Wattributes, "%qE attribute only applies to functions",
318 name);
319 *no_add_attrs = true;
320 return NULL_TREE;
321 }
322
323 /* Can combine regparm with all attributes but fastcall, and thiscall. */
324 if (is_attribute_p ("gcnhsa_kernel", name))
325 {
326 struct gcn_kernel_args kernelarg;
327
328 if (gcn_parse_amdgpu_hsa_kernel_attribute (&kernelarg, args))
329 *no_add_attrs = true;
330
331 return NULL_TREE;
332 }
333
334 return NULL_TREE;
335 }
336
337 /* Implement TARGET_ATTRIBUTE_TABLE.
338
339 Create target-specific __attribute__ types. */
340
341 static const struct attribute_spec gcn_attribute_table[] = {
342 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
343 affects_type_identity, handler, exclude } */
344 {"amdgpu_hsa_kernel", 0, GCN_KERNEL_ARG_TYPES, false, true,
345 true, true, gcn_handle_amdgpu_hsa_kernel_attribute, NULL},
346 /* End element. */
347 {NULL, 0, 0, false, false, false, false, NULL, NULL}
348 };
349
350 /* }}} */
351 /* {{{ Registers and modes. */
352
353 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P. */
354
355 bool
356 gcn_scalar_mode_supported_p (scalar_mode mode)
357 {
358 return (mode == BImode
359 || mode == QImode
360 || mode == HImode /* || mode == HFmode */
361 || mode == SImode || mode == SFmode
362 || mode == DImode || mode == DFmode
363 || mode == TImode);
364 }
365
366 /* Implement TARGET_CLASS_MAX_NREGS.
367
368 Return the number of hard registers needed to hold a value of MODE in
369 a register of class RCLASS. */
370
371 static unsigned char
372 gcn_class_max_nregs (reg_class_t rclass, machine_mode mode)
373 {
374 /* Scalar registers are 32bit, vector registers are in fact tuples of
375 64 lanes. */
376 if (rclass == VGPR_REGS)
377 {
378 if (vgpr_1reg_mode_p (mode))
379 return 1;
380 if (vgpr_2reg_mode_p (mode))
381 return 2;
382 /* TImode is used by DImode compare_and_swap. */
383 if (mode == TImode)
384 return 4;
385 }
386 else if (rclass == VCC_CONDITIONAL_REG && mode == BImode)
387 return 2;
388 return CEIL (GET_MODE_SIZE (mode), 4);
389 }
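/* Worked examples of the rule above: DImode in SGPR_REGS needs
   CEIL (8, 4) == 2 scalar registers; V64SImode occupies a single VGPR,
   because one VGPR already holds 64 32-bit lanes; V64DImode needs a VGPR
   pair; a BImode value in VCC_CONDITIONAL_REG covers the VCC_LO/VCC_HI
   pair.  */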
390
391 /* Implement TARGET_HARD_REGNO_NREGS.
392
393 Return the number of hard registers needed to hold a value of MODE in
394 REGNO. */
395
396 unsigned int
397 gcn_hard_regno_nregs (unsigned int regno, machine_mode mode)
398 {
399 return gcn_class_max_nregs (REGNO_REG_CLASS (regno), mode);
400 }
401
402 /* Implement TARGET_HARD_REGNO_MODE_OK.
403
404 Return true if REGNO can hold value in MODE. */
405
406 bool
407 gcn_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
408 {
409 /* Treat a complex mode as if it were a scalar mode of the same overall
410 size for the purposes of allocating hard registers. */
411 if (COMPLEX_MODE_P (mode))
412 switch (mode)
413 {
414 case E_CQImode:
415 case E_CHImode:
416 mode = SImode;
417 break;
418 case E_CSImode:
419 mode = DImode;
420 break;
421 case E_CDImode:
422 mode = TImode;
423 break;
424 case E_HCmode:
425 mode = SFmode;
426 break;
427 case E_SCmode:
428 mode = DFmode;
429 break;
430 default:
431 /* Not supported. */
432 return false;
433 }
434
435 switch (regno)
436 {
437 case FLAT_SCRATCH_LO_REG:
438 case XNACK_MASK_LO_REG:
439 case TBA_LO_REG:
440 case TMA_LO_REG:
441 return (mode == SImode || mode == DImode);
442 case VCC_LO_REG:
443 case EXEC_LO_REG:
444 return (mode == BImode || mode == SImode || mode == DImode);
445 case M0_REG:
446 case FLAT_SCRATCH_HI_REG:
447 case XNACK_MASK_HI_REG:
448 case TBA_HI_REG:
449 case TMA_HI_REG:
450 return mode == SImode;
451 case VCC_HI_REG:
452 return false;
453 case EXEC_HI_REG:
454 return mode == SImode /*|| mode == V32BImode */ ;
455 case SCC_REG:
456 case VCCZ_REG:
457 case EXECZ_REG:
458 return mode == BImode;
459 }
460 if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
461 return true;
462 if (SGPR_REGNO_P (regno))
463 /* We restrict double register values to aligned registers. */
464 return (sgpr_1reg_mode_p (mode)
465 || (!((regno - FIRST_SGPR_REG) & 1) && sgpr_2reg_mode_p (mode))
466 || (((regno - FIRST_SGPR_REG) & 3) == 0 && mode == TImode));
467 if (VGPR_REGNO_P (regno))
468 /* Vector instructions do not care about the alignment of register
469 pairs, but where there is no 64-bit instruction, many of the
470 define_split do not work if the input and output registers partially
471 overlap. We tried to fix this with early clobber and match
472 constraints, but it was bug-prone, added complexity, and conflicted
473 with the 'U0' constraints on vec_merge.
474 Therefore, we restrict ourselves to aligned registers. */
475 return (vgpr_1reg_mode_p (mode)
476 || (!((regno - FIRST_VGPR_REG) & 1) && vgpr_2reg_mode_p (mode))
477 /* TImode is used by DImode compare_and_swap. */
478 || (mode == TImode
479 && !((regno - FIRST_VGPR_REG) & 3)));
480 return false;
481 }
482
483 /* Implement REGNO_REG_CLASS via gcn.h.
484
485 Return smallest class containing REGNO. */
486
487 enum reg_class
488 gcn_regno_reg_class (int regno)
489 {
490 switch (regno)
491 {
492 case SCC_REG:
493 return SCC_CONDITIONAL_REG;
494 case VCC_LO_REG:
495 case VCC_HI_REG:
496 return VCC_CONDITIONAL_REG;
497 case VCCZ_REG:
498 return VCCZ_CONDITIONAL_REG;
499 case EXECZ_REG:
500 return EXECZ_CONDITIONAL_REG;
501 case EXEC_LO_REG:
502 case EXEC_HI_REG:
503 return EXEC_MASK_REG;
504 }
505 if (VGPR_REGNO_P (regno))
506 return VGPR_REGS;
507 if (SGPR_REGNO_P (regno))
508 return SGPR_REGS;
509 if (regno < FIRST_VGPR_REG)
510 return GENERAL_REGS;
511 if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
512 return AFP_REGS;
513 return ALL_REGS;
514 }
515
516 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
517
518 GCC assumes that lowpart contains first part of value as stored in memory.
519 This is not the case for vector registers. */
520
521 bool
522 gcn_can_change_mode_class (machine_mode from, machine_mode to,
523 reg_class_t regclass)
524 {
525 if (!vgpr_vector_mode_p (from) && !vgpr_vector_mode_p (to))
526 return true;
527 return (gcn_class_max_nregs (regclass, from)
528 == gcn_class_max_nregs (regclass, to));
529 }
530
531 /* Implement TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P.
532
533 When this hook returns true for MODE, the compiler allows
534 registers explicitly used in the rtl to be used as spill registers
535 but prevents the compiler from extending the lifetime of these
536 registers. */
537
538 bool
539 gcn_small_register_classes_for_mode_p (machine_mode mode)
540 {
541 /* We allocate into exec and vcc regs; those form small register classes. */
542 return mode == DImode || mode == SImode;
543 }
544
545 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
546
547 Returns true if pseudos that have been assigned to registers of class RCLASS
548 would likely be spilled because registers of RCLASS are needed for spill
549 registers. */
550
551 static bool
552 gcn_class_likely_spilled_p (reg_class_t rclass)
553 {
554 return (rclass == EXEC_MASK_REG
555 || reg_classes_intersect_p (ALL_CONDITIONAL_REGS, rclass));
556 }
557
558 /* Implement TARGET_MODES_TIEABLE_P.
559
560 Returns true if a value of MODE1 is accessible in MODE2 without
561 copying. */
562
563 bool
564 gcn_modes_tieable_p (machine_mode mode1, machine_mode mode2)
565 {
566 return (GET_MODE_BITSIZE (mode1) <= MAX_FIXED_MODE_SIZE
567 && GET_MODE_BITSIZE (mode2) <= MAX_FIXED_MODE_SIZE);
568 }
569
570 /* Implement TARGET_TRULY_NOOP_TRUNCATION.
571
572 Returns true if it is safe to "convert" a value of INPREC bits to one of
573 OUTPREC bits (where OUTPREC is smaller than INPREC) by merely operating on
574 it as if it had only OUTPREC bits. */
575
576 bool
577 gcn_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec)
578 {
579 return ((inprec <= 32) && (outprec <= inprec));
580 }
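/* For example, truncating SImode to QImode is a no-op here because both
   values live in one 32-bit register, whereas DImode to SImode is not
   (INPREC is 64), since the wider value may occupy a register pair.  */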
581
582 /* Return N-th part of value occupying multiple registers. */
583
584 rtx
585 gcn_operand_part (machine_mode mode, rtx op, int n)
586 {
587 if (GET_MODE_SIZE (mode) >= 256)
588 {
589 /*gcc_assert (GET_MODE_SIZE (mode) == 256 || n == 0); */
590
591 if (REG_P (op))
592 {
593 gcc_assert (REGNO (op) + n < FIRST_PSEUDO_REGISTER);
594 return gen_rtx_REG (V64SImode, REGNO (op) + n);
595 }
596 if (GET_CODE (op) == CONST_VECTOR)
597 {
598 int units = GET_MODE_NUNITS (mode);
599 rtvec v = rtvec_alloc (units);
600
601 for (int i = 0; i < units; ++i)
602 RTVEC_ELT (v, i) = gcn_operand_part (GET_MODE_INNER (mode),
603 CONST_VECTOR_ELT (op, i), n);
604
605 return gen_rtx_CONST_VECTOR (V64SImode, v);
606 }
607 if (GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_VECTOR)
608 return gcn_gen_undef (V64SImode);
609 gcc_unreachable ();
610 }
611 else if (GET_MODE_SIZE (mode) == 8 && REG_P (op))
612 {
613 gcc_assert (REGNO (op) + n < FIRST_PSEUDO_REGISTER);
614 return gen_rtx_REG (SImode, REGNO (op) + n);
615 }
616 else
617 {
618 if (GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_VECTOR)
619 return gcn_gen_undef (SImode);
620
621 /* If it's a constant then let's assume it is of the largest mode
622 available, otherwise simplify_gen_subreg will fail. */
623 if (mode == VOIDmode && CONST_INT_P (op))
624 mode = DImode;
625 return simplify_gen_subreg (SImode, op, mode, n * 4);
626 }
627 }
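/* For instance, gcn_operand_part (DImode, reg, 1) for a hard register pair
   yields the SImode register holding the high 32 bits, and part N of a
   V64DImode register value is the N-th V64SImode register of the pair.  */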
628
629 /* Return N-th part of value occupying multiple registers. */
630
631 rtx
632 gcn_operand_doublepart (machine_mode mode, rtx op, int n)
633 {
634 return simplify_gen_subreg (DImode, op, mode, n * 8);
635 }
636
637 /* Return true if OP can be split into subregs or high/low parts.
638 This is always true for scalars, but not normally true for vectors.
639 However, for vectors in hardregs we can use the low and high registers. */
640
641 bool
642 gcn_can_split_p (machine_mode, rtx op)
643 {
644 if (vgpr_vector_mode_p (GET_MODE (op)))
645 {
646 if (GET_CODE (op) == SUBREG)
647 op = SUBREG_REG (op);
648 if (!REG_P (op))
649 return true;
650 return REGNO (op) <= FIRST_PSEUDO_REGISTER;
651 }
652 return true;
653 }
654
655 /* Implement TARGET_SPILL_CLASS.
656
657 Return class of registers which could be used for pseudo of MODE
658 and of class RCLASS for spilling instead of memory. Return NO_REGS
659 if it is not possible or non-profitable. */
660
661 static reg_class_t
662 gcn_spill_class (reg_class_t c, machine_mode /*mode */ )
663 {
664 if (reg_classes_intersect_p (ALL_CONDITIONAL_REGS, c)
665 || c == VCC_CONDITIONAL_REG)
666 return SGPR_REGS;
667 else
668 return NO_REGS;
669 }
670
671 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
672
673 Change allocno class for given pseudo from allocno and best class
674 calculated by IRA. */
675
676 static reg_class_t
677 gcn_ira_change_pseudo_allocno_class (int regno, reg_class_t cl,
678 reg_class_t best_cl)
679 {
680 /* Avoid returning classes that contain both vgpr and sgpr registers. */
681 if (cl != ALL_REGS && cl != SRCDST_REGS && cl != ALL_GPR_REGS)
682 return cl;
683 if (best_cl != ALL_REGS && best_cl != SRCDST_REGS
684 && best_cl != ALL_GPR_REGS)
685 return best_cl;
686
687 machine_mode mode = PSEUDO_REGNO_MODE (regno);
688 if (vgpr_vector_mode_p (mode))
689 return VGPR_REGS;
690
691 return GENERAL_REGS;
692 }
693
694 /* Create a new DImode pseudo reg and emit an instruction to initialize
695 it to VAL. */
696
697 static rtx
698 get_exec (int64_t val)
699 {
700 rtx reg = gen_reg_rtx (DImode);
701 emit_insn (gen_rtx_SET (reg, gen_int_mode (val, DImode)));
702 return reg;
703 }
704
705 /* Return value of scalar exec register. */
706
707 rtx
708 gcn_scalar_exec ()
709 {
710 return const1_rtx;
711 }
712
713 /* Return pseudo holding scalar exec register. */
714
715 rtx
716 gcn_scalar_exec_reg ()
717 {
718 return get_exec (1);
719 }
720
721 /* Return value of full exec register. */
722
723 rtx
724 gcn_full_exec ()
725 {
726 return constm1_rtx;
727 }
728
729 /* Return pseudo holding full exec register. */
730
731 rtx
732 gcn_full_exec_reg ()
733 {
734 return get_exec (-1);
735 }
736
737 /* }}} */
738 /* {{{ Immediate constants. */
739
740 /* Initialize shared numeric constants. */
741
742 static void
743 init_ext_gcn_constants (void)
744 {
745 real_from_integer (&dconst4, DFmode, 4, SIGNED);
746
747 /* FIXME: this constant probably does not match what hardware really loads.
748 Reality check it eventually. */
749 real_from_string (&dconst1over2pi,
750 "0.1591549430918953357663423455968866839");
751 real_convert (&dconst1over2pi, SFmode, &dconst1over2pi);
752
753 ext_gcn_constants_init = 1;
754 }
755
756 /* Return non-zero if X is a constant that can appear as an inline operand.
757 This is 0, 0.5, -0.5, 1, -1, 2, -2, 4, -4, or 1/(2*pi),
758 or a vector of those.
759 The value returned should be the encoding of this constant. */
760
761 int
762 gcn_inline_fp_constant_p (rtx x, bool allow_vector)
763 {
764 machine_mode mode = GET_MODE (x);
765
766 if ((mode == V64HFmode || mode == V64SFmode || mode == V64DFmode)
767 && allow_vector)
768 {
769 int n;
770 if (GET_CODE (x) != CONST_VECTOR)
771 return 0;
772 n = gcn_inline_fp_constant_p (CONST_VECTOR_ELT (x, 0), false);
773 if (!n)
774 return 0;
775 for (int i = 1; i < 64; i++)
776 if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
777 return 0;
778 return 1;
779 }
780
781 if (mode != HFmode && mode != SFmode && mode != DFmode)
782 return 0;
783
784 const REAL_VALUE_TYPE *r;
785
786 if (x == CONST0_RTX (mode))
787 return 128;
788 if (x == CONST1_RTX (mode))
789 return 242;
790
791 r = CONST_DOUBLE_REAL_VALUE (x);
792
793 if (real_identical (r, &dconstm1))
794 return 243;
795
796 if (real_identical (r, &dconsthalf))
797 return 240;
798 if (real_identical (r, &dconstm1))
799 return 243;
800 if (real_identical (r, &dconst2))
801 return 244;
802 if (real_identical (r, &dconst4))
803 return 246;
804 if (real_identical (r, &dconst1over2pi))
805 return 248;
806 if (!ext_gcn_constants_init)
807 init_ext_gcn_constants ();
808 real_value_negate (r);
809 if (real_identical (r, &dconsthalf))
810 return 241;
811 if (real_identical (r, &dconst2))
812 return 245;
813 if (real_identical (r, &dconst4))
814 return 247;
815
816 /* FIXME: add 4, -4 and 1/(2*PI). */
817
818 return 0;
819 }
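/* The encodings returned above match the hardware's inline src-operand
   values: 128 for +0.0, 240/241 for +/-0.5, 242/243 for +/-1.0, 244/245 for
   +/-2.0, 246/247 for +/-4.0, and 248 for 1/(2*pi).  */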
820
821 /* Return true if X is a floating-point constant that can appear as an
822 immediate operand: either an inline constant (see above) or, for SFmode
823 and HFmode, any value that fits in a 32-bit literal. Or a vector of
824 those. */
825
826 bool
827 gcn_fp_constant_p (rtx x, bool allow_vector)
828 {
829 machine_mode mode = GET_MODE (x);
830
831 if ((mode == V64HFmode || mode == V64SFmode || mode == V64DFmode)
832 && allow_vector)
833 {
834 int n;
835 if (GET_CODE (x) != CONST_VECTOR)
836 return false;
837 n = gcn_fp_constant_p (CONST_VECTOR_ELT (x, 0), false);
838 if (!n)
839 return false;
840 for (int i = 1; i < 64; i++)
841 if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
842 return false;
843 return true;
844 }
845 if (mode != HFmode && mode != SFmode && mode != DFmode)
846 return false;
847
848 if (gcn_inline_fp_constant_p (x, false))
849 return true;
850 /* FIXME: It is not clear how 32bit immediates are interpreted here. */
851 return (mode != DFmode);
852 }
853
854 /* Return true if X is a constant representable as an inline immediate
855 constant in a 32-bit instruction encoding. */
856
857 bool
858 gcn_inline_constant_p (rtx x)
859 {
860 if (GET_CODE (x) == CONST_INT)
861 return INTVAL (x) >= -16 && INTVAL (x) <= 64;
862 if (GET_CODE (x) == CONST_DOUBLE)
863 return gcn_inline_fp_constant_p (x, false);
864 if (GET_CODE (x) == CONST_VECTOR)
865 {
866 int n;
867 if (!vgpr_vector_mode_p (GET_MODE (x)))
868 return false;
869 n = gcn_inline_constant_p (CONST_VECTOR_ELT (x, 0));
870 if (!n)
871 return false;
872 for (int i = 1; i < 64; i++)
873 if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
874 return false;
875 return 1;
876 }
877 return false;
878 }
879
880 /* Return true if X is a constant representable as an immediate constant
881 in a 32 or 64-bit instruction encoding. */
882
883 bool
884 gcn_constant_p (rtx x)
885 {
886 switch (GET_CODE (x))
887 {
888 case CONST_INT:
889 return true;
890
891 case CONST_DOUBLE:
892 return gcn_fp_constant_p (x, false);
893
894 case CONST_VECTOR:
895 {
896 int n;
897 if (!vgpr_vector_mode_p (GET_MODE (x)))
898 return false;
899 n = gcn_constant_p (CONST_VECTOR_ELT (x, 0));
900 if (!n)
901 return false;
902 for (int i = 1; i < 64; i++)
903 if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
904 return false;
905 return true;
906 }
907
908 case SYMBOL_REF:
909 case LABEL_REF:
910 return true;
911
912 default:
913 ;
914 }
915
916 return false;
917 }
918
919 /* Return true if X is a constant representable as two inline immediate
920 constants in a 64-bit instruction that is split into two 32-bit
921 instructions.
922 When MIXED is set, the low part is permitted to use the full 32 bits. */
923
924 bool
925 gcn_inline_constant64_p (rtx x, bool mixed)
926 {
927 if (GET_CODE (x) == CONST_VECTOR)
928 {
929 if (!vgpr_vector_mode_p (GET_MODE (x)))
930 return false;
931 if (!gcn_inline_constant64_p (CONST_VECTOR_ELT (x, 0), mixed))
932 return false;
933 for (int i = 1; i < 64; i++)
934 if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
935 return false;
936
937 return true;
938 }
939
940 if (GET_CODE (x) != CONST_INT)
941 return false;
942
943 rtx val_lo = gcn_operand_part (DImode, x, 0);
944 rtx val_hi = gcn_operand_part (DImode, x, 1);
945 return ((mixed || gcn_inline_constant_p (val_lo))
946 && gcn_inline_constant_p (val_hi));
947 }
948
949 /* Return true if X is a constant representable as an immediate constant
950 in a 32 or 64-bit instruction encoding where the hardware will
951 extend the immediate to 64-bits. */
952
953 bool
954 gcn_constant64_p (rtx x)
955 {
956 if (!gcn_constant_p (x))
957 return false;
958
959 if (GET_CODE (x) != CONST_INT)
960 return true;
961
962 /* Negative numbers are only allowed if they can be encoded within src0,
963 because the 32-bit immediates do not get sign-extended.
964 Unsigned numbers must not be encodable as 32-bit -1..-16, because the
965 assembler will use a src0 inline immediate and that will get
966 sign-extended. */
967 HOST_WIDE_INT val = INTVAL (x);
968 return (((val & 0xffffffff) == val /* Positive 32-bit. */
969 && (val & 0xfffffff0) != 0xfffffff0) /* Not -1..-16. */
970 || gcn_inline_constant_p (x)); /* Src0. */
971 }
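/* Some concrete cases of the rule above: 0x12345678 is accepted as a
   positive 32-bit literal; -5 is accepted via the inline src0 range -16..64;
   0xfffffff5 is rejected because the assembler would emit it as the inline
   constant -11 and sign-extend it; 0x100000000 is rejected because it needs
   a genuine 64-bit literal.  */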
972
973 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
974
975 Returns true if X is a legitimate constant for a MODE immediate operand. */
976
977 bool
978 gcn_legitimate_constant_p (machine_mode, rtx x)
979 {
980 return gcn_constant_p (x);
981 }
982
983 /* Return true if X is a CONST_VECTOR of a single duplicated constant. */
984
985 static bool
986 single_cst_vector_p (rtx x)
987 {
988 if (GET_CODE (x) != CONST_VECTOR)
989 return false;
990 for (int i = 1; i < 64; i++)
991 if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
992 return false;
993 return true;
994 }
995
996 /* Create a CONST_VECTOR of duplicated value A. */
997
998 rtx
999 gcn_vec_constant (machine_mode mode, int a)
1000 {
1001 /*if (!a)
1002 return CONST0_RTX (mode);
1003 if (a == -1)
1004 return CONSTM1_RTX (mode);
1005 if (a == 1)
1006 return CONST1_RTX (mode);
1007 if (a == 2)
1008 return CONST2_RTX (mode);*/
1009
1010 int units = GET_MODE_NUNITS (mode);
1011 machine_mode innermode = GET_MODE_INNER (mode);
1012
1013 rtx tem;
1014 if (FLOAT_MODE_P (innermode))
1015 {
1016 REAL_VALUE_TYPE rv;
1017 real_from_integer (&rv, NULL, a, SIGNED);
1018 tem = const_double_from_real_value (rv, innermode);
1019 }
1020 else
1021 tem = gen_int_mode (a, innermode);
1022
1023 rtvec v = rtvec_alloc (units);
1024 for (int i = 0; i < units; ++i)
1025 RTVEC_ELT (v, i) = tem;
1026
1027 return gen_rtx_CONST_VECTOR (mode, v);
1028 }
1029
1030 /* Create a CONST_VECTOR of duplicated value A. */
1031
1032 rtx
1033 gcn_vec_constant (machine_mode mode, rtx a)
1034 {
1035 int units = GET_MODE_NUNITS (mode);
1036 rtvec v = rtvec_alloc (units);
1037
1038 for (int i = 0; i < units; ++i)
1039 RTVEC_ELT (v, i) = a;
1040
1041 return gen_rtx_CONST_VECTOR (mode, v);
1042 }
1043
1044 /* Create an undefined vector value, used where an insn operand is
1045 optional. */
1046
1047 rtx
1048 gcn_gen_undef (machine_mode mode)
1049 {
1050 return gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), UNSPEC_VECTOR);
1051 }
1052
1053 /* }}} */
1054 /* {{{ Addresses, pointers and moves. */
1055
1056 /* Return true if REG is a valid place to store a pointer,
1057 for instructions that require an SGPR.
1058 FIXME rename. */
1059
1060 static bool
1061 gcn_address_register_p (rtx reg, machine_mode mode, bool strict)
1062 {
1063 if (GET_CODE (reg) == SUBREG)
1064 reg = SUBREG_REG (reg);
1065
1066 if (!REG_P (reg))
1067 return false;
1068
1069 if (GET_MODE (reg) != mode)
1070 return false;
1071
1072 int regno = REGNO (reg);
1073
1074 if (regno >= FIRST_PSEUDO_REGISTER)
1075 {
1076 if (!strict)
1077 return true;
1078
1079 if (!reg_renumber)
1080 return false;
1081
1082 regno = reg_renumber[regno];
1083 }
1084
1085 return (SGPR_REGNO_P (regno) || regno == M0_REG
1086 || regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM);
1087 }
1088
1089 /* Return true if REG is a valid place to store a pointer,
1090 for instructions that require a VGPR. */
1091
1092 static bool
1093 gcn_vec_address_register_p (rtx reg, machine_mode mode, bool strict)
1094 {
1095 if (GET_CODE (reg) == SUBREG)
1096 reg = SUBREG_REG (reg);
1097
1098 if (!REG_P (reg))
1099 return false;
1100
1101 if (GET_MODE (reg) != mode)
1102 return false;
1103
1104 int regno = REGNO (reg);
1105
1106 if (regno >= FIRST_PSEUDO_REGISTER)
1107 {
1108 if (!strict)
1109 return true;
1110
1111 if (!reg_renumber)
1112 return false;
1113
1114 regno = reg_renumber[regno];
1115 }
1116
1117 return VGPR_REGNO_P (regno);
1118 }
1119
1120 /* Return true if X would be valid inside a MEM using the Flat address
1121 space. */
1122
1123 bool
1124 gcn_flat_address_p (rtx x, machine_mode mode)
1125 {
1126 bool vec_mode = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1127 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1128
1129 if (vec_mode && gcn_address_register_p (x, DImode, false))
1130 return true;
1131
1132 if (!vec_mode && gcn_vec_address_register_p (x, DImode, false))
1133 return true;
1134
1135 if (TARGET_GCN5_PLUS
1136 && GET_CODE (x) == PLUS
1137 && gcn_vec_address_register_p (XEXP (x, 0), DImode, false)
1138 && CONST_INT_P (XEXP (x, 1)))
1139 return true;
1140
1141 return false;
1142 }
1143
1144 /* Return true if X would be valid inside a MEM using the Scalar Flat
1145 address space. */
1146
1147 bool
1148 gcn_scalar_flat_address_p (rtx x)
1149 {
1150 if (gcn_address_register_p (x, DImode, false))
1151 return true;
1152
1153 if (GET_CODE (x) == PLUS
1154 && gcn_address_register_p (XEXP (x, 0), DImode, false)
1155 && CONST_INT_P (XEXP (x, 1)))
1156 return true;
1157
1158 return false;
1159 }
1160
1161 /* Return true if MEM X would be valid for the Scalar Flat address space. */
1162
1163 bool
1164 gcn_scalar_flat_mem_p (rtx x)
1165 {
1166 if (!MEM_P (x))
1167 return false;
1168
1169 if (GET_MODE_SIZE (GET_MODE (x)) < 4)
1170 return false;
1171
1172 return gcn_scalar_flat_address_p (XEXP (x, 0));
1173 }
1174
1175 /* Return true if X would be valid inside a MEM using the LDS or GDS
1176 address spaces. */
1177
1178 bool
1179 gcn_ds_address_p (rtx x)
1180 {
1181 if (gcn_vec_address_register_p (x, SImode, false))
1182 return true;
1183
1184 if (GET_CODE (x) == PLUS
1185 && gcn_vec_address_register_p (XEXP (x, 0), SImode, false)
1186 && CONST_INT_P (XEXP (x, 1)))
1187 return true;
1188
1189 return false;
1190 }
1191
1192 /* Return true if ADDR would be valid inside a MEM using the Global
1193 address space. */
1194
1195 bool
1196 gcn_global_address_p (rtx addr)
1197 {
1198 if (gcn_address_register_p (addr, DImode, false)
1199 || gcn_vec_address_register_p (addr, DImode, false))
1200 return true;
1201
1202 if (GET_CODE (addr) == PLUS)
1203 {
1204 rtx base = XEXP (addr, 0);
1205 rtx offset = XEXP (addr, 1);
1206 bool immediate_p = (CONST_INT_P (offset)
1207 && INTVAL (offset) >= -(1 << 12)
1208 && INTVAL (offset) < (1 << 12));
1209
1210 if ((gcn_address_register_p (base, DImode, false)
1211 || gcn_vec_address_register_p (base, DImode, false))
1212 && immediate_p)
1213 /* SGPR + CONST or VGPR + CONST */
1214 return true;
1215
1216 if (gcn_address_register_p (base, DImode, false)
1217 && gcn_vgpr_register_operand (offset, SImode))
1218 /* SGPR + VGPR */
1219 return true;
1220
1221 if (GET_CODE (base) == PLUS
1222 && gcn_address_register_p (XEXP (base, 0), DImode, false)
1223 && gcn_vgpr_register_operand (XEXP (base, 1), SImode)
1224 && immediate_p)
1225 /* (SGPR + VGPR) + CONST */
1226 return true;
1227 }
1228
1229 return false;
1230 }
1231
1232 /* Implement TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P.
1233
1234 Recognizes RTL expressions that are valid memory addresses for an
1235 instruction. The MODE argument is the machine mode for the MEM
1236 expression that wants to use this address.
1237
1238 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
1239 convert common non-canonical forms to canonical form so that they will
1240 be recognized. */
1241
1242 static bool
1243 gcn_addr_space_legitimate_address_p (machine_mode mode, rtx x, bool strict,
1244 addr_space_t as)
1245 {
1246 /* All vector instructions need to work on addresses in registers. */
1247 if (!TARGET_GCN5_PLUS && (vgpr_vector_mode_p (mode) && !REG_P (x)))
1248 return false;
1249
1250 if (AS_SCALAR_FLAT_P (as))
1251 {
1252 if (mode == QImode || mode == HImode)
1253 return 0;
1254
1255 switch (GET_CODE (x))
1256 {
1257 case REG:
1258 return gcn_address_register_p (x, DImode, strict);
1259 /* Addresses are in the form BASE+OFFSET
1260 OFFSET is either 20bit unsigned immediate, SGPR or M0.
1261 Writes and atomics do not accept SGPR. */
1262 case PLUS:
1263 {
1264 rtx x0 = XEXP (x, 0);
1265 rtx x1 = XEXP (x, 1);
1266 if (!gcn_address_register_p (x0, DImode, strict))
1267 return false;
1268 /* FIXME: This is disabled because of the mode mismatch between
1269 SImode (for the address or m0 register) and the DImode PLUS.
1270 We'll need a zero_extend or similar.
1271
1272 if (gcn_m0_register_p (x1, SImode, strict)
1273 || gcn_address_register_p (x1, SImode, strict))
1274 return true;
1275 else*/
1276 if (GET_CODE (x1) == CONST_INT)
1277 {
1278 if (INTVAL (x1) >= 0 && INTVAL (x1) < (1 << 20)
1279 /* The low bits of the offset are ignored, even when
1280 they're meant to realign the pointer. */
1281 && !(INTVAL (x1) & 0x3))
1282 return true;
1283 }
1284 return false;
1285 }
1286
1287 default:
1288 break;
1289 }
1290 }
1291 else if (AS_SCRATCH_P (as))
1292 return gcn_address_register_p (x, SImode, strict);
1293 else if (AS_FLAT_P (as) || AS_FLAT_SCRATCH_P (as))
1294 {
1295 if (TARGET_GCN3 || GET_CODE (x) == REG)
1296 return ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1297 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
1298 ? gcn_address_register_p (x, DImode, strict)
1299 : gcn_vec_address_register_p (x, DImode, strict));
1300 else
1301 {
1302 gcc_assert (TARGET_GCN5_PLUS);
1303
1304 if (GET_CODE (x) == PLUS)
1305 {
1306 rtx x1 = XEXP (x, 1);
1307
1308 if (VECTOR_MODE_P (mode)
1309 ? !gcn_address_register_p (x, DImode, strict)
1310 : !gcn_vec_address_register_p (x, DImode, strict))
1311 return false;
1312
1313 if (GET_CODE (x1) == CONST_INT)
1314 {
1315 if (INTVAL (x1) >= 0 && INTVAL (x1) < (1 << 12)
1316 /* The low bits of the offset are ignored, even when
1317 they're meant to realign the pointer. */
1318 && !(INTVAL (x1) & 0x3))
1319 return true;
1320 }
1321 }
1322 return false;
1323 }
1324 }
1325 else if (AS_GLOBAL_P (as))
1326 {
1327 gcc_assert (TARGET_GCN5_PLUS);
1328
1329 if (GET_CODE (x) == REG)
1330 return (gcn_address_register_p (x, DImode, strict)
1331 || (!VECTOR_MODE_P (mode)
1332 && gcn_vec_address_register_p (x, DImode, strict)));
1333 else if (GET_CODE (x) == PLUS)
1334 {
1335 rtx base = XEXP (x, 0);
1336 rtx offset = XEXP (x, 1);
1337
1338 bool immediate_p = (GET_CODE (offset) == CONST_INT
1339 /* Signed 13-bit immediate. */
1340 && INTVAL (offset) >= -(1 << 12)
1341 && INTVAL (offset) < (1 << 12)
1342 /* The low bits of the offset are ignored, even
1343 when they're meant to realign the pointer. */
1344 && !(INTVAL (offset) & 0x3));
1345
1346 if (!VECTOR_MODE_P (mode))
1347 {
1348 if ((gcn_address_register_p (base, DImode, strict)
1349 || gcn_vec_address_register_p (base, DImode, strict))
1350 && immediate_p)
1351 /* SGPR + CONST or VGPR + CONST */
1352 return true;
1353
1354 if (gcn_address_register_p (base, DImode, strict)
1355 && gcn_vgpr_register_operand (offset, SImode))
1356 /* SGPR + VGPR */
1357 return true;
1358
1359 if (GET_CODE (base) == PLUS
1360 && gcn_address_register_p (XEXP (base, 0), DImode, strict)
1361 && gcn_vgpr_register_operand (XEXP (base, 1), SImode)
1362 && immediate_p)
1363 /* (SGPR + VGPR) + CONST */
1364 return true;
1365 }
1366 else
1367 {
1368 if (gcn_address_register_p (base, DImode, strict)
1369 && immediate_p)
1370 /* SGPR + CONST */
1371 return true;
1372 }
1373 }
1374 else
1375 return false;
1376 }
1377 else if (AS_ANY_DS_P (as))
1378 switch (GET_CODE (x))
1379 {
1380 case REG:
1381 return (VECTOR_MODE_P (mode)
1382 ? gcn_address_register_p (x, SImode, strict)
1383 : gcn_vec_address_register_p (x, SImode, strict));
1384 /* Addresses are in the form BASE+OFFSET
1385 OFFSET is either 20bit unsigned immediate, SGPR or M0.
1386 Writes and atomics do not accept SGPR. */
1387 case PLUS:
1388 {
1389 rtx x0 = XEXP (x, 0);
1390 rtx x1 = XEXP (x, 1);
1391 if (!gcn_vec_address_register_p (x0, DImode, strict))
1392 return false;
1393 if (GET_CODE (x1) == REG)
1394 {
1395 if (GET_CODE (x1) != REG
1396 || (REGNO (x1) <= FIRST_PSEUDO_REGISTER
1397 && !gcn_ssrc_register_operand (x1, DImode)))
1398 return false;
1399 }
1400 else if (GET_CODE (x1) == CONST_VECTOR
1401 && GET_CODE (CONST_VECTOR_ELT (x1, 0)) == CONST_INT
1402 && single_cst_vector_p (x1))
1403 {
1404 x1 = CONST_VECTOR_ELT (x1, 0);
1405 if (INTVAL (x1) >= 0 && INTVAL (x1) < (1 << 20))
1406 return true;
1407 }
1408 return false;
1409 }
1410
1411 default:
1412 break;
1413 }
1414 else
1415 gcc_unreachable ();
1416 return false;
1417 }
1418
1419 /* Implement TARGET_ADDR_SPACE_POINTER_MODE.
1420
1421 Return the appropriate mode for a named address pointer. */
1422
1423 static scalar_int_mode
1424 gcn_addr_space_pointer_mode (addr_space_t addrspace)
1425 {
1426 switch (addrspace)
1427 {
1428 case ADDR_SPACE_SCRATCH:
1429 case ADDR_SPACE_LDS:
1430 case ADDR_SPACE_GDS:
1431 return SImode;
1432 case ADDR_SPACE_DEFAULT:
1433 case ADDR_SPACE_FLAT:
1434 case ADDR_SPACE_FLAT_SCRATCH:
1435 case ADDR_SPACE_SCALAR_FLAT:
1436 return DImode;
1437 default:
1438 gcc_unreachable ();
1439 }
1440 }
1441
1442 /* Implement TARGET_ADDR_SPACE_ADDRESS_MODE.
1443
1444 Return the appropriate mode for a named address space address. */
1445
1446 static scalar_int_mode
1447 gcn_addr_space_address_mode (addr_space_t addrspace)
1448 {
1449 return gcn_addr_space_pointer_mode (addrspace);
1450 }
1451
1452 /* Implement TARGET_ADDR_SPACE_SUBSET_P.
1453
1454 Determine if one named address space is a subset of another. */
1455
1456 static bool
1457 gcn_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
1458 {
1459 if (subset == superset)
1460 return true;
1461 /* FIXME is this true? */
1462 if (AS_FLAT_P (superset) || AS_SCALAR_FLAT_P (superset))
1463 return true;
1464 return false;
1465 }
1466
1467 /* Convert from one address space to another. */
1468
1469 static rtx
1470 gcn_addr_space_convert (rtx op, tree from_type, tree to_type)
1471 {
1472 gcc_assert (POINTER_TYPE_P (from_type));
1473 gcc_assert (POINTER_TYPE_P (to_type));
1474
1475 addr_space_t as_from = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
1476 addr_space_t as_to = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
1477
1478 if (AS_LDS_P (as_from) && AS_FLAT_P (as_to))
1479 {
1480 rtx queue = gen_rtx_REG (DImode,
1481 cfun->machine->args.reg[QUEUE_PTR_ARG]);
1482 rtx group_seg_aperture_hi = gen_rtx_MEM (SImode,
1483 gen_rtx_PLUS (DImode, queue,
1484 gen_int_mode (64, SImode)));
1485 rtx tmp = gen_reg_rtx (DImode);
1486
1487 emit_move_insn (gen_lowpart (SImode, tmp), op);
1488 emit_move_insn (gen_highpart_mode (SImode, DImode, tmp),
1489 group_seg_aperture_hi);
1490
1491 return tmp;
1492 }
1493 else if (as_from == as_to)
1494 return op;
1495 else
1496 gcc_unreachable ();
1497 }
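/* A sketch of the LDS->flat conversion above: the 64-bit flat pointer is
   assembled with the 32-bit LDS offset in the low half and the group-segment
   aperture (read from the HSA queue object at byte offset 64) in the high
   half.  */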
1498
1499
1500 /* Implement REGNO_MODE_CODE_OK_FOR_BASE_P via gcn.h
1501
1502 Return true if REGNO is OK for memory addressing. */
1503
1504 bool
1505 gcn_regno_mode_code_ok_for_base_p (int regno,
1506 machine_mode, addr_space_t as, int, int)
1507 {
1508 if (regno >= FIRST_PSEUDO_REGISTER)
1509 {
1510 if (reg_renumber)
1511 regno = reg_renumber[regno];
1512 else
1513 return true;
1514 }
1515 if (AS_FLAT_P (as))
1516 return (VGPR_REGNO_P (regno)
1517 || regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM);
1518 else if (AS_SCALAR_FLAT_P (as))
1519 return (SGPR_REGNO_P (regno)
1520 || regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM);
1521 else if (AS_GLOBAL_P (as))
1522 {
1523 return (SGPR_REGNO_P (regno)
1524 || VGPR_REGNO_P (regno)
1525 || regno == ARG_POINTER_REGNUM
1526 || regno == FRAME_POINTER_REGNUM);
1527 }
1528 else
1529 /* For now. */
1530 return false;
1531 }
1532
1533 /* Implement MODE_CODE_BASE_REG_CLASS via gcn.h.
1534
1535 Return a suitable register class for memory addressing. */
1536
1537 reg_class
1538 gcn_mode_code_base_reg_class (machine_mode mode, addr_space_t as, int oc,
1539 int ic)
1540 {
1541 switch (as)
1542 {
1543 case ADDR_SPACE_DEFAULT:
1544 return gcn_mode_code_base_reg_class (mode, DEFAULT_ADDR_SPACE, oc, ic);
1545 case ADDR_SPACE_SCALAR_FLAT:
1546 case ADDR_SPACE_SCRATCH:
1547 return SGPR_REGS;
1548 break;
1549 case ADDR_SPACE_FLAT:
1550 case ADDR_SPACE_FLAT_SCRATCH:
1551 case ADDR_SPACE_LDS:
1552 case ADDR_SPACE_GDS:
1553 return ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1554 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
1555 ? SGPR_REGS : VGPR_REGS);
1556 case ADDR_SPACE_GLOBAL:
1557 return ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1558 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
1559 ? SGPR_REGS : ALL_GPR_REGS);
1560 }
1561 gcc_unreachable ();
1562 }
1563
1564 /* Implement REGNO_OK_FOR_INDEX_P via gcn.h.
1565
1566 Return true if REGNO is OK for index of memory addressing. */
1567
1568 bool
1569 regno_ok_for_index_p (int regno)
1570 {
1571 if (regno >= FIRST_PSEUDO_REGISTER)
1572 {
1573 if (reg_renumber)
1574 regno = reg_renumber[regno];
1575 else
1576 return true;
1577 }
1578 return regno == M0_REG || VGPR_REGNO_P (regno);
1579 }
1580
1581 /* Generate move which uses the exec flags. If EXEC is NULL, then it is
1582 assumed that all lanes normally relevant to the mode of the move are
1583 affected. If PREV is NULL, then a sensible default is supplied for
1584 the inactive lanes. */
1585
1586 static rtx
1587 gen_mov_with_exec (rtx op0, rtx op1, rtx exec = NULL, rtx prev = NULL)
1588 {
1589 machine_mode mode = GET_MODE (op0);
1590
1591 if (vgpr_vector_mode_p (mode))
1592 {
1593 if (exec && exec != CONSTM1_RTX (DImode))
1594 {
1595 if (!prev)
1596 prev = op0;
1597 }
1598 else
1599 {
1600 if (!prev)
1601 prev = gcn_gen_undef (mode);
1602 exec = gcn_full_exec_reg ();
1603 }
1604
1605 rtx set = gen_rtx_SET (op0, gen_rtx_VEC_MERGE (mode, op1, prev, exec));
1606
1607 return gen_rtx_PARALLEL (VOIDmode,
1608 gen_rtvec (2, set,
1609 gen_rtx_CLOBBER (VOIDmode,
1610 gen_rtx_SCRATCH (V64DImode))));
1611 }
1612
1613 return (gen_rtx_PARALLEL
1614 (VOIDmode,
1615 gen_rtvec (2, gen_rtx_SET (op0, op1),
1616 gen_rtx_USE (VOIDmode,
1617 exec ? exec : gcn_scalar_exec ()))));
1618 }
1619
1620 /* Generate masked move. */
1621
1622 static rtx
1623 gen_duplicate_load (rtx op0, rtx op1, rtx op2 = NULL, rtx exec = NULL)
1624 {
1625 if (exec)
1626 return (gen_rtx_SET (op0,
1627 gen_rtx_VEC_MERGE (GET_MODE (op0),
1628 gen_rtx_VEC_DUPLICATE (GET_MODE
1629 (op0), op1),
1630 op2, exec)));
1631 else
1632 return (gen_rtx_SET (op0, gen_rtx_VEC_DUPLICATE (GET_MODE (op0), op1)));
1633 }
1634
1635 /* Expand vector init of OP0 by VEC.
1636 Implements vec_init instruction pattern. */
1637
1638 void
1639 gcn_expand_vector_init (rtx op0, rtx vec)
1640 {
1641 int64_t initialized_mask = 0;
1642 int64_t curr_mask = 1;
1643 machine_mode mode = GET_MODE (op0);
1644
1645 rtx val = XVECEXP (vec, 0, 0);
1646
1647 for (int i = 1; i < 64; i++)
1648 if (rtx_equal_p (val, XVECEXP (vec, 0, i)))
1649 curr_mask |= (int64_t) 1 << i;
1650
1651 if (gcn_constant_p (val))
1652 emit_move_insn (op0, gcn_vec_constant (mode, val));
1653 else
1654 {
1655 val = force_reg (GET_MODE_INNER (mode), val);
1656 emit_insn (gen_duplicate_load (op0, val));
1657 }
1658 initialized_mask |= curr_mask;
1659 for (int i = 1; i < 64; i++)
1660 if (!(initialized_mask & ((int64_t) 1 << i)))
1661 {
1662 curr_mask = (int64_t) 1 << i;
1663 rtx val = XVECEXP (vec, 0, i);
1664
1665 for (int j = i + 1; j < 64; j++)
1666 if (rtx_equal_p (val, XVECEXP (vec, 0, j)))
1667 curr_mask |= (int64_t) 1 << j;
1668 if (gcn_constant_p (val))
1669 emit_insn (gen_mov_with_exec (op0, gcn_vec_constant (mode, val),
1670 get_exec (curr_mask)));
1671 else
1672 {
1673 val = force_reg (GET_MODE_INNER (mode), val);
1674 emit_insn (gen_duplicate_load (op0, val, op0,
1675 get_exec (curr_mask)));
1676 }
1677 initialized_mask |= curr_mask;
1678 }
1679 }
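/* As a sketch of the algorithm above: initializing a vector to
   {a, b, a, b, ...} first broadcasts A to all 64 lanes and then performs a
   single masked move of B with EXEC = 0xaaaaaaaaaaaaaaaa, so each distinct
   value costs one instruction regardless of how often it repeats.  */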
1680
1681 /* Load vector constant where n-th lane contains BASE+n*VAL. */
1682
1683 static rtx
1684 strided_constant (machine_mode mode, int base, int val)
1685 {
1686 rtx x = gen_reg_rtx (mode);
1687 emit_move_insn (x, gcn_vec_constant (mode, base));
1688 emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 32),
1689 x, get_exec (0xffffffff00000000)));
1690 emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 16),
1691 x, get_exec (0xffff0000ffff0000)));
1692 emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 8),
1693 x, get_exec (0xff00ff00ff00ff00)));
1694 emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 4),
1695 x, get_exec (0xf0f0f0f0f0f0f0f0)));
1696 emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 2),
1697 x, get_exec (0xcccccccccccccccc)));
1698 emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 1),
1699 x, get_exec (0xaaaaaaaaaaaaaaaa)));
1700 return x;
1701 }
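/* The six masked adds above decompose each lane number into its binary
   digits, so lane N ends up holding BASE + N * VAL.  For example, with
   BASE == 0 and VAL == 4, lane 37 (binary 100101) accumulates
   4*32 + 4*4 + 4*1 == 148.  */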
1702
1703 /* Implement TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS. */
1704
1705 static rtx
1706 gcn_addr_space_legitimize_address (rtx x, rtx old, machine_mode mode,
1707 addr_space_t as)
1708 {
1709 switch (as)
1710 {
1711 case ADDR_SPACE_DEFAULT:
1712 return gcn_addr_space_legitimize_address (x, old, mode,
1713 DEFAULT_ADDR_SPACE);
1714 case ADDR_SPACE_SCALAR_FLAT:
1715 case ADDR_SPACE_SCRATCH:
1716 /* Instructions working on vectors need the address to be in
1717 a register. */
1718 if (vgpr_vector_mode_p (mode))
1719 return force_reg (GET_MODE (x), x);
1720
1721 return x;
1722 case ADDR_SPACE_FLAT:
1723 case ADDR_SPACE_FLAT_SCRATCH:
1724 case ADDR_SPACE_GLOBAL:
1725 return TARGET_GCN3 ? force_reg (DImode, x) : x;
1726 case ADDR_SPACE_LDS:
1727 case ADDR_SPACE_GDS:
1728 /* FIXME: LDS supports offsets, handle them! */
1729 if (vgpr_vector_mode_p (mode) && GET_MODE (x) != V64SImode)
1730 {
1731 rtx addrs = gen_reg_rtx (V64SImode);
1732 rtx base = force_reg (SImode, x);
1733 rtx offsets = strided_constant (V64SImode, 0,
1734 GET_MODE_UNIT_SIZE (mode));
1735
1736 emit_insn (gen_vec_duplicatev64si (addrs, base));
1737 emit_insn (gen_addv64si3 (addrs, offsets, addrs));
1738 return addrs;
1739 }
1740 return x;
1741 }
1742 gcc_unreachable ();
1743 }
1744
1745 /* Convert a (mem:<MODE> (reg:DI)) to (mem:<MODE> (reg:V64DI)) with the
1746 proper vector of stepped addresses.
1747
1748 MEM will be a DImode address of a vector in an SGPR.
1749 TMP will be a V64DImode VGPR pair or (scratch:V64DI). */
1750
1751 rtx
1752 gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem,
1753 rtx tmp)
1754 {
1755 gcc_assert (MEM_P (mem));
1756 rtx mem_base = XEXP (mem, 0);
1757 rtx mem_index = NULL_RTX;
1758
1759 if (!TARGET_GCN5_PLUS)
1760 {
1761 /* gcn_addr_space_legitimize_address should have put the address in a
1762 register. If not, it is too late to do anything about it. */
1763 gcc_assert (REG_P (mem_base));
1764 }
1765
1766 if (GET_CODE (mem_base) == PLUS)
1767 {
1768 mem_index = XEXP (mem_base, 1);
1769 mem_base = XEXP (mem_base, 0);
1770 }
1771
1772 /* RF and RM base registers for vector modes should always be an SGPR. */
1773 gcc_assert (SGPR_REGNO_P (REGNO (mem_base))
1774 || REGNO (mem_base) >= FIRST_PSEUDO_REGISTER);
1775
1776 machine_mode inner = GET_MODE_INNER (mode);
1777 int shift = exact_log2 (GET_MODE_SIZE (inner));
1778 rtx ramp = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
1779 rtx undef_v64si = gcn_gen_undef (V64SImode);
1780 rtx new_base = NULL_RTX;
1781 addr_space_t as = MEM_ADDR_SPACE (mem);
1782
1783 rtx tmplo = (REG_P (tmp)
1784 ? gcn_operand_part (V64DImode, tmp, 0)
1785 : gen_reg_rtx (V64SImode));
1786
1787 /* tmplo[:] = ramp[:] << shift */
1788 if (exec)
1789 emit_insn (gen_ashlv64si3_exec (tmplo, ramp,
1790 gen_int_mode (shift, SImode),
1791 undef_v64si, exec));
1792 else
1793 emit_insn (gen_ashlv64si3 (tmplo, ramp, gen_int_mode (shift, SImode)));
1794
1795 if (AS_FLAT_P (as))
1796 {
1797 rtx vcc = gen_rtx_REG (DImode, CC_SAVE_REG);
1798
1799 if (REG_P (tmp))
1800 {
1801 rtx mem_base_lo = gcn_operand_part (DImode, mem_base, 0);
1802 rtx mem_base_hi = gcn_operand_part (DImode, mem_base, 1);
1803 rtx tmphi = gcn_operand_part (V64DImode, tmp, 1);
1804
1805 /* tmphi[:] = mem_base_hi */
1806 if (exec)
1807 emit_insn (gen_vec_duplicatev64si_exec (tmphi, mem_base_hi,
1808 undef_v64si, exec));
1809 else
1810 emit_insn (gen_vec_duplicatev64si (tmphi, mem_base_hi));
1811
1812 /* tmp[:] += zext (mem_base) */
1813 if (exec)
1814 {
1815 emit_insn (gen_addv64si3_vcc_dup_exec (tmplo, mem_base_lo, tmplo,
1816 vcc, undef_v64si, exec));
1817 emit_insn (gen_addcv64si3_exec (tmphi, tmphi, const0_rtx,
1818 vcc, vcc, undef_v64si, exec));
1819 }
1820 else
1821 emit_insn (gen_addv64di3_vcc_zext_dup (tmp, mem_base_lo, tmp, vcc));
1822 }
1823 else
1824 {
1825 tmp = gen_reg_rtx (V64DImode);
1826 if (exec)
1827 emit_insn (gen_addv64di3_vcc_zext_dup2_exec
1828 (tmp, tmplo, mem_base, vcc, gcn_gen_undef (V64DImode),
1829 exec));
1830 else
1831 emit_insn (gen_addv64di3_vcc_zext_dup2 (tmp, tmplo, mem_base, vcc));
1832 }
1833
1834 new_base = tmp;
1835 }
1836 else if (AS_ANY_DS_P (as))
1837 {
1838 if (!exec)
1839 emit_insn (gen_addv64si3_dup (tmplo, tmplo, mem_base));
1840 else
1841 emit_insn (gen_addv64si3_dup_exec (tmplo, tmplo, mem_base,
1842 gcn_gen_undef (V64SImode), exec));
1843 new_base = tmplo;
1844 }
1845 else
1846 {
1847 mem_base = gen_rtx_VEC_DUPLICATE (V64DImode, mem_base);
1848 new_base = gen_rtx_PLUS (V64DImode, mem_base,
1849 gen_rtx_SIGN_EXTEND (V64DImode, tmplo));
1850 }
1851
1852 return gen_rtx_PLUS (GET_MODE (new_base), new_base,
1853 gen_rtx_VEC_DUPLICATE (GET_MODE (new_base),
1854 (mem_index ? mem_index
1855 : const0_rtx)));
1856 }
1857
1858 /* Convert a BASE address, a vector of OFFSETS, and a SCALE, to addresses
1859 suitable for the given address space. This is intended for use in
1860 gather/scatter patterns.
1861
1862 The offsets may be signed or unsigned, according to UNSIGNED_P.
1863 If EXEC is set then _exec patterns will be used, otherwise plain.
1864
1865 Return values.
1866 ADDR_SPACE_FLAT - return V64DImode vector of absolute addresses.
1867 ADDR_SPACE_GLOBAL - return V64SImode vector of offsets. */
1868
1869 rtx
1870 gcn_expand_scaled_offsets (addr_space_t as, rtx base, rtx offsets, rtx scale,
1871 bool unsigned_p, rtx exec)
1872 {
1873 rtx tmpsi = gen_reg_rtx (V64SImode);
1874 rtx tmpdi = gen_reg_rtx (V64DImode);
1875 rtx undefsi = exec ? gcn_gen_undef (V64SImode) : NULL;
1876 rtx undefdi = exec ? gcn_gen_undef (V64DImode) : NULL;
1877
1878 if (CONST_INT_P (scale)
1879 && INTVAL (scale) > 0
1880 && exact_log2 (INTVAL (scale)) >= 0)
1881 emit_insn (gen_ashlv64si3 (tmpsi, offsets,
1882 GEN_INT (exact_log2 (INTVAL (scale)))));
1883 else
1884 (exec
1885 ? emit_insn (gen_mulv64si3_dup_exec (tmpsi, offsets, scale, undefsi,
1886 exec))
1887 : emit_insn (gen_mulv64si3_dup (tmpsi, offsets, scale)));
1888
1889 /* "Global" instructions do not support negative register offsets. */
1890 if (as == ADDR_SPACE_FLAT || !unsigned_p)
1891 {
1892 if (unsigned_p)
1893 (exec
1894 ? emit_insn (gen_addv64di3_zext_dup2_exec (tmpdi, tmpsi, base,
1895 undefdi, exec))
1896 : emit_insn (gen_addv64di3_zext_dup2 (tmpdi, tmpsi, base)));
1897 else
1898 (exec
1899 ? emit_insn (gen_addv64di3_sext_dup2_exec (tmpdi, tmpsi, base,
1900 undefdi, exec))
1901 : emit_insn (gen_addv64di3_sext_dup2 (tmpdi, tmpsi, base)));
1902 return tmpdi;
1903 }
1904 else if (as == ADDR_SPACE_GLOBAL)
1905 return tmpsi;
1906
1907 gcc_unreachable ();
1908 }
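/* For example (a sketch), a gather with a constant SCALE of 4 shifts the
   offsets left by 2; with unsigned offsets in ADDR_SPACE_GLOBAL the caller
   receives the V64SImode offsets, which the global instructions combine with
   the scalar base themselves, while ADDR_SPACE_FLAT receives a full V64DImode
   vector of absolute addresses.  */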
1909
1910 /* Return true if move from OP0 to OP1 is known to be executed in vector
1911 unit. */
1912
1913 bool
1914 gcn_vgpr_move_p (rtx op0, rtx op1)
1915 {
1916 if (MEM_P (op0) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op0)))
1917 return true;
1918 if (MEM_P (op1) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op1)))
1919 return true;
1920 return ((REG_P (op0) && VGPR_REGNO_P (REGNO (op0)))
1921 || (REG_P (op1) && VGPR_REGNO_P (REGNO (op1)))
1922 || vgpr_vector_mode_p (GET_MODE (op0)));
1923 }
1924
1925 /* Return true if move from OP0 to OP1 is known to be executed in scalar
1926 unit. Used in the machine description. */
1927
1928 bool
1929 gcn_sgpr_move_p (rtx op0, rtx op1)
1930 {
1931 if (MEM_P (op0) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op0)))
1932 return true;
1933 if (MEM_P (op1) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op1)))
1934 return true;
1935 if (!REG_P (op0) || REGNO (op0) >= FIRST_PSEUDO_REGISTER
1936 || VGPR_REGNO_P (REGNO (op0)))
1937 return false;
1938 if (REG_P (op1)
1939 && REGNO (op1) < FIRST_PSEUDO_REGISTER
1940 && !VGPR_REGNO_P (REGNO (op1)))
1941 return true;
1942 return immediate_operand (op1, VOIDmode) || memory_operand (op1, VOIDmode);
1943 }
1944
1945 /* Implement TARGET_SECONDARY_RELOAD.
1946
1947 The address space determines which registers can be used for loads and
1948 stores. */
1949
1950 static reg_class_t
1951 gcn_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
1952 machine_mode reload_mode, secondary_reload_info *sri)
1953 {
1954 reg_class_t result = NO_REGS;
1955 bool spilled_pseudo =
1956 (REG_P (x) || GET_CODE (x) == SUBREG) && true_regnum (x) == -1;
1957
1958 if (dump_file && (dump_flags & TDF_DETAILS))
1959 {
1960 fprintf (dump_file, "gcn_secondary_reload: ");
1961 dump_value_slim (dump_file, x, 1);
1962 fprintf (dump_file, " %s %s:%s", (in_p ? "->" : "<-"),
1963 reg_class_names[rclass], GET_MODE_NAME (reload_mode));
1964 if (REG_P (x) || GET_CODE (x) == SUBREG)
1965 fprintf (dump_file, " (true regnum: %d \"%s\")", true_regnum (x),
1966 (true_regnum (x) >= 0
1967 && true_regnum (x) < FIRST_PSEUDO_REGISTER
1968 ? reg_names[true_regnum (x)]
1969 : (spilled_pseudo ? "stack spill" : "??")));
1970 fprintf (dump_file, "\n");
1971 }
1972
1973 /* Some callers don't use or initialize icode. */
1974 sri->icode = CODE_FOR_nothing;
1975
1976 if (MEM_P (x) || spilled_pseudo)
1977 {
1978 addr_space_t as = DEFAULT_ADDR_SPACE;
1979
1980 /* If we have a spilled pseudo, we can't find the address space
1981 directly, but we know it's in ADDR_SPACE_FLAT space for GCN3 or
1982 ADDR_SPACE_GLOBAL for GCN5. */
1983 if (MEM_P (x))
1984 as = MEM_ADDR_SPACE (x);
1985
1986 if (as == ADDR_SPACE_DEFAULT)
1987 as = DEFAULT_ADDR_SPACE;
1988
1989 switch (as)
1990 {
1991 case ADDR_SPACE_SCALAR_FLAT:
1992 result =
1993 ((!MEM_P (x) || rclass == SGPR_REGS) ? NO_REGS : SGPR_REGS);
1994 break;
1995 case ADDR_SPACE_FLAT:
1996 case ADDR_SPACE_FLAT_SCRATCH:
1997 case ADDR_SPACE_GLOBAL:
1998 if (GET_MODE_CLASS (reload_mode) == MODE_VECTOR_INT
1999 || GET_MODE_CLASS (reload_mode) == MODE_VECTOR_FLOAT)
2000 {
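	    /* Vector loads and stores in these address spaces cannot be
	       reloaded directly: select the reload_in/reload_out pattern
	       matching the reload mode so that reload emits the move via
	       that helper expander (with whatever scratch it declares)
	       instead of asking for an intermediate register class.  */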
2001 if (in_p)
2002 switch (reload_mode)
2003 {
2004 case E_V64SImode:
2005 sri->icode = CODE_FOR_reload_inv64si;
2006 break;
2007 case E_V64SFmode:
2008 sri->icode = CODE_FOR_reload_inv64sf;
2009 break;
2010 case E_V64HImode:
2011 sri->icode = CODE_FOR_reload_inv64hi;
2012 break;
2013 case E_V64HFmode:
2014 sri->icode = CODE_FOR_reload_inv64hf;
2015 break;
2016 case E_V64QImode:
2017 sri->icode = CODE_FOR_reload_inv64qi;
2018 break;
2019 case E_V64DImode:
2020 sri->icode = CODE_FOR_reload_inv64di;
2021 break;
2022 case E_V64DFmode:
2023 sri->icode = CODE_FOR_reload_inv64df;
2024 break;
2025 default:
2026 gcc_unreachable ();
2027 }
2028 else
2029 switch (reload_mode)
2030 {
2031 case E_V64SImode:
2032 sri->icode = CODE_FOR_reload_outv64si;
2033 break;
2034 case E_V64SFmode:
2035 sri->icode = CODE_FOR_reload_outv64sf;
2036 break;
2037 case E_V64HImode:
2038 sri->icode = CODE_FOR_reload_outv64hi;
2039 break;
2040 case E_V64HFmode:
2041 sri->icode = CODE_FOR_reload_outv64hf;
2042 break;
2043 case E_V64QImode:
2044 sri->icode = CODE_FOR_reload_outv64qi;
2045 break;
2046 case E_V64DImode:
2047 sri->icode = CODE_FOR_reload_outv64di;
2048 break;
2049 case E_V64DFmode:
2050 sri->icode = CODE_FOR_reload_outv64df;
2051 break;
2052 default:
2053 gcc_unreachable ();
2054 }
2055 break;
2056 }
2057 /* Fallthrough. */
2058 case ADDR_SPACE_LDS:
2059 case ADDR_SPACE_GDS:
2060 case ADDR_SPACE_SCRATCH:
2061 result = (rclass == VGPR_REGS ? NO_REGS : VGPR_REGS);
2062 break;
2063 }
2064 }
2065
2066 if (dump_file && (dump_flags & TDF_DETAILS))
2067 fprintf (dump_file, " <= %s (icode: %s)\n", reg_class_names[result],
2068 get_insn_name (sri->icode));
2069
2070 return result;
2071 }
2072
2073 /* Update register usage after having seen the compiler flags and kernel
2074 attributes. We typically want to fix registers that contain values
2075 set by the HSA runtime. */
2076
2077 static void
2078 gcn_conditional_register_usage (void)
2079 {
2080 if (!cfun || !cfun->machine)
2081 return;
2082
2083 if (cfun->machine->normal_function)
2084 {
2085 /* Restrict the set of SGPRs and VGPRs used by non-kernel functions. */
2086 for (int i = SGPR_REGNO (MAX_NORMAL_SGPR_COUNT);
2087 i <= LAST_SGPR_REG; i++)
2088 fixed_regs[i] = 1, call_used_regs[i] = 1;
2089
2090 for (int i = VGPR_REGNO (MAX_NORMAL_VGPR_COUNT);
2091 i <= LAST_VGPR_REG; i++)
2092 fixed_regs[i] = 1, call_used_regs[i] = 1;
2093
2094 return;
2095 }
2096
2097 /* If the set of requested args is the default set, nothing more needs to
2098 be done. */
2099 if (cfun->machine->args.requested == default_requested_args)
2100 return;
2101
2102 /* Requesting a set of args different from the default violates the ABI. */
2103 if (!leaf_function_p ())
2104 warning (0, "a non-default set of initial values has been requested, "
2105 "which violates the ABI");
2106
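  /* Make SGPRs 0-13 available by default; the ones that actually carry
     requested dispatch values are re-fixed below.  */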
2107 for (int i = SGPR_REGNO (0); i < SGPR_REGNO (14); i++)
2108 fixed_regs[i] = 0;
2109
2110 /* Fix the runtime argument registers containing values that may be
2111    needed later.  DISPATCH_PTR_ARG and FLAT_SCRATCH_* should not be
2112    needed after the prologue, so there's no need to fix them.  */
2113 if (cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG] >= 0)
2114 fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]] = 1;
2115 if (cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] >= 0)
2116 {
2117 /* The upper 32-bits of the 64-bit descriptor are not used, so allow
2118 the containing registers to be used for other purposes. */
2119 fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG]] = 1;
2120 fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] + 1] = 1;
2121 }
2122 if (cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG] >= 0)
2123 {
2124 fixed_regs[cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG]] = 1;
2125 fixed_regs[cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG] + 1] = 1;
2126 }
2127 if (cfun->machine->args.reg[DISPATCH_PTR_ARG] >= 0)
2128 {
2129 fixed_regs[cfun->machine->args.reg[DISPATCH_PTR_ARG]] = 1;
2130 fixed_regs[cfun->machine->args.reg[DISPATCH_PTR_ARG] + 1] = 1;
2131 }
2132 if (cfun->machine->args.reg[WORKGROUP_ID_X_ARG] >= 0)
2133 fixed_regs[cfun->machine->args.reg[WORKGROUP_ID_X_ARG]] = 1;
2134 if (cfun->machine->args.reg[WORK_ITEM_ID_X_ARG] >= 0)
2135 fixed_regs[cfun->machine->args.reg[WORK_ITEM_ID_X_ARG]] = 1;
2136 if (cfun->machine->args.reg[WORK_ITEM_ID_Y_ARG] >= 0)
2137 fixed_regs[cfun->machine->args.reg[WORK_ITEM_ID_Y_ARG]] = 1;
2138 if (cfun->machine->args.reg[WORK_ITEM_ID_Z_ARG] >= 0)
2139 fixed_regs[cfun->machine->args.reg[WORK_ITEM_ID_Z_ARG]] = 1;
2140
2141 if (TARGET_GCN5_PLUS)
2142 /* v0 is always zero, for global null offsets.  */
2143 fixed_regs[VGPR_REGNO (0)] = 1;
2144 }
2145
2146 /* Determine if a load or store is valid, according to the register classes
2147 and address space. Used primarily by the machine description to decide
2148 when to split a move into two steps. */
2149
2150 bool
2151 gcn_valid_move_p (machine_mode mode, rtx dest, rtx src)
2152 {
2153 if (!MEM_P (dest) && !MEM_P (src))
2154 return true;
2155
2156 if (MEM_P (dest)
2157 && AS_FLAT_P (MEM_ADDR_SPACE (dest))
2158 && (gcn_flat_address_p (XEXP (dest, 0), mode)
2159 || GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
2160 || GET_CODE (XEXP (dest, 0)) == LABEL_REF)
2161 && gcn_vgpr_register_operand (src, mode))
2162 return true;
2163 else if (MEM_P (src)
2164 && AS_FLAT_P (MEM_ADDR_SPACE (src))
2165 && (gcn_flat_address_p (XEXP (src, 0), mode)
2166 || GET_CODE (XEXP (src, 0)) == SYMBOL_REF
2167 || GET_CODE (XEXP (src, 0)) == LABEL_REF)
2168 && gcn_vgpr_register_operand (dest, mode))
2169 return true;
2170
2171 if (MEM_P (dest)
2172 && AS_GLOBAL_P (MEM_ADDR_SPACE (dest))
2173 && (gcn_global_address_p (XEXP (dest, 0))
2174 || GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
2175 || GET_CODE (XEXP (dest, 0)) == LABEL_REF)
2176 && gcn_vgpr_register_operand (src, mode))
2177 return true;
2178 else if (MEM_P (src)
2179 && AS_GLOBAL_P (MEM_ADDR_SPACE (src))
2180 && (gcn_global_address_p (XEXP (src, 0))
2181 || GET_CODE (XEXP (src, 0)) == SYMBOL_REF
2182 || GET_CODE (XEXP (src, 0)) == LABEL_REF)
2183 && gcn_vgpr_register_operand (dest, mode))
2184 return true;
2185
2186 if (MEM_P (dest)
2187 && MEM_ADDR_SPACE (dest) == ADDR_SPACE_SCALAR_FLAT
2188 && (gcn_scalar_flat_address_p (XEXP (dest, 0))
2189 || GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
2190 || GET_CODE (XEXP (dest, 0)) == LABEL_REF)
2191 && gcn_ssrc_register_operand (src, mode))
2192 return true;
2193 else if (MEM_P (src)
2194 && MEM_ADDR_SPACE (src) == ADDR_SPACE_SCALAR_FLAT
2195 && (gcn_scalar_flat_address_p (XEXP (src, 0))
2196 || GET_CODE (XEXP (src, 0)) == SYMBOL_REF
2197 || GET_CODE (XEXP (src, 0)) == LABEL_REF)
2198 && gcn_sdst_register_operand (dest, mode))
2199 return true;
2200
2201 if (MEM_P (dest)
2202 && AS_ANY_DS_P (MEM_ADDR_SPACE (dest))
2203 && gcn_ds_address_p (XEXP (dest, 0))
2204 && gcn_vgpr_register_operand (src, mode))
2205 return true;
2206 else if (MEM_P (src)
2207 && AS_ANY_DS_P (MEM_ADDR_SPACE (src))
2208 && gcn_ds_address_p (XEXP (src, 0))
2209 && gcn_vgpr_register_operand (dest, mode))
2210 return true;
2211
2212 return false;
2213 }
2214
2215 /* }}} */
2216 /* {{{ Functions and ABI. */
2217
2218 /* Implement TARGET_FUNCTION_VALUE.
2219
2220 Define how to find the value returned by a function.
2221 The register location is always the same, but the mode depends on
2222 VALTYPE. */
2223
2224 static rtx
2225 gcn_function_value (const_tree valtype, const_tree, bool)
2226 {
2227 machine_mode mode = TYPE_MODE (valtype);
2228
2229 if (INTEGRAL_TYPE_P (valtype)
2230 && GET_MODE_CLASS (mode) == MODE_INT
2231 && GET_MODE_SIZE (mode) < 4)
2232 mode = SImode;
2233
2234 return gen_rtx_REG (mode, SGPR_REGNO (RETURN_VALUE_REG));
2235 }
2236
2237 /* Implement TARGET_FUNCTION_VALUE_REGNO_P.
2238
2239 Return true if N is a possible register number for the function return
2240 value. */
2241
2242 static bool
2243 gcn_function_value_regno_p (const unsigned int n)
2244 {
2245 return n == RETURN_VALUE_REG;
2246 }
2247
2248 /* Calculate the number of registers required to hold function argument
2249 ARG. */
2250
2251 static int
2252 num_arg_regs (const function_arg_info &arg)
2253 {
2254 if (targetm.calls.must_pass_in_stack (arg))
2255 return 0;
2256
2257 int size = arg.promoted_size_in_bytes ();
2258 return (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2259 }
2260
2261 /* Implement TARGET_STRICT_ARGUMENT_NAMING.
2262
2263 Return true if the location where a function argument is passed
2264    depends on whether or not it is a named argument.
2265
2266 For gcn, we know how to handle functions declared as stdarg: by
2267 passing an extra pointer to the unnamed arguments. However, the
2268 Fortran frontend can produce a different situation, where a
2269 function pointer is declared with no arguments, but the actual
2270 function and calls to it take more arguments. In that case, we
2271 want to ensure the call matches the definition of the function. */
2272
2273 static bool
2274 gcn_strict_argument_naming (cumulative_args_t cum_v)
2275 {
2276 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2277
2278 return cum->fntype == NULL_TREE || stdarg_p (cum->fntype);
2279 }
2280
2281 /* Implement TARGET_PRETEND_OUTGOING_VARARGS_NAMED.
2282
2283 See comment on gcn_strict_argument_naming. */
2284
2285 static bool
2286 gcn_pretend_outgoing_varargs_named (cumulative_args_t cum_v)
2287 {
2288 return !gcn_strict_argument_naming (cum_v);
2289 }
2290
2291 /* Implement TARGET_FUNCTION_ARG.
2292
2293 Return an RTX indicating whether a function argument is passed in a register
2294 and if so, which register. */
2295
2296 static rtx
2297 gcn_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
2298 {
2299 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2300 if (cum->normal_function)
2301 {
2302 if (!arg.named || arg.end_marker_p ())
2303 return 0;
2304
2305 if (targetm.calls.must_pass_in_stack (arg))
2306 return 0;
2307
2308 /* Vector parameters are not supported yet. */
2309 if (VECTOR_MODE_P (arg.mode))
2310 return 0;
2311
2312 int reg_num = FIRST_PARM_REG + cum->num;
2313 int num_regs = num_arg_regs (arg);
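      /* Multi-register arguments must start on a register number that is a
	 multiple of their size; e.g. a 2-register (DImode) argument is
	 aligned to an even-numbered register.  */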
2314 if (num_regs > 0)
2315 while (reg_num % num_regs != 0)
2316 reg_num++;
2317 if (reg_num + num_regs <= FIRST_PARM_REG + NUM_PARM_REGS)
2318 return gen_rtx_REG (arg.mode, reg_num);
2319 }
2320 else
2321 {
2322 if (cum->num >= cum->args.nargs)
2323 {
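	  /* Arguments beyond the fixed kernel argument registers are read
	     from the kernarg segment: align the running offset to this
	     argument's alignment and return a read-only scalar-flat memory
	     reference to it.  */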
2324 cum->offset = (cum->offset + TYPE_ALIGN (arg.type) / 8 - 1)
2325 & -(TYPE_ALIGN (arg.type) / 8);
2326 cfun->machine->kernarg_segment_alignment
2327 = MAX ((unsigned) cfun->machine->kernarg_segment_alignment,
2328 TYPE_ALIGN (arg.type) / 8);
2329 rtx addr = gen_rtx_REG (DImode,
2330 cum->args.reg[KERNARG_SEGMENT_PTR_ARG]);
2331 if (cum->offset)
2332 addr = gen_rtx_PLUS (DImode, addr,
2333 gen_int_mode (cum->offset, DImode));
2334 rtx mem = gen_rtx_MEM (arg.mode, addr);
2335 set_mem_attributes (mem, arg.type, 1);
2336 set_mem_addr_space (mem, ADDR_SPACE_SCALAR_FLAT);
2337 MEM_READONLY_P (mem) = 1;
2338 return mem;
2339 }
2340
2341 int a = cum->args.order[cum->num];
2342 if (arg.mode != gcn_kernel_arg_types[a].mode)
2343 {
2344 error ("wrong type of argument %s", gcn_kernel_arg_types[a].name);
2345 return 0;
2346 }
2347 return gen_rtx_REG ((machine_mode) gcn_kernel_arg_types[a].mode,
2348 cum->args.reg[a]);
2349 }
2350 return 0;
2351 }
2352
2353 /* Implement TARGET_FUNCTION_ARG_ADVANCE.
2354
2355 Updates the summarizer variable pointed to by CUM_V to advance past an
2356 argument in the argument list. */
2357
2358 static void
2359 gcn_function_arg_advance (cumulative_args_t cum_v,
2360 const function_arg_info &arg)
2361 {
2362 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2363
2364 if (cum->normal_function)
2365 {
2366 if (!arg.named)
2367 return;
2368
2369 int num_regs = num_arg_regs (arg);
2370 if (num_regs > 0)
2371 while ((FIRST_PARM_REG + cum->num) % num_regs != 0)
2372 cum->num++;
2373 cum->num += num_regs;
2374 }
2375 else
2376 {
2377 if (cum->num < cum->args.nargs)
2378 cum->num++;
2379 else
2380 {
2381 cum->offset += tree_to_uhwi (TYPE_SIZE_UNIT (arg.type));
2382 cfun->machine->kernarg_segment_byte_size = cum->offset;
2383 }
2384 }
2385 }
2386
2387 /* Implement TARGET_ARG_PARTIAL_BYTES.
2388
2389 Returns the number of bytes at the beginning of an argument that must be put
2390 in registers. The value must be zero for arguments that are passed entirely
2391 in registers or that are entirely pushed on the stack. */
2392
2393 static int
2394 gcn_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
2395 {
2396 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2397
2398 if (!arg.named)
2399 return 0;
2400
2401 if (targetm.calls.must_pass_in_stack (arg))
2402 return 0;
2403
2404 if (cum->num >= NUM_PARM_REGS)
2405 return 0;
2406
2407 /* If the argument fits entirely in registers, return 0. */
2408 if (cum->num + num_arg_regs (arg) <= NUM_PARM_REGS)
2409 return 0;
2410
2411 return (NUM_PARM_REGS - cum->num) * UNITS_PER_WORD;
2412 }
2413
2414 /* A normal function which takes a pointer argument (to a scalar) may be
2415 passed a pointer to LDS space (via a high-bits-set aperture), and that only
2416 works with FLAT addressing, not GLOBAL. Force FLAT addressing if the
2417 function has an incoming pointer-to-scalar parameter. */
2418
2419 static void
2420 gcn_detect_incoming_pointer_arg (tree fndecl)
2421 {
2422 gcc_assert (cfun && cfun->machine);
2423
2424 for (tree arg = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
2425 arg;
2426 arg = TREE_CHAIN (arg))
2427 if (POINTER_TYPE_P (TREE_VALUE (arg))
2428 && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_VALUE (arg))))
2429 cfun->machine->use_flat_addressing = true;
2430 }
2431
2432 /* Implement INIT_CUMULATIVE_ARGS, via gcn.h.
2433
2434 Initialize a variable CUM of type CUMULATIVE_ARGS for a call to a function
2435 whose data type is FNTYPE. For a library call, FNTYPE is 0. */
2436
2437 void
2438 gcn_init_cumulative_args (CUMULATIVE_ARGS *cum /* Argument info to init */ ,
2439 tree fntype /* tree ptr for function decl */ ,
2440 rtx libname /* SYMBOL_REF of library name or 0 */ ,
2441 tree fndecl, int caller)
2442 {
2443 memset (cum, 0, sizeof (*cum));
2444 cum->fntype = fntype;
2445 if (libname)
2446 {
2447 gcc_assert (cfun && cfun->machine);
2448 cum->normal_function = true;
2449 if (!caller)
2450 {
2451 cfun->machine->normal_function = true;
2452 gcn_detect_incoming_pointer_arg (fndecl);
2453 }
2454 return;
2455 }
2456 tree attr = NULL;
2457 if (fndecl)
2458 attr = lookup_attribute ("amdgpu_hsa_kernel", DECL_ATTRIBUTES (fndecl));
2459 if (fndecl && !attr)
2460 attr = lookup_attribute ("amdgpu_hsa_kernel",
2461 TYPE_ATTRIBUTES (TREE_TYPE (fndecl)));
2462 if (!attr && fntype)
2463 attr = lookup_attribute ("amdgpu_hsa_kernel", TYPE_ATTRIBUTES (fntype));
2464 /* Handle main () as a kernel, so we can run the testsuite.
2465    Handle OpenACC kernels similarly to main.  */
2466 if (!attr && !caller && fndecl
2467 && (MAIN_NAME_P (DECL_NAME (fndecl))
2468 || lookup_attribute ("omp target entrypoint",
2469 DECL_ATTRIBUTES (fndecl)) != NULL_TREE))
2470 gcn_parse_amdgpu_hsa_kernel_attribute (&cum->args, NULL_TREE);
2471 else
2472 {
2473 if (!attr || caller)
2474 {
2475 gcc_assert (cfun && cfun->machine);
2476 cum->normal_function = true;
2477 if (!caller)
2478 cfun->machine->normal_function = true;
2479 }
2480 gcn_parse_amdgpu_hsa_kernel_attribute
2481 (&cum->args, attr ? TREE_VALUE (attr) : NULL_TREE);
2482 }
2483 cfun->machine->args = cum->args;
2484 if (!caller && cfun->machine->normal_function)
2485 gcn_detect_incoming_pointer_arg (fndecl);
2486
2487 reinit_regs ();
2488 }
2489
2490 static bool
2491 gcn_return_in_memory (const_tree type, const_tree ARG_UNUSED (fntype))
2492 {
2493 machine_mode mode = TYPE_MODE (type);
2494 HOST_WIDE_INT size = int_size_in_bytes (type);
2495
2496 if (AGGREGATE_TYPE_P (type))
2497 return true;
2498
2499 /* Vector return values are not supported yet. */
2500 if (VECTOR_TYPE_P (type))
2501 return true;
2502
2503 if (mode == BLKmode)
2504 return true;
2505
2506 if (size > 2 * UNITS_PER_WORD)
2507 return true;
2508
2509 return false;
2510 }
2511
2512 /* Implement TARGET_PROMOTE_FUNCTION_MODE.
2513
2514 Return the mode to use for outgoing function arguments. */
2515
2516 machine_mode
2517 gcn_promote_function_mode (const_tree ARG_UNUSED (type), machine_mode mode,
2518 int *ARG_UNUSED (punsignedp),
2519 const_tree ARG_UNUSED (funtype),
2520 int ARG_UNUSED (for_return))
2521 {
2522 if (GET_MODE_CLASS (mode) == MODE_INT && GET_MODE_SIZE (mode) < 4)
2523 return SImode;
2524
2525 return mode;
2526 }
2527
2528 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.
2529
2530 Derived from hppa_gimplify_va_arg_expr. The generic routine doesn't handle
2531    ARGS_GROW_DOWNWARD.  */
2532
2533 static tree
2534 gcn_gimplify_va_arg_expr (tree valist, tree type,
2535 gimple_seq *ARG_UNUSED (pre_p),
2536 gimple_seq *ARG_UNUSED (post_p))
2537 {
2538 tree ptr = build_pointer_type (type);
2539 tree valist_type;
2540 tree t, u;
2541 bool indirect;
2542
2543 indirect = pass_va_arg_by_reference (type);
2544 if (indirect)
2545 {
2546 type = ptr;
2547 ptr = build_pointer_type (type);
2548 }
2549 valist_type = TREE_TYPE (valist);
2550
2551 /* Args grow down. Not handled by generic routines. */
2552
2553 u = fold_convert (sizetype, size_in_bytes (type));
2554 u = fold_build1 (NEGATE_EXPR, sizetype, u);
2555 t = fold_build_pointer_plus (valist, u);
2556
2557 /* Align to 8 byte boundary. */
2558
2559 u = build_int_cst (TREE_TYPE (t), -8);
2560 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
2561 t = fold_convert (valist_type, t);
2562
2563 t = build2 (MODIFY_EXPR, valist_type, valist, t);
2564
2565 t = fold_convert (ptr, t);
2566 t = build_va_arg_indirect_ref (t);
2567
2568 if (indirect)
2569 t = build_va_arg_indirect_ref (t);
2570
2571 return t;
2572 }
2573
2574 /* Return 1 if TRAIT NAME is present in the OpenMP context's
2575 device trait set, return 0 if not present in any OpenMP context in the
2576 whole translation unit, or -1 if not present in the current OpenMP context
2577 but might be present in another OpenMP context in the same TU. */
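/* For example (purely illustrative), a context selector such as
     #pragma omp metadirective when (device={isa("gfx906")} : ...) ...
   is resolved through this hook, which returns 1 here when compiling for
   Vega 20 and 0 otherwise.  */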
2578
2579 int
2580 gcn_omp_device_kind_arch_isa (enum omp_device_kind_arch_isa trait,
2581 const char *name)
2582 {
2583 switch (trait)
2584 {
2585 case omp_device_kind:
2586 return strcmp (name, "gpu") == 0;
2587 case omp_device_arch:
2588 return strcmp (name, "gcn") == 0;
2589 case omp_device_isa:
2590 if (strcmp (name, "fiji") == 0)
2591 return gcn_arch == PROCESSOR_FIJI;
2592 if (strcmp (name, "gfx900") == 0)
2593 return gcn_arch == PROCESSOR_VEGA10;
2594 if (strcmp (name, "gfx906") == 0)
2595 return gcn_arch == PROCESSOR_VEGA20;
2596 return 0;
2597 default:
2598 gcc_unreachable ();
2599 }
2600 }
2601
2602 /* Calculate stack offsets needed to create prologues and epilogues. */
2603
2604 static struct machine_function *
2605 gcn_compute_frame_offsets (void)
2606 {
2607 machine_function *offsets = cfun->machine;
2608
2609 if (reload_completed)
2610 return offsets;
2611
2612 offsets->need_frame_pointer = frame_pointer_needed;
2613
2614 offsets->outgoing_args_size = crtl->outgoing_args_size;
2615 offsets->pretend_size = crtl->args.pretend_args_size;
2616
2617 offsets->local_vars = get_frame_size ();
2618
2619 offsets->lr_needs_saving = (!leaf_function_p ()
2620 || df_regs_ever_live_p (LR_REGNUM)
2621 || df_regs_ever_live_p (LR_REGNUM + 1));
2622
2623 offsets->callee_saves = offsets->lr_needs_saving ? 8 : 0;
2624
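  /* Account for the callee-save area: each live call-saved VGPR needs a
     full 256-byte slot (64 lanes x 4 bytes), while each SGPR needs 4 bytes,
     since it is saved via a lane of a VGPR (see
     move_callee_saved_registers).  */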
2625 for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
2626 if ((df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
2627 || ((regno & ~1) == HARD_FRAME_POINTER_REGNUM
2628 && frame_pointer_needed))
2629 offsets->callee_saves += (VGPR_REGNO_P (regno) ? 256 : 4);
2630
2631 /* Round up to 64-bit boundary to maintain stack alignment. */
2632 offsets->callee_saves = (offsets->callee_saves + 7) & ~7;
2633
2634 return offsets;
2635 }
2636
2637 /* Insert code into the prologue or epilogue to store or load any
2638 callee-save register to/from the stack.
2639
2640 Helper function for gcn_expand_prologue and gcn_expand_epilogue. */
2641
2642 static void
2643 move_callee_saved_registers (rtx sp, machine_function *offsets,
2644 bool prologue)
2645 {
2646 int regno, offset, saved_scalars;
2647 rtx exec = gen_rtx_REG (DImode, EXEC_REG);
2648 rtx vcc = gen_rtx_REG (DImode, VCC_LO_REG);
2649 rtx offreg = gen_rtx_REG (SImode, SGPR_REGNO (22));
2650 rtx as = gen_rtx_CONST_INT (VOIDmode, STACK_ADDR_SPACE);
2651 HOST_WIDE_INT exec_set = 0;
2652 int offreg_set = 0;
2653
2654 start_sequence ();
2655
2656 /* Move scalars into two vector registers. */
2657 for (regno = 0, saved_scalars = 0; regno < FIRST_VGPR_REG; regno++)
2658 if ((df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
2659 || ((regno & ~1) == LINK_REGNUM && offsets->lr_needs_saving)
2660 || ((regno & ~1) == HARD_FRAME_POINTER_REGNUM
2661 && offsets->need_frame_pointer))
2662 {
2663 rtx reg = gen_rtx_REG (SImode, regno);
2664 rtx vreg = gen_rtx_REG (V64SImode,
2665 VGPR_REGNO (6 + (saved_scalars / 64)));
2666 int lane = saved_scalars % 64;
2667
2668 if (prologue)
2669 emit_insn (gen_vec_setv64si (vreg, reg, GEN_INT (lane)));
2670 else
2671 emit_insn (gen_vec_extractv64sisi (reg, vreg, GEN_INT (lane)));
2672
2673 saved_scalars++;
2674 }
2675
2676 rtx move_scalars = get_insns ();
2677 end_sequence ();
2678 start_sequence ();
2679
2680 /* Ensure that all vector lanes are moved. */
2681 exec_set = -1;
2682 emit_move_insn (exec, GEN_INT (exec_set));
2683
2684 /* Set up a vector stack pointer. */
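  /* The variable names suggest v1 holds the lane numbers 0, 1, 2, ...
     (presumably set up elsewhere); shifting left by 2 turns them into
     per-lane byte offsets 0, 4, 8, ... so each lane addresses its own
     4-byte stack slot.  */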
2685 rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
2686 rtx _0_4_8_12 = gen_rtx_REG (V64SImode, VGPR_REGNO (3));
2687 emit_insn (gen_ashlv64si3_exec (_0_4_8_12, _0_1_2_3, GEN_INT (2),
2688 gcn_gen_undef (V64SImode), exec));
2689 rtx vsp = gen_rtx_REG (V64DImode, VGPR_REGNO (4));
2690 emit_insn (gen_vec_duplicatev64di_exec (vsp, sp, gcn_gen_undef (V64DImode),
2691 exec));
2692 emit_insn (gen_addv64si3_vcc_exec (gcn_operand_part (V64SImode, vsp, 0),
2693 gcn_operand_part (V64SImode, vsp, 0),
2694 _0_4_8_12, vcc, gcn_gen_undef (V64SImode),
2695 exec));
2696 emit_insn (gen_addcv64si3_exec (gcn_operand_part (V64SImode, vsp, 1),
2697 gcn_operand_part (V64SImode, vsp, 1),
2698 const0_rtx, vcc, vcc,
2699 gcn_gen_undef (V64SImode), exec));
2700
2701 /* Move vectors. */
2702 for (regno = FIRST_VGPR_REG, offset = offsets->pretend_size;
2703 regno < FIRST_PSEUDO_REGISTER; regno++)
2704 if ((df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
2705 || (regno == VGPR_REGNO (6) && saved_scalars > 0)
2706 || (regno == VGPR_REGNO (7) && saved_scalars > 63))
2707 {
2708 rtx reg = gen_rtx_REG (V64SImode, regno);
2709 int size = 256;
2710
2711 if (regno == VGPR_REGNO (6) && saved_scalars < 64)
2712 size = saved_scalars * 4;
2713 else if (regno == VGPR_REGNO (7) && saved_scalars < 128)
2714 size = (saved_scalars - 64) * 4;
2715
2716 if (size != 256 || exec_set != -1)
2717 {
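	  /* Enable only as many lanes as there are 4-byte words to save,
	     so that a partially-used save VGPR does not touch stack slots
	     beyond the callee-save area.  */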
2718 exec_set = ((unsigned HOST_WIDE_INT) 1 << (size / 4)) - 1;
2719 emit_move_insn (exec, gen_int_mode (exec_set, DImode));
2720 }
2721
2722 if (prologue)
2723 emit_insn (gen_scatterv64si_insn_1offset_exec (vsp, const0_rtx, reg,
2724 as, const0_rtx, exec));
2725 else
2726 emit_insn (gen_gatherv64si_insn_1offset_exec
2727 (reg, vsp, const0_rtx, as, const0_rtx,
2728 gcn_gen_undef (V64SImode), exec));
2729
2730 /* Move our VSP to the next stack entry. */
2731 if (offreg_set != size)
2732 {
2733 offreg_set = size;
2734 emit_move_insn (offreg, GEN_INT (size));
2735 }
2736 if (exec_set != -1)
2737 {
2738 exec_set = -1;
2739 emit_move_insn (exec, GEN_INT (exec_set));
2740 }
2741 emit_insn (gen_addv64si3_vcc_dup_exec
2742 (gcn_operand_part (V64SImode, vsp, 0),
2743 offreg, gcn_operand_part (V64SImode, vsp, 0),
2744 vcc, gcn_gen_undef (V64SImode), exec));
2745 emit_insn (gen_addcv64si3_exec
2746 (gcn_operand_part (V64SImode, vsp, 1),
2747 gcn_operand_part (V64SImode, vsp, 1),
2748 const0_rtx, vcc, vcc, gcn_gen_undef (V64SImode), exec));
2749
2750 offset += size;
2751 }
2752
2753 rtx move_vectors = get_insns ();
2754 end_sequence ();
2755
2756 if (prologue)
2757 {
2758 emit_insn (move_scalars);
2759 emit_insn (move_vectors);
2760 }
2761 else
2762 {
2763 emit_insn (move_vectors);
2764 emit_insn (move_scalars);
2765 }
2766 }
2767
2768 /* Generate prologue. Called from gen_prologue during pro_and_epilogue pass.
2769
2770 For a non-kernel function, the stack layout looks like this (interim),
2771 growing *upwards*:
2772
2773 hi | + ...
2774 |__________________| <-- current SP
2775 | outgoing args |
2776 |__________________|
2777 | (alloca space) |
2778 |__________________|
2779 | local vars |
2780 |__________________| <-- FP/hard FP
2781 | callee-save regs |
2782 |__________________| <-- soft arg pointer
2783 | pretend args |
2784 |__________________| <-- incoming SP
2785 | incoming args |
2786 lo |..................|
2787
2788 This implies arguments (beyond the first N in registers) must grow
2789 downwards (as, apparently, PA has them do).
2790
2791 For a kernel function we have the simpler:
2792
2793 hi | + ...
2794 |__________________| <-- current SP
2795 | outgoing args |
2796 |__________________|
2797 | (alloca space) |
2798 |__________________|
2799 | local vars |
2800 lo |__________________| <-- FP/hard FP
2801
2802 */
2803
2804 void
2805 gcn_expand_prologue ()
2806 {
2807 machine_function *offsets = gcn_compute_frame_offsets ();
2808
2809 if (!cfun || !cfun->machine || cfun->machine->normal_function)
2810 {
2811 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2812 rtx sp_hi = gcn_operand_part (Pmode, sp, 1);
2813 rtx sp_lo = gcn_operand_part (Pmode, sp, 0);
2814 rtx fp = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2815 rtx fp_hi = gcn_operand_part (Pmode, fp, 1);
2816 rtx fp_lo = gcn_operand_part (Pmode, fp, 0);
2817
2818 start_sequence ();
2819
2820 if (offsets->pretend_size > 0)
2821 {
2822 /* FIXME: Do the actual saving of register pretend args to the stack.
2823 Register order needs consideration. */
2824 }
2825
2826 /* Save callee-save regs. */
2827 move_callee_saved_registers (sp, offsets, true);
2828
2829 HOST_WIDE_INT sp_adjust = offsets->pretend_size
2830 + offsets->callee_saves
2831 + offsets->local_vars + offsets->outgoing_args_size;
2832 if (sp_adjust > 0)
2833 {
2834 /* Adding RTX_FRAME_RELATED_P effectively disables splitting, so
2835    we use a split add explicitly, and specify the DImode add in
2836    the note.  */
2837 rtx scc = gen_rtx_REG (BImode, SCC_REG);
2838 rtx adjustment = gen_int_mode (sp_adjust, SImode);
2839 rtx insn = emit_insn (gen_addsi3_scalar_carry (sp_lo, sp_lo,
2840 adjustment, scc));
2841 RTX_FRAME_RELATED_P (insn) = 1;
2842 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
2843 gen_rtx_SET (sp,
2844 gen_rtx_PLUS (DImode, sp, adjustment)));
2845 emit_insn (gen_addcsi3_scalar_zero (sp_hi, sp_hi, scc));
2846 }
2847
2848 if (offsets->need_frame_pointer)
2849 {
2850 /* Adding RTX_FRAME_RELATED_P effectively disables splitting, so
2851    we use a split add explicitly, and specify the DImode add in
2852    the note.  */
2853 rtx scc = gen_rtx_REG (BImode, SCC_REG);
2854 int fp_adjust = -(offsets->local_vars + offsets->outgoing_args_size);
2855 rtx adjustment = gen_int_mode (fp_adjust, SImode);
2856 rtx insn = emit_insn (gen_addsi3_scalar_carry(fp_lo, sp_lo,
2857 adjustment, scc));
2858 RTX_FRAME_RELATED_P (insn) = 1;
2859 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
2860 gen_rtx_SET (fp,
2861 gen_rtx_PLUS (DImode, sp, adjustment)));
2862 emit_insn (gen_addcsi3_scalar (fp_hi, sp_hi,
2863 (fp_adjust < 0 ? GEN_INT (-1)
2864 : const0_rtx),
2865 scc, scc));
2866 }
2867
2868 rtx_insn *seq = get_insns ();
2869 end_sequence ();
2870
2871 /* FIXME: Prologue insns should have this flag set for debug output, etc.
2872 but it causes issues for now.
2873 for (insn = seq; insn; insn = NEXT_INSN (insn))
2874 if (INSN_P (insn))
2875 RTX_FRAME_RELATED_P (insn) = 1;*/
2876
2877 emit_insn (seq);
2878 }
2879 else
2880 {
2881 rtx wave_offset = gen_rtx_REG (SImode,
2882 cfun->machine->args.
2883 reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]);
2884
2885 if (cfun->machine->args.requested & (1 << FLAT_SCRATCH_INIT_ARG))
2886 {
2887 rtx fs_init_lo =
2888 gen_rtx_REG (SImode,
2889 cfun->machine->args.reg[FLAT_SCRATCH_INIT_ARG]);
2890 rtx fs_init_hi =
2891 gen_rtx_REG (SImode,
2892 cfun->machine->args.reg[FLAT_SCRATCH_INIT_ARG] + 1);
2893 rtx fs_reg_lo = gen_rtx_REG (SImode, FLAT_SCRATCH_REG);
2894 rtx fs_reg_hi = gen_rtx_REG (SImode, FLAT_SCRATCH_REG + 1);
2895
2896 /*rtx queue = gen_rtx_REG(DImode,
2897 cfun->machine->args.reg[QUEUE_PTR_ARG]);
2898 rtx aperture = gen_rtx_MEM (SImode,
2899 gen_rtx_PLUS (DImode, queue,
2900 gen_int_mode (68, SImode)));
2901 set_mem_addr_space (aperture, ADDR_SPACE_SCALAR_FLAT);*/
2902
2903 /* Set up flat_scratch. */
2904 emit_insn (gen_addsi3_scc (fs_reg_hi, fs_init_lo, wave_offset));
2905 emit_insn (gen_lshrsi3_scc (fs_reg_hi, fs_reg_hi,
2906 gen_int_mode (8, SImode)));
2907 emit_move_insn (fs_reg_lo, fs_init_hi);
2908 }
2909
2910 /* Set up frame pointer and stack pointer. */
2911 rtx sp = gen_rtx_REG (DImode, STACK_POINTER_REGNUM);
2912 rtx sp_hi = simplify_gen_subreg (SImode, sp, DImode, 4);
2913 rtx sp_lo = simplify_gen_subreg (SImode, sp, DImode, 0);
2914 rtx fp = gen_rtx_REG (DImode, HARD_FRAME_POINTER_REGNUM);
2915 rtx fp_hi = simplify_gen_subreg (SImode, fp, DImode, 4);
2916 rtx fp_lo = simplify_gen_subreg (SImode, fp, DImode, 0);
2917
2918 HOST_WIDE_INT sp_adjust = (offsets->local_vars
2919 + offsets->outgoing_args_size);
2920
2921 /* Initialise FP and SP from the buffer descriptor in s[0:3]. */
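      /* The descriptor's base address occupies the low 48 bits of s[0:1]
	 (hence masking the high word with 0xffff); adding the per-wave
	 scratch offset yields this wavefront's private stack base.  */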
2922 emit_move_insn (fp_lo, gen_rtx_REG (SImode, 0));
2923 emit_insn (gen_andsi3_scc (fp_hi, gen_rtx_REG (SImode, 1),
2924 gen_int_mode (0xffff, SImode)));
2925 rtx scc = gen_rtx_REG (BImode, SCC_REG);
2926 emit_insn (gen_addsi3_scalar_carry (fp_lo, fp_lo, wave_offset, scc));
2927 emit_insn (gen_addcsi3_scalar_zero (fp_hi, fp_hi, scc));
2928
2929 /* Adding RTX_FRAME_RELATED_P effectively disables splitting, so we use
2930    a split add explicitly, and specify the DImode add in the note.
2931 The DWARF info expects that the callee-save data is in the frame,
2932 even though it isn't (because this is the entry point), so we
2933 make a notional adjustment to the DWARF frame offset here. */
2934 rtx dbg_adjustment = gen_int_mode (sp_adjust + offsets->callee_saves,
2935 DImode);
2936 rtx insn;
2937 if (sp_adjust > 0)
2938 {
2939 rtx scc = gen_rtx_REG (BImode, SCC_REG);
2940 rtx adjustment = gen_int_mode (sp_adjust, DImode);
2941 insn = emit_insn (gen_addsi3_scalar_carry(sp_lo, fp_lo, adjustment,
2942 scc));
2943 emit_insn (gen_addcsi3_scalar_zero (sp_hi, fp_hi, scc));
2944 }
2945 else
2946 insn = emit_move_insn (sp, fp);
2947 RTX_FRAME_RELATED_P (insn) = 1;
2948 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
2949 gen_rtx_SET (sp, gen_rtx_PLUS (DImode, sp,
2950 dbg_adjustment)));
2951
2952 /* Make sure the flat scratch reg doesn't get optimised away. */
2953 emit_insn (gen_prologue_use (gen_rtx_REG (DImode, FLAT_SCRATCH_REG)));
2954 }
2955
2956 /* Ensure that the scheduler doesn't do anything unexpected. */
2957 emit_insn (gen_blockage ());
2958
2959 /* m0 is initialized for the usual LDS DS and FLAT memory case.
2960 The low-part is the address of the topmost addressable byte, which is
2961 size-1. The high-part is an offset and should be zero. */
2962 emit_move_insn (gen_rtx_REG (SImode, M0_REG),
2963 gen_int_mode (LDS_SIZE-1, SImode));
2964
2965 emit_insn (gen_prologue_use (gen_rtx_REG (SImode, M0_REG)));
2966
2967 if (cfun && cfun->machine && !cfun->machine->normal_function && flag_openmp)
2968 {
2969 /* OpenMP kernels have an implicit call to gomp_gcn_enter_kernel. */
2970 rtx fn_reg = gen_rtx_REG (Pmode, FIRST_PARM_REG);
2971 emit_move_insn (fn_reg, gen_rtx_SYMBOL_REF (Pmode,
2972 "gomp_gcn_enter_kernel"));
2973 emit_call_insn (gen_gcn_indirect_call (fn_reg, const0_rtx));
2974 }
2975 }
2976
2977 /* Generate epilogue. Called from gen_epilogue during pro_and_epilogue pass.
2978
2979 See gcn_expand_prologue for stack details. */
2980
2981 void
2982 gcn_expand_epilogue (void)
2983 {
2984 /* Ensure that the scheduler doesn't do anything unexpected. */
2985 emit_insn (gen_blockage ());
2986
2987 if (!cfun || !cfun->machine || cfun->machine->normal_function)
2988 {
2989 machine_function *offsets = gcn_compute_frame_offsets ();
2990 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2991 rtx fp = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2992
2993 HOST_WIDE_INT sp_adjust = offsets->callee_saves + offsets->pretend_size;
2994
2995 if (offsets->need_frame_pointer)
2996 {
2997 /* Restore old SP from the frame pointer. */
2998 if (sp_adjust > 0)
2999 emit_insn (gen_subdi3 (sp, fp, gen_int_mode (sp_adjust, DImode)));
3000 else
3001 emit_move_insn (sp, fp);
3002 }
3003 else
3004 {
3005 /* Restore old SP from current SP. */
3006 sp_adjust += offsets->outgoing_args_size + offsets->local_vars;
3007
3008 if (sp_adjust > 0)
3009 emit_insn (gen_subdi3 (sp, sp, gen_int_mode (sp_adjust, DImode)));
3010 }
3011
3012 move_callee_saved_registers (sp, offsets, false);
3013
3014 /* There's no explicit use of the link register on the return insn. Emit
3015 one here instead. */
3016 if (offsets->lr_needs_saving)
3017 emit_use (gen_rtx_REG (DImode, LINK_REGNUM));
3018
3019 /* Similar for frame pointer. */
3020 if (offsets->need_frame_pointer)
3021 emit_use (gen_rtx_REG (DImode, HARD_FRAME_POINTER_REGNUM));
3022 }
3023 else if (flag_openmp)
3024 {
3025 /* OpenMP kernels have an implicit call to gomp_gcn_exit_kernel. */
3026 rtx fn_reg = gen_rtx_REG (Pmode, FIRST_PARM_REG);
3027 emit_move_insn (fn_reg,
3028 gen_rtx_SYMBOL_REF (Pmode, "gomp_gcn_exit_kernel"));
3029 emit_call_insn (gen_gcn_indirect_call (fn_reg, const0_rtx));
3030 }
3031 else if (TREE_CODE (TREE_TYPE (DECL_RESULT (cfun->decl))) != VOID_TYPE)
3032 {
3033 /* Assume that an exit value compatible with gcn-run is expected.
3034 That is, the third input parameter is an int*.
3035
3036 We can't allocate any new registers, but the kernarg_reg is
3037 dead after this, so we'll use that. */
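      /* The return-value pointer is the third kernel argument and lives at
	 byte offset 16 in the kernarg segment: load the pointer first, then
	 store the return value through it.  */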
3038 rtx kernarg_reg = gen_rtx_REG (DImode, cfun->machine->args.reg
3039 [KERNARG_SEGMENT_PTR_ARG]);
3040 rtx retptr_mem = gen_rtx_MEM (DImode,
3041 gen_rtx_PLUS (DImode, kernarg_reg,
3042 GEN_INT (16)));
3043 set_mem_addr_space (retptr_mem, ADDR_SPACE_SCALAR_FLAT);
3044 emit_move_insn (kernarg_reg, retptr_mem);
3045
3046 rtx retval_mem = gen_rtx_MEM (SImode, kernarg_reg);
3047 set_mem_addr_space (retval_mem, ADDR_SPACE_SCALAR_FLAT);
3048 emit_move_insn (retval_mem,
3049 gen_rtx_REG (SImode, SGPR_REGNO (RETURN_VALUE_REG)));
3050 }
3051
3052 emit_jump_insn (gen_gcn_return ());
3053 }
3054
3055 /* Implement TARGET_CAN_ELIMINATE.
3056
3057 Return true if the compiler is allowed to try to replace register number
3058 FROM_REG with register number TO_REG.
3059
3060 FIXME: is the default "true" not enough? Should this be a negative set? */
3061
3062 bool
3063 gcn_can_eliminate_p (int /*from_reg */ , int to_reg)
3064 {
3065 return (to_reg == HARD_FRAME_POINTER_REGNUM
3066 || to_reg == STACK_POINTER_REGNUM);
3067 }
3068
3069 /* Implement INITIAL_ELIMINATION_OFFSET.
3070
3071 Returns the initial difference between the specified pair of registers, in
3072 terms of stack position. */
3073
3074 HOST_WIDE_INT
3075 gcn_initial_elimination_offset (int from, int to)
3076 {
3077 machine_function *offsets = gcn_compute_frame_offsets ();
3078
3079 switch (from)
3080 {
3081 case ARG_POINTER_REGNUM:
3082 if (to == STACK_POINTER_REGNUM)
3083 return -(offsets->callee_saves + offsets->local_vars
3084 + offsets->outgoing_args_size);
3085 else if (to == FRAME_POINTER_REGNUM || to == HARD_FRAME_POINTER_REGNUM)
3086 return -offsets->callee_saves;
3087 else
3088 gcc_unreachable ();
3089 break;
3090
3091 case FRAME_POINTER_REGNUM:
3092 if (to == STACK_POINTER_REGNUM)
3093 return -(offsets->local_vars + offsets->outgoing_args_size);
3094 else if (to == HARD_FRAME_POINTER_REGNUM)
3095 return 0;
3096 else
3097 gcc_unreachable ();
3098 break;
3099
3100 default:
3101 gcc_unreachable ();
3102 }
3103 }
3104
3105 /* Implement HARD_REGNO_RENAME_OK.
3106
3107 Return true if it is permissible to rename a hard register from
3108 FROM_REG to TO_REG. */
3109
3110 bool
3111 gcn_hard_regno_rename_ok (unsigned int from_reg, unsigned int to_reg)
3112 {
3113 if (from_reg == SCC_REG
3114 || from_reg == VCC_LO_REG || from_reg == VCC_HI_REG
3115 || from_reg == EXEC_LO_REG || from_reg == EXEC_HI_REG
3116 || to_reg == SCC_REG
3117 || to_reg == VCC_LO_REG || to_reg == VCC_HI_REG
3118 || to_reg == EXEC_LO_REG || to_reg == EXEC_HI_REG)
3119 return false;
3120
3121 /* Allow the link register to be used if it was saved. */
3122 if ((to_reg & ~1) == LINK_REGNUM)
3123 return !cfun || cfun->machine->lr_needs_saving;
3124
3125 /* Allow the registers used for the static chain to be used if the chain is
3126 not in active use. */
3127 if ((to_reg & ~1) == STATIC_CHAIN_REGNUM)
3128 return !cfun
3129 || !(cfun->static_chain_decl
3130 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
3131 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM + 1));
3132
3133 return true;
3134 }
3135
3136 /* Implement HARD_REGNO_CALLER_SAVE_MODE.
3137
3138 Which mode is required for saving NREGS of a pseudo-register in
3139 call-clobbered hard register REGNO. */
3140
3141 machine_mode
3142 gcn_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
3143 machine_mode regmode)
3144 {
3145 machine_mode result = choose_hard_reg_mode (regno, nregs, NULL);
3146
3147 if (VECTOR_MODE_P (result) && !VECTOR_MODE_P (regmode))
3148 result = (nregs == 1 ? SImode : DImode);
3149
3150 return result;
3151 }
3152
3153 /* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE.
3154
3155 Output assembler code for a block containing the constant parts
3156 of a trampoline, leaving space for the variable parts. */
3157
3158 static void
3159 gcn_asm_trampoline_template (FILE *f)
3160 {
3161 /* The source operand of the move instructions must be a 32-bit
3162 constant following the opcode. */
3163 asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", STATIC_CHAIN_REGNUM);
3164 asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", STATIC_CHAIN_REGNUM + 1);
3165 asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", CC_SAVE_REG);
3166 asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", CC_SAVE_REG + 1);
3167 asm_fprintf (f, "\ts_setpc_b64\ts[%i:%i]\n", CC_SAVE_REG, CC_SAVE_REG + 1);
3168 }
3169
3170 /* Implement TARGET_TRAMPOLINE_INIT.
3171
3172 Emit RTL insns to initialize the variable parts of a trampoline.
3173 FNDECL is the decl of the target address, M_TRAMP is a MEM for
3174 the trampoline, and CHAIN_VALUE is an RTX for the static chain
3175 to be passed to the target function. */
3176
3177 static void
3178 gcn_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3179 {
3180 if (TARGET_GCN5_PLUS)
3181 sorry ("nested function trampolines not supported on GCN5 due to"
3182 " non-executable stacks");
3183
3184 emit_block_move (m_tramp, assemble_trampoline_template (),
3185 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3186
3187 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
3188 rtx chain_value_reg = copy_to_reg (chain_value);
3189 rtx fnaddr_reg = copy_to_reg (fnaddr);
3190
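  /* Each s_mov_b32 in the template is 8 bytes long: a 4-byte opcode followed
     by its 32-bit literal, so the i-th literal to patch lives at byte offset
     i * 8 + 4.  */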
3191 for (int i = 0; i < 4; i++)
3192 {
3193 rtx mem = adjust_address (m_tramp, SImode, i * 8 + 4);
3194 rtx reg = i < 2 ? chain_value_reg : fnaddr_reg;
3195 emit_move_insn (mem, gen_rtx_SUBREG (SImode, reg, (i % 2) * 4));
3196 }
3197
3198 rtx tramp_addr = XEXP (m_tramp, 0);
3199 emit_insn (gen_clear_icache (tramp_addr,
3200 plus_constant (ptr_mode, tramp_addr,
3201 TRAMPOLINE_SIZE)));
3202 }
3203
3204 /* }}} */
3205 /* {{{ Miscellaneous. */
3206
3207 /* Implement TARGET_CANNOT_COPY_INSN_P.
3208
3209 Return true if INSN must not be duplicated. */
3210
3211 static bool
3212 gcn_cannot_copy_insn_p (rtx_insn *insn)
3213 {
3214 if (recog_memoized (insn) == CODE_FOR_gcn_wavefront_barrier)
3215 return true;
3216
3217 return false;
3218 }
3219
3220 /* Implement TARGET_DEBUG_UNWIND_INFO.
3221
3222 Defines the mechanism that will be used for describing frame unwind
3223 information to the debugger. */
3224
3225 static enum unwind_info_type
3226 gcn_debug_unwind_info ()
3227 {
3228 /* No support for debug info, yet. */
3229 return UI_NONE;
3230 }
3231
3232 /* Determine if there is a suitable hardware conversion instruction.
3233 Used primarily by the machine description. */
3234
3235 bool
3236 gcn_valid_cvt_p (machine_mode from, machine_mode to, enum gcn_cvt_t op)
3237 {
3238 if (VECTOR_MODE_P (from) != VECTOR_MODE_P (to))
3239 return false;
3240
3241 if (VECTOR_MODE_P (from))
3242 {
3243 from = GET_MODE_INNER (from);
3244 to = GET_MODE_INNER (to);
3245 }
3246
3247 switch (op)
3248 {
3249 case fix_trunc_cvt:
3250 case fixuns_trunc_cvt:
3251 if (GET_MODE_CLASS (from) != MODE_FLOAT
3252 || GET_MODE_CLASS (to) != MODE_INT)
3253 return false;
3254 break;
3255 case float_cvt:
3256 case floatuns_cvt:
3257 if (GET_MODE_CLASS (from) != MODE_INT
3258 || GET_MODE_CLASS (to) != MODE_FLOAT)
3259 return false;
3260 break;
3261 case extend_cvt:
3262 if (GET_MODE_CLASS (from) != MODE_FLOAT
3263 || GET_MODE_CLASS (to) != MODE_FLOAT
3264 || GET_MODE_SIZE (from) >= GET_MODE_SIZE (to))
3265 return false;
3266 break;
3267 case trunc_cvt:
3268 if (GET_MODE_CLASS (from) != MODE_FLOAT
3269 || GET_MODE_CLASS (to) != MODE_FLOAT
3270 || GET_MODE_SIZE (from) <= GET_MODE_SIZE (to))
3271 return false;
3272 break;
3273 }
3274
3275 return ((to == HImode && from == HFmode)
3276 || (to == SImode && (from == SFmode || from == DFmode))
3277 || (to == HFmode && (from == HImode || from == SFmode))
3278 || (to == SFmode && (from == SImode || from == HFmode
3279 || from == DFmode))
3280 || (to == DFmode && (from == SImode || from == SFmode)));
3281 }
3282
3283 /* Implement TARGET_EMUTLS_VAR_INIT.
3284
3285 Disable emutls (gthr-gcn.h does not support it, yet). */
3286
3287 tree
3288 gcn_emutls_var_init (tree, tree decl, tree)
3289 {
3290 sorry_at (DECL_SOURCE_LOCATION (decl), "TLS is not implemented for GCN.");
3291 return NULL_TREE;
3292 }
3293
3294 /* }}} */
3295 /* {{{ Costs. */
3296
3297 /* Implement TARGET_RTX_COSTS.
3298
3299 Compute a (partial) cost for rtx X. Return true if the complete
3300 cost has been computed, and false if subexpressions should be
3301 scanned. In either case, *TOTAL contains the cost result. */
3302
3303 static bool
3304 gcn_rtx_costs (rtx x, machine_mode, int, int, int *total, bool)
3305 {
3306 enum rtx_code code = GET_CODE (x);
3307 switch (code)
3308 {
3309 case CONST:
3310 case CONST_DOUBLE:
3311 case CONST_VECTOR:
3312 case CONST_INT:
3313 if (gcn_inline_constant_p (x))
3314 *total = 0;
3315 else if (code == CONST_INT
3316 && ((unsigned HOST_WIDE_INT) INTVAL (x) + 0x8000) < 0x10000)
3317 *total = 1;
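      /* The case above covers constants in the signed 16-bit range, which
	 (presumably because they fit s_movk_i32's immediate field) are
	 slightly cheaper than other non-inline constants.  */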
3318 else if (gcn_constant_p (x))
3319 *total = 2;
3320 else
3321 *total = vgpr_vector_mode_p (GET_MODE (x)) ? 64 : 4;
3322 return true;
3323
3324 case DIV:
3325 *total = 100;
3326 return false;
3327
3328 default:
3329 *total = 3;
3330 return false;
3331 }
3332 }
3333
3334 /* Implement TARGET_MEMORY_MOVE_COST.
3335
3336 Return the cost of moving data of mode M between a
3337 register and memory. A value of 2 is the default; this cost is
3338 relative to those in `REGISTER_MOVE_COST'.
3339
3340    If moving between registers and memory is more expensive than between
3341    two registers, this hook expresses the relative cost.  IN is true for
3342    loads (memory to register) and false for stores.  */
3350
3351 #define LOAD_COST 32
3352 #define STORE_COST 32
3353 static int
3354 gcn_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
3355 {
3356 int nregs = CEIL (GET_MODE_SIZE (mode), 4);
3357 switch (regclass)
3358 {
3359 case SCC_CONDITIONAL_REG:
3360 case VCCZ_CONDITIONAL_REG:
3361 case VCC_CONDITIONAL_REG:
3362 case EXECZ_CONDITIONAL_REG:
3363 case ALL_CONDITIONAL_REGS:
3364 case SGPR_REGS:
3365 case SGPR_EXEC_REGS:
3366 case EXEC_MASK_REG:
3367 case SGPR_VOP_SRC_REGS:
3368 case SGPR_MEM_SRC_REGS:
3369 case SGPR_SRC_REGS:
3370 case SGPR_DST_REGS:
3371 case GENERAL_REGS:
3372 case AFP_REGS:
3373 if (!in)
3374 return (STORE_COST + 2) * nregs;
3375 return LOAD_COST * nregs;
3376 case VGPR_REGS:
3377 if (in)
3378 return (LOAD_COST + 2) * nregs;
3379 return STORE_COST * nregs;
3380 case ALL_REGS:
3381 case ALL_GPR_REGS:
3382 case SRCDST_REGS:
3383 if (in)
3384 return (LOAD_COST + 2) * nregs;
3385 return (STORE_COST + 2) * nregs;
3386 default:
3387 gcc_unreachable ();
3388 }
3389 }
3390
3391 /* Implement TARGET_REGISTER_MOVE_COST.
3392
3393 Return the cost of moving data from a register in class CLASS1 to
3394 one in class CLASS2. Base value is 2. */
3395
3396 static int
3397 gcn_register_move_cost (machine_mode, reg_class_t dst, reg_class_t src)
3398 {
3399 /* Increase the cost of moving to and from vector registers.  While this
3400    is fast in hardware (I think), it has the hidden cost of setting up
3401    the exec mask first.  */
3402 if ((src < VGPR_REGS) != (dst < VGPR_REGS))
3403 return 4;
3404 return 2;
3405 }
3406
3407 /* }}} */
3408 /* {{{ Builtins. */
3409
3410 /* Type codes used by GCN built-in definitions. */
3411
3412 enum gcn_builtin_type_index
3413 {
3414 GCN_BTI_END_OF_PARAMS,
3415
3416 GCN_BTI_VOID,
3417 GCN_BTI_BOOL,
3418 GCN_BTI_INT,
3419 GCN_BTI_UINT,
3420 GCN_BTI_SIZE_T,
3421 GCN_BTI_LLINT,
3422 GCN_BTI_LLUINT,
3423 GCN_BTI_EXEC,
3424
3425 GCN_BTI_SF,
3426 GCN_BTI_V64SI,
3427 GCN_BTI_V64SF,
3428 GCN_BTI_V64PTR,
3429 GCN_BTI_SIPTR,
3430 GCN_BTI_SFPTR,
3431 GCN_BTI_VOIDPTR,
3432
3433 GCN_BTI_LDS_VOIDPTR,
3434
3435 GCN_BTI_MAX
3436 };
3437
3438 static GTY(()) tree gcn_builtin_types[GCN_BTI_MAX];
3439
3440 #define exec_type_node (gcn_builtin_types[GCN_BTI_EXEC])
3441 #define sf_type_node (gcn_builtin_types[GCN_BTI_SF])
3442 #define v64si_type_node (gcn_builtin_types[GCN_BTI_V64SI])
3443 #define v64sf_type_node (gcn_builtin_types[GCN_BTI_V64SF])
3444 #define v64ptr_type_node (gcn_builtin_types[GCN_BTI_V64PTR])
3445 #define siptr_type_node (gcn_builtin_types[GCN_BTI_SIPTR])
3446 #define sfptr_type_node (gcn_builtin_types[GCN_BTI_SFPTR])
3447 #define voidptr_type_node (gcn_builtin_types[GCN_BTI_VOIDPTR])
3448 #define size_t_type_node (gcn_builtin_types[GCN_BTI_SIZE_T])
3449
3450 static rtx gcn_expand_builtin_1 (tree, rtx, rtx, machine_mode, int,
3451 struct gcn_builtin_description *);
3452 static rtx gcn_expand_builtin_binop (tree, rtx, rtx, machine_mode, int,
3453 struct gcn_builtin_description *);
3454
3455 struct gcn_builtin_description;
3456 typedef rtx (*gcn_builtin_expander) (tree, rtx, rtx, machine_mode, int,
3457 struct gcn_builtin_description *);
3458
3459 enum gcn_builtin_type
3460 {
3461 B_UNIMPLEMENTED, /* Not implemented: issue a sorry ().  */
3462 B_INSN, /* Emit a pattern */
3463 B_OVERLOAD /* Placeholder for an overloaded function */
3464 };
3465
3466 struct gcn_builtin_description
3467 {
3468 int fcode;
3469 int icode;
3470 const char *name;
3471 enum gcn_builtin_type type;
3472 /* The first element of parm is always the return type. The rest
3473    are a zero-terminated list of parameters.  */
3474 int parm[6];
3475 gcn_builtin_expander expander;
3476 };
3477
3478 /* Read in the GCN builtins from gcn-builtins.def. */
3479
3480 extern GTY(()) struct gcn_builtin_description gcn_builtins[GCN_BUILTIN_MAX];
3481
3482 struct gcn_builtin_description gcn_builtins[] = {
3483 #define DEF_BUILTIN(fcode, icode, name, type, params, expander) \
3484 {GCN_BUILTIN_ ## fcode, icode, name, type, params, expander},
3485
3486 #define DEF_BUILTIN_BINOP_INT_FP(fcode, ic, name) \
3487 {GCN_BUILTIN_ ## fcode ## _V64SI, \
3488 CODE_FOR_ ## ic ##v64si3_exec, name "_v64int", B_INSN, \
3489 {GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_V64SI, GCN_BTI_V64SI, \
3490 GCN_BTI_V64SI, GCN_BTI_END_OF_PARAMS}, gcn_expand_builtin_binop}, \
3491 {GCN_BUILTIN_ ## fcode ## _V64SI_unspec, \
3492 CODE_FOR_ ## ic ##v64si3_exec, name "_v64int_unspec", B_INSN, \
3493 {GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_V64SI, GCN_BTI_V64SI, \
3494 GCN_BTI_END_OF_PARAMS}, gcn_expand_builtin_binop},
3495
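/* Note that DEF_BUILTIN_BINOP_INT_FP creates two user-visible variants per
   operation: a "_v64int" form taking an extra vector operand (apparently the
   value that inactive lanes keep), and a "_v64int_unspec" form that leaves
   those lanes undefined.  */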
3496 #include "gcn-builtins.def"
3497 #undef DEF_BUILTIN_BINOP_INT_FP
3498 #undef DEF_BUILTIN
3499 };
3500
3501 static GTY(()) tree gcn_builtin_decls[GCN_BUILTIN_MAX];
3502
3503 /* Implement TARGET_BUILTIN_DECL.
3504
3505 Return the GCN builtin for CODE. */
3506
3507 tree
3508 gcn_builtin_decl (unsigned code, bool ARG_UNUSED (initialize_p))
3509 {
3510 if (code >= GCN_BUILTIN_MAX)
3511 return error_mark_node;
3512
3513 return gcn_builtin_decls[code];
3514 }
3515
3516 /* Helper function for gcn_init_builtins. */
3517
3518 static void
3519 gcn_init_builtin_types (void)
3520 {
3521 gcn_builtin_types[GCN_BTI_VOID] = void_type_node;
3522 gcn_builtin_types[GCN_BTI_BOOL] = boolean_type_node;
3523 gcn_builtin_types[GCN_BTI_INT] = intSI_type_node;
3524 gcn_builtin_types[GCN_BTI_UINT] = unsigned_type_for (intSI_type_node);
3525 gcn_builtin_types[GCN_BTI_SIZE_T] = size_type_node;
3526 gcn_builtin_types[GCN_BTI_LLINT] = intDI_type_node;
3527 gcn_builtin_types[GCN_BTI_LLUINT] = unsigned_type_for (intDI_type_node);
3528
3529 exec_type_node = unsigned_intDI_type_node;
3530 sf_type_node = float32_type_node;
3531 v64si_type_node = build_vector_type (intSI_type_node, 64);
3532 v64sf_type_node = build_vector_type (float_type_node, 64);
3533 v64ptr_type_node = build_vector_type (unsigned_intDI_type_node
3534 /*build_pointer_type
3535 (integer_type_node) */
3536 , 64);
3537 tree tmp = build_distinct_type_copy (intSI_type_node);
3538 TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_FLAT;
3539 siptr_type_node = build_pointer_type (tmp);
3540
3541 tmp = build_distinct_type_copy (float_type_node);
3542 TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_FLAT;
3543 sfptr_type_node = build_pointer_type (tmp);
3544
3545 tmp = build_distinct_type_copy (void_type_node);
3546 TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_FLAT;
3547 voidptr_type_node = build_pointer_type (tmp);
3548
3549 tmp = build_distinct_type_copy (void_type_node);
3550 TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_LDS;
3551 gcn_builtin_types[GCN_BTI_LDS_VOIDPTR] = build_pointer_type (tmp);
3552 }
3553
3554 /* Implement TARGET_INIT_BUILTINS.
3555
3556 Set up all builtin functions for this target. */
3557
3558 static void
3559 gcn_init_builtins (void)
3560 {
3561 gcn_init_builtin_types ();
3562
3563 struct gcn_builtin_description *d;
3564 unsigned int i;
3565 for (i = 0, d = gcn_builtins; i < GCN_BUILTIN_MAX; i++, d++)
3566 {
3567 tree p;
3568 char name[64]; /* add_builtin_function will make a copy. */
3569 int parm;
3570
3571 /* FIXME: Is this necessary/useful? */
3572 if (d->name == 0)
3573 continue;
3574
3575 /* Find last parm. */
3576 for (parm = 1; d->parm[parm] != GCN_BTI_END_OF_PARAMS; parm++)
3577 ;
3578
3579 p = void_list_node;
3580 while (parm > 1)
3581 p = tree_cons (NULL_TREE, gcn_builtin_types[d->parm[--parm]], p);
3582
3583 p = build_function_type (gcn_builtin_types[d->parm[0]], p);
3584
3585 sprintf (name, "__builtin_gcn_%s", d->name);
3586 gcn_builtin_decls[i]
3587 = add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
3588
3589 /* These builtins don't throw. */
3590 TREE_NOTHROW (gcn_builtin_decls[i]) = 1;
3591 }
3592
3593 /* FIXME: remove the ifdef once OpenACC support is merged upstream. */
3594 #ifdef BUILT_IN_GOACC_SINGLE_START
3595 /* These builtins need to take/return an LDS pointer: override the generic
3596 versions here. */
3597
3598 set_builtin_decl (BUILT_IN_GOACC_SINGLE_START,
3599 gcn_builtin_decls[GCN_BUILTIN_ACC_SINGLE_START], false);
3600
3601 set_builtin_decl (BUILT_IN_GOACC_SINGLE_COPY_START,
3602 gcn_builtin_decls[GCN_BUILTIN_ACC_SINGLE_COPY_START],
3603 false);
3604
3605 set_builtin_decl (BUILT_IN_GOACC_SINGLE_COPY_END,
3606 gcn_builtin_decls[GCN_BUILTIN_ACC_SINGLE_COPY_END],
3607 false);
3608
3609 set_builtin_decl (BUILT_IN_GOACC_BARRIER,
3610 gcn_builtin_decls[GCN_BUILTIN_ACC_BARRIER], false);
3611 #endif
3612 }
3613
3614 /* Expand the CMP_SWAP GCN builtins. We have our own versions that do
3615 not require taking the address of any object, other than the memory
3616 cell being operated on.
3617
3618 Helper function for gcn_expand_builtin_1. */
3619
3620 static rtx
3621 gcn_expand_cmp_swap (tree exp, rtx target)
3622 {
3623 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
3624 addr_space_t as
3625 = TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (CALL_EXPR_ARG (exp, 0))));
3626 machine_mode as_mode = gcn_addr_space_address_mode (as);
3627
3628 if (!target)
3629 target = gen_reg_rtx (mode);
3630
3631 rtx addr = expand_expr (CALL_EXPR_ARG (exp, 0),
3632 NULL_RTX, as_mode, EXPAND_NORMAL);
3633 rtx cmp = expand_expr (CALL_EXPR_ARG (exp, 1),
3634 NULL_RTX, mode, EXPAND_NORMAL);
3635 rtx src = expand_expr (CALL_EXPR_ARG (exp, 2),
3636 NULL_RTX, mode, EXPAND_NORMAL);
3637 rtx pat;
3638
3639 rtx mem = gen_rtx_MEM (mode, force_reg (as_mode, addr));
3640 set_mem_addr_space (mem, as);
3641
3642 if (!REG_P (cmp))
3643 cmp = copy_to_mode_reg (mode, cmp);
3644 if (!REG_P (src))
3645 src = copy_to_mode_reg (mode, src);
3646
3647 if (mode == SImode)
3648 pat = gen_sync_compare_and_swapsi (target, mem, cmp, src);
3649 else
3650 pat = gen_sync_compare_and_swapdi (target, mem, cmp, src);
3651
3652 emit_insn (pat);
3653
3654 return target;
3655 }
3656
3657 /* Expand many different builtins.
3658
3659 Intended for use in gcn-builtins.def. */
3660
3661 static rtx
3662 gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ ,
3663 machine_mode /*mode */ , int ignore,
3664 struct gcn_builtin_description *)
3665 {
3666 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
3667 switch (DECL_MD_FUNCTION_CODE (fndecl))
3668 {
3669 case GCN_BUILTIN_FLAT_LOAD_INT32:
3670 {
3671 if (ignore)
3672 return target;
3673 /*rtx exec = */
3674 force_reg (DImode,
3675 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, DImode,
3676 EXPAND_NORMAL));
3677 /*rtx ptr = */
3678 force_reg (V64DImode,
3679 expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, V64DImode,
3680 EXPAND_NORMAL));
3681 /*emit_insn (gen_vector_flat_loadv64si
3682 (target, gcn_gen_undef (V64SImode), ptr, exec)); */
3683 return target;
3684 }
3685 case GCN_BUILTIN_FLAT_LOAD_PTR_INT32:
3686 case GCN_BUILTIN_FLAT_LOAD_PTR_FLOAT:
3687 {
3688 if (ignore)
3689 return target;
3690 rtx exec = force_reg (DImode,
3691 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
3692 DImode,
3693 EXPAND_NORMAL));
3694 rtx ptr = force_reg (DImode,
3695 expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
3696 V64DImode,
3697 EXPAND_NORMAL));
3698 rtx offsets = force_reg (V64SImode,
3699 expand_expr (CALL_EXPR_ARG (exp, 2),
3700 NULL_RTX, V64DImode,
3701 EXPAND_NORMAL));
3702 rtx addrs = gen_reg_rtx (V64DImode);
3703 rtx tmp = gen_reg_rtx (V64SImode);
3704 emit_insn (gen_ashlv64si3_exec (tmp, offsets,
3705 GEN_INT (2),
3706 gcn_gen_undef (V64SImode), exec));
3707 emit_insn (gen_addv64di3_zext_dup2_exec (addrs, tmp, ptr,
3708 gcn_gen_undef (V64DImode),
3709 exec));
3710 rtx mem = gen_rtx_MEM (GET_MODE (target), addrs);
3711 /*set_mem_addr_space (mem, ADDR_SPACE_FLAT); */
3712 /* FIXME: set attributes. */
3713 emit_insn (gen_mov_with_exec (target, mem, exec));
3714 return target;
3715 }
3716 case GCN_BUILTIN_FLAT_STORE_PTR_INT32:
3717 case GCN_BUILTIN_FLAT_STORE_PTR_FLOAT:
3718 {
3719 rtx exec = force_reg (DImode,
3720 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
3721 DImode,
3722 EXPAND_NORMAL));
3723 rtx ptr = force_reg (DImode,
3724 expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
3725 V64DImode,
3726 EXPAND_NORMAL));
3727 rtx offsets = force_reg (V64SImode,
3728 expand_expr (CALL_EXPR_ARG (exp, 2),
3729 NULL_RTX, V64DImode,
3730 EXPAND_NORMAL));
3731 machine_mode vmode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (exp,
3732 3)));
3733 rtx val = force_reg (vmode,
3734 expand_expr (CALL_EXPR_ARG (exp, 3), NULL_RTX,
3735 vmode,
3736 EXPAND_NORMAL));
3737 rtx addrs = gen_reg_rtx (V64DImode);
3738 rtx tmp = gen_reg_rtx (V64SImode);
3739 emit_insn (gen_ashlv64si3_exec (tmp, offsets,
3740 GEN_INT (2),
3741 gcn_gen_undef (V64SImode), exec));
3742 emit_insn (gen_addv64di3_zext_dup2_exec (addrs, tmp, ptr,
3743 gcn_gen_undef (V64DImode),
3744 exec));
3745 rtx mem = gen_rtx_MEM (vmode, addrs);
3746 /*set_mem_addr_space (mem, ADDR_SPACE_FLAT); */
3747 /* FIXME: set attributes. */
3748 emit_insn (gen_mov_with_exec (mem, val, exec));
3749 return target;
3750 }
3751 case GCN_BUILTIN_SQRTVF:
3752 {
3753 if (ignore)
3754 return target;
3755 rtx exec = gcn_full_exec_reg ();
3756 rtx arg = force_reg (V64SFmode,
3757 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
3758 V64SFmode,
3759 EXPAND_NORMAL));
3760 emit_insn (gen_sqrtv64sf2_exec
3761 (target, arg, gcn_gen_undef (V64SFmode), exec));
3762 return target;
3763 }
3764 case GCN_BUILTIN_SQRTF:
3765 {
3766 if (ignore)
3767 return target;
3768 rtx arg = force_reg (SFmode,
3769 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
3770 SFmode,
3771 EXPAND_NORMAL));
3772 emit_insn (gen_sqrtsf2 (target, arg));
3773 return target;
3774 }
3775 case GCN_BUILTIN_OMP_DIM_SIZE:
3776 {
3777 if (ignore)
3778 return target;
3779 emit_insn (gen_oacc_dim_size (target,
3780 expand_expr (CALL_EXPR_ARG (exp, 0),
3781 NULL_RTX, SImode,
3782 EXPAND_NORMAL)));
3783 return target;
3784 }
3785 case GCN_BUILTIN_OMP_DIM_POS:
3786 {
3787 if (ignore)
3788 return target;
3789 emit_insn (gen_oacc_dim_pos (target,
3790 expand_expr (CALL_EXPR_ARG (exp, 0),
3791 NULL_RTX, SImode,
3792 EXPAND_NORMAL)));
3793 return target;
3794 }
3795 case GCN_BUILTIN_CMP_SWAP:
3796 case GCN_BUILTIN_CMP_SWAPLL:
3797 return gcn_expand_cmp_swap (exp, target);
3798
3799 case GCN_BUILTIN_ACC_SINGLE_START:
3800 {
3801 if (ignore)
3802 return target;
3803
3804 rtx wavefront = gcn_oacc_dim_pos (1);
3805 rtx cond = gen_rtx_EQ (VOIDmode, wavefront, const0_rtx);
3806 rtx cc = (target && REG_P (target)) ? target : gen_reg_rtx (BImode);
3807 emit_insn (gen_cstoresi4 (cc, cond, wavefront, const0_rtx));
3808 return cc;
3809 }
3810
3811 case GCN_BUILTIN_ACC_SINGLE_COPY_START:
3812 {
3813 rtx blk = force_reg (SImode,
3814 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
3815 SImode, EXPAND_NORMAL));
3816 rtx wavefront = gcn_oacc_dim_pos (1);
3817 rtx cond = gen_rtx_NE (VOIDmode, wavefront, const0_rtx);
3818 rtx not_zero = gen_label_rtx ();
3819 emit_insn (gen_cbranchsi4 (cond, wavefront, const0_rtx, not_zero));
3820 emit_move_insn (blk, const0_rtx);
3821 emit_label (not_zero);
3822 return blk;
3823 }
3824
3825 case GCN_BUILTIN_ACC_SINGLE_COPY_END:
3826 return target;
3827
3828 case GCN_BUILTIN_ACC_BARRIER:
3829 emit_insn (gen_gcn_wavefront_barrier ());
3830 return target;
3831
3832 default:
3833 gcc_unreachable ();
3834 }
3835 }
3836
3837 /* Expansion of simple arithmetic and bit binary operation builtins.
3838
3839 Intended for use with gcn_builtins table. */
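   The builtin's first argument is the EXEC mask to apply; the next two are
   the operands proper.  A fourth argument, if present, is passed through as
   the instruction pattern's final operand, otherwise an undefined-value
   placeholder is used.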
3840
3841 static rtx
3842 gcn_expand_builtin_binop (tree exp, rtx target, rtx /*subtarget */ ,
3843 machine_mode /*mode */ , int ignore,
3844 struct gcn_builtin_description *d)
3845 {
3846 int icode = d->icode;
3847 if (ignore)
3848 return target;
3849
3850 rtx exec = force_reg (DImode,
3851 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, DImode,
3852 EXPAND_NORMAL));
3853
3854 machine_mode m1 = insn_data[icode].operand[1].mode;
3855 rtx arg1 = expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, m1,
3856 EXPAND_NORMAL);
3857 if (!insn_data[icode].operand[1].predicate (arg1, m1))
3858 arg1 = force_reg (m1, arg1);
3859
3860 machine_mode m2 = insn_data[icode].operand[2].mode;
3861 rtx arg2 = expand_expr (CALL_EXPR_ARG (exp, 2), NULL_RTX, m2,
3862 EXPAND_NORMAL);
3863 if (!insn_data[icode].operand[2].predicate (arg2, m2))
3864 arg2 = force_reg (m2, arg2);
3865
3866 rtx arg_prev;
3867 if (call_expr_nargs (exp) == 4)
3868 {
3869 machine_mode m_prev = insn_data[icode].operand[4].mode;
3870 arg_prev = force_reg (m_prev,
3871 expand_expr (CALL_EXPR_ARG (exp, 3), NULL_RTX,
3872 m_prev, EXPAND_NORMAL));
3873 }
3874 else
3875 arg_prev = gcn_gen_undef (GET_MODE (target));
3876
3877 rtx pat = GEN_FCN (icode) (target, arg1, arg2, exec, arg_prev);
3878 emit_insn (pat);
3879 return target;
3880 }
3881
3882 /* Implement TARGET_EXPAND_BUILTIN.
3883
3884 Expand an expression EXP that calls a built-in function, with result going
3885 to TARGET if that's convenient (and in mode MODE if that's convenient).
3886 SUBTARGET may be used as the target for computing one of EXP's operands.
3887 IGNORE is nonzero if the value is to be ignored. */
3888
3889 rtx
3890 gcn_expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
3891 int ignore)
3892 {
3893 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
3894 unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
3895 struct gcn_builtin_description *d;
3896
3897 gcc_assert (fcode < GCN_BUILTIN_MAX);
3898 d = &gcn_builtins[fcode];
3899
3900 if (d->type == B_UNIMPLEMENTED)
3901 sorry ("Builtin not implemented");
3902
3903 return d->expander (exp, target, subtarget, mode, ignore, d);
3904 }
3905
3906 /* }}} */
3907 /* {{{ Vectorization. */
3908
3909 /* Implement TARGET_VECTORIZE_GET_MASK_MODE.
3910
3911 A vector mask is a value that holds one boolean result for every element in
3912 a vector. */
3913
3914 opt_machine_mode
3915 gcn_vectorize_get_mask_mode (machine_mode)
3916 {
3917 /* GCN uses a DImode bit-mask. */
3918 return DImode;
3919 }
3920
3921 /* Return an RTX that references a vector with the i-th lane containing
3922 PERM[i]*4.
3923
3924 Helper function for gcn_vectorize_vec_perm_const. */
3925
3926 static rtx
3927 gcn_make_vec_perm_address (unsigned int *perm)
3928 {
3929 rtx x = gen_reg_rtx (V64SImode);
3930 emit_move_insn (x, gcn_vec_constant (V64SImode, 0));
3931
3932 /* Permutation addresses use byte addressing. With each vector lane being
3933 4 bytes wide, and with 64 lanes in total, only bits 2..7 are significant,
3934 so only set those.
3935
3936 The permutation given to the vec_perm* patterns ranges from 0 to 2N-1 to
3937 select between lanes in two vectors, but as the DS_BPERMUTE* instructions
3938 only take one source vector, the most-significant bit can be ignored
3939 here. Instead, we can use EXEC masking to select the relevant part of
3940 each source vector after they are permuted separately. */
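 /* For example, a lane with PERM[i] == 5 needs byte address 20 (binary
 10100), so that lane is included in the EXEC masks built below for
 bit 2 and bit 4. */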
3941 uint64_t bit_mask = 1 << 2;
3942 for (int i = 2; i < 8; i++, bit_mask <<= 1)
3943 {
3944 uint64_t exec_mask = 0;
3945 uint64_t lane_mask = 1;
3946 for (int j = 0; j < 64; j++, lane_mask <<= 1)
3947 if ((perm[j] * 4) & bit_mask)
3948 exec_mask |= lane_mask;
3949
3950 if (exec_mask)
3951 emit_insn (gen_addv64si3_exec (x, x,
3952 gcn_vec_constant (V64SImode,
3953 bit_mask),
3954 x, get_exec (exec_mask)));
3955 }
3956
3957 return x;
3958 }
3959
3960 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST.
3961
3962 Return true if permutation with SEL is possible.
3963
3964 If DST/SRC0/SRC1 are non-null, emit the instructions to perform the
3965 permutations. */
3966
3967 static bool
3968 gcn_vectorize_vec_perm_const (machine_mode vmode, rtx dst,
3969 rtx src0, rtx src1,
3970 const vec_perm_indices & sel)
3971 {
3972 unsigned int nelt = GET_MODE_NUNITS (vmode);
3973
3974 gcc_assert (VECTOR_MODE_P (vmode));
3975 gcc_assert (nelt <= 64);
3976 gcc_assert (sel.length () == nelt);
3977
3978 if (!dst)
3979 {
3980 /* All vector permutations are possible on this architecture,
3981 with varying degrees of efficiency depending on the permutation. */
3982 return true;
3983 }
3984
3985 unsigned int perm[64];
3986 for (unsigned int i = 0; i < nelt; ++i)
3987 perm[i] = sel[i] & (2 * nelt - 1);
3988
3989 /* Make life a bit easier by swapping operands if necessary so that
3990 the first element always comes from src0. */
3991 if (perm[0] >= nelt)
3992 {
3993 rtx temp = src0;
3994 src0 = src1;
3995 src1 = temp;
3996
3997 for (unsigned int i = 0; i < nelt; ++i)
3998 if (perm[i] < nelt)
3999 perm[i] += nelt;
4000 else
4001 perm[i] -= nelt;
4002 }
4003
4004 /* TODO: There are more efficient ways to implement certain permutations
4005 using ds_swizzle_b32 and/or DPP. Test for and expand them here, before
4006 this more inefficient generic approach is used. */
4007
4008 int64_t src1_lanes = 0;
4009 int64_t lane_bit = 1;
4010
4011 for (unsigned int i = 0; i < nelt; ++i, lane_bit <<= 1)
4012 {
4013 /* Set the bits for lanes from src1. */
4014 if (perm[i] >= nelt)
4015 src1_lanes |= lane_bit;
4016 }
4017
4018 rtx addr = gcn_make_vec_perm_address (perm);
4019 rtx (*ds_bpermute) (rtx, rtx, rtx, rtx);
4020
4021 switch (vmode)
4022 {
4023 case E_V64QImode:
4024 ds_bpermute = gen_ds_bpermutev64qi;
4025 break;
4026 case E_V64HImode:
4027 ds_bpermute = gen_ds_bpermutev64hi;
4028 break;
4029 case E_V64SImode:
4030 ds_bpermute = gen_ds_bpermutev64si;
4031 break;
4032 case E_V64HFmode:
4033 ds_bpermute = gen_ds_bpermutev64hf;
4034 break;
4035 case E_V64SFmode:
4036 ds_bpermute = gen_ds_bpermutev64sf;
4037 break;
4038 case E_V64DImode:
4039 ds_bpermute = gen_ds_bpermutev64di;
4040 break;
4041 case E_V64DFmode:
4042 ds_bpermute = gen_ds_bpermutev64df;
4043 break;
4044 default:
4045 gcc_assert (false);
4046 }
4047
4048 /* Load elements from src0 to dst. */
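 /* The operand swap above ensures lane 0 comes from src0, so src1_lanes
 cannot be all-ones here. */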
4049 gcc_assert (~src1_lanes);
4050 emit_insn (ds_bpermute (dst, addr, src0, gcn_full_exec_reg ()));
4051
4052 /* Load elements from src1 to dst. */
4053 if (src1_lanes)
4054 {
4055 /* Masking a lane masks both the destination and source lanes for
4056 DS_BPERMUTE, so we need to have all lanes enabled for the permute,
4057 then add an extra masked move to merge the results of permuting
4058 the two source vectors together.
4059 */
4060 rtx tmp = gen_reg_rtx (vmode);
4061 emit_insn (ds_bpermute (tmp, addr, src1, gcn_full_exec_reg ()));
4062 emit_insn (gen_mov_with_exec (dst, tmp, get_exec (src1_lanes)));
4063 }
4064
4065 return true;
4066 }
4067
4068 /* Implements TARGET_VECTOR_MODE_SUPPORTED_P.
4069
4070 Return nonzero if vector MODE is supported with at least move
4071 instructions. */
4072
4073 static bool
4074 gcn_vector_mode_supported_p (machine_mode mode)
4075 {
4076 return (mode == V64QImode || mode == V64HImode
4077 || mode == V64SImode || mode == V64DImode
4078 || mode == V64SFmode || mode == V64DFmode);
4079 }
4080
4081 /* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE.
4082
4083 Enables autovectorization for all supported modes. */
4084
4085 static machine_mode
4086 gcn_vectorize_preferred_simd_mode (scalar_mode mode)
4087 {
4088 switch (mode)
4089 {
4090 case E_QImode:
4091 return V64QImode;
4092 case E_HImode:
4093 return V64HImode;
4094 case E_SImode:
4095 return V64SImode;
4096 case E_DImode:
4097 return V64DImode;
4098 case E_SFmode:
4099 return V64SFmode;
4100 case E_DFmode:
4101 return V64DFmode;
4102 default:
4103 return word_mode;
4104 }
4105 }
4106
4107 /* Implement TARGET_VECTORIZE_RELATED_MODE.
4108
4109 All GCN vectors are 64-lane, so this is simpler than other architectures.
4110 In particular, we do *not* want to match vector bit-size. */
4111
4112 static opt_machine_mode
4113 gcn_related_vector_mode (machine_mode ARG_UNUSED (vector_mode),
4114 scalar_mode element_mode, poly_uint64 nunits)
4115 {
4116 if (known_ne (nunits, 0U) && known_ne (nunits, 64U))
4117 return VOIDmode;
4118
4119 machine_mode pref_mode = gcn_vectorize_preferred_simd_mode (element_mode);
4120 if (!VECTOR_MODE_P (pref_mode))
4121 return VOIDmode;
4122
4123 return pref_mode;
4124 }
4125
4126 /* Implement TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT.
4127
4128 Returns the preferred alignment in bits for accesses to vectors of type TYPE
4129 in vectorized code. This might be less than or greater than the ABI-defined
4130 value returned by TARGET_VECTOR_ALIGNMENT. It can be equal to the alignment
4131 of a single element, in which case the vectorizer will not try to optimize
4132 for alignment. */
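 /* We simply return the element alignment (the last option described above),
 so the vectorizer does not attempt alignment peeling; GCN vector accesses
 are per-lane, so aligning the whole vector presumably offers no benefit. */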
4133
4134 static poly_uint64
4135 gcn_preferred_vector_alignment (const_tree type)
4136 {
4137 return TYPE_ALIGN (TREE_TYPE (type));
4138 }
4139
4140 /* Implement TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT.
4141
4142 Return true if the target supports misaligned vector store/load of a
4143 specific factor denoted in the misalignment parameter. */
4144
4145 static bool
4146 gcn_vectorize_support_vector_misalignment (machine_mode ARG_UNUSED (mode),
4147 const_tree type, int misalignment,
4148 bool is_packed)
4149 {
4150 if (is_packed)
4151 return false;
4152
4153 /* If the misalignment is unknown, we should be able to handle the access
4154 so long as it is not to a member of a packed data structure. */
4155 if (misalignment == -1)
4156 return true;
4157
4158 /* Return true if the misalignment is a multiple of the natural alignment
4159 of the vector's element type. This is probably always going to be
4160 true in practice, since we've already established that this isn't a
4161 packed access. */
4162 return misalignment % TYPE_ALIGN_UNIT (type) == 0;
4163 }
4164
4165 /* Implement TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.
4166
4167 Return true if vector alignment is reachable (by peeling N iterations) for
4168 the given scalar type TYPE. */
4169
4170 static bool
4171 gcn_vector_alignment_reachable (const_tree ARG_UNUSED (type), bool is_packed)
4172 {
4173 /* Vectors which aren't in packed structures will not be less aligned than
4174 the natural alignment of their element type, so this is safe. */
4175 return !is_packed;
4176 }
4177
4178 /* Generate DPP instructions used for vector reductions.
4179
4180 The opcode is given by INSN.
4181 The first operand of the operation is shifted right by SHIFT vector lanes.
4182 SHIFT must be a power of 2. If SHIFT is 16, the 15th lane of each row is
4183 broadcast to the next row (thereby acting like a shift of 16 for the end of
4184 each row). If SHIFT is 32, lane 31 is broadcast to all the
4185 following lanes (thereby acting like a shift of 32 for lane 63). */
4186
4187 char *
4188 gcn_expand_dpp_shr_insn (machine_mode mode, const char *insn,
4189 int unspec, int shift)
4190 {
4191 static char buf[128];
4192 const char *dpp;
4193 const char *vcc_in = "";
4194 const char *vcc_out = "";
4195
4196 /* Add the vcc operand if needed. */
4197 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
4198 {
4199 if (unspec == UNSPEC_PLUS_CARRY_IN_DPP_SHR)
4200 vcc_in = ", vcc";
4201
4202 if (unspec == UNSPEC_PLUS_CARRY_DPP_SHR
4203 || unspec == UNSPEC_PLUS_CARRY_IN_DPP_SHR)
4204 vcc_out = ", vcc";
4205 }
4206
4207 /* Add the DPP modifiers. */
4208 switch (shift)
4209 {
4210 case 1:
4211 dpp = "row_shr:1 bound_ctrl:0";
4212 break;
4213 case 2:
4214 dpp = "row_shr:2 bound_ctrl:0";
4215 break;
4216 case 4:
4217 dpp = "row_shr:4 bank_mask:0xe";
4218 break;
4219 case 8:
4220 dpp = "row_shr:8 bank_mask:0xc";
4221 break;
4222 case 16:
4223 dpp = "row_bcast:15 row_mask:0xa";
4224 break;
4225 case 32:
4226 dpp = "row_bcast:31 row_mask:0xc";
4227 break;
4228 default:
4229 gcc_unreachable ();
4230 }
4231
4232 if (unspec == UNSPEC_MOV_DPP_SHR && vgpr_2reg_mode_p (mode))
4233 sprintf (buf, "%s\t%%L0, %%L1 %s\n\t%s\t%%H0, %%H1 %s",
4234 insn, dpp, insn, dpp);
4235 else if (unspec == UNSPEC_MOV_DPP_SHR)
4236 sprintf (buf, "%s\t%%0, %%1 %s", insn, dpp);
4237 else
4238 sprintf (buf, "%s\t%%0%s, %%1, %%2%s %s", insn, vcc_out, vcc_in, dpp);
4239
4240 return buf;
4241 }
4242
4243 /* Generate vector reductions in terms of DPP instructions.
4244
4245 The vector register SRC of mode MODE is reduced using the operation given
4246 by UNSPEC, and the scalar result is returned in lane 63 of a vector
4247 register. */
4248
4249 rtx
4250 gcn_expand_reduc_scalar (machine_mode mode, rtx src, int unspec)
4251 {
4252 machine_mode orig_mode = mode;
4253 bool use_moves = (((unspec == UNSPEC_SMIN_DPP_SHR
4254 || unspec == UNSPEC_SMAX_DPP_SHR
4255 || unspec == UNSPEC_UMIN_DPP_SHR
4256 || unspec == UNSPEC_UMAX_DPP_SHR)
4257 && mode == V64DImode)
4258 || (unspec == UNSPEC_PLUS_DPP_SHR
4259 && mode == V64DFmode));
4260 rtx_code code = (unspec == UNSPEC_SMIN_DPP_SHR ? SMIN
4261 : unspec == UNSPEC_SMAX_DPP_SHR ? SMAX
4262 : unspec == UNSPEC_UMIN_DPP_SHR ? UMIN
4263 : unspec == UNSPEC_UMAX_DPP_SHR ? UMAX
4264 : unspec == UNSPEC_PLUS_DPP_SHR ? PLUS
4265 : UNKNOWN);
4266 bool use_extends = ((unspec == UNSPEC_SMIN_DPP_SHR
4267 || unspec == UNSPEC_SMAX_DPP_SHR
4268 || unspec == UNSPEC_UMIN_DPP_SHR
4269 || unspec == UNSPEC_UMAX_DPP_SHR)
4270 && (mode == V64QImode
4271 || mode == V64HImode));
4272 bool unsignedp = (unspec == UNSPEC_UMIN_DPP_SHR
4273 || unspec == UNSPEC_UMAX_DPP_SHR);
4274 bool use_plus_carry = unspec == UNSPEC_PLUS_DPP_SHR
4275 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4276 && (TARGET_GCN3 || mode == V64DImode);
4277
4278 if (use_plus_carry)
4279 unspec = UNSPEC_PLUS_CARRY_DPP_SHR;
4280
4281 if (use_extends)
4282 {
4283 rtx tmp = gen_reg_rtx (V64SImode);
4284 convert_move (tmp, src, unsignedp);
4285 src = tmp;
4286 mode = V64SImode;
4287 }
4288
4289 /* Perform reduction by first performing the reduction operation on every
4290 pair of lanes, then on every pair of results from the previous
4291 iteration (thereby effectively reducing every 4 lanes) and so on until
4292 all lanes are reduced. */
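 /* With 64 lanes this takes six steps, shifting by 1, 2, 4, 8, 16 and
 finally 32 lanes; after the last step lane 63 holds the fully reduced
 value. */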
4293 rtx in, out = src;
4294 for (int i = 0, shift = 1; i < 6; i++, shift <<= 1)
4295 {
4296 rtx shift_val = gen_rtx_CONST_INT (VOIDmode, shift);
4297 in = out;
4298 out = gen_reg_rtx (mode);
4299
4300 if (use_moves)
4301 {
4302 rtx tmp = gen_reg_rtx (mode);
4303 emit_insn (gen_dpp_move (mode, tmp, in, shift_val));
4304 emit_insn (gen_rtx_SET (out, gen_rtx_fmt_ee (code, mode, tmp, in)));
4305 }
4306 else
4307 {
4308 rtx insn = gen_rtx_SET (out,
4309 gen_rtx_UNSPEC (mode,
4310 gen_rtvec (3, in, in,
4311 shift_val),
4312 unspec));
4313
4314 /* Add clobber for instructions that set the carry flags. */
4315 if (use_plus_carry)
4316 {
4317 rtx clobber = gen_rtx_CLOBBER (VOIDmode,
4318 gen_rtx_REG (DImode, VCC_REG));
4319 insn = gen_rtx_PARALLEL (VOIDmode,
4320 gen_rtvec (2, insn, clobber));
4321 }
4322
4323 emit_insn (insn);
4324 }
4325 }
4326
4327 if (use_extends)
4328 {
4329 rtx tmp = gen_reg_rtx (orig_mode);
4330 convert_move (tmp, out, unsignedp);
4331 out = tmp;
4332 }
4333
4334 return out;
4335 }
4336
4337 /* Implement TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST. */
4338
4339 int
4340 gcn_vectorization_cost (enum vect_cost_for_stmt ARG_UNUSED (type_of_cost),
4341 tree ARG_UNUSED (vectype), int ARG_UNUSED (misalign))
4342 {
4343 /* Always vectorize. */
4344 return 1;
4345 }
4346
4347 /* }}} */
4348 /* {{{ md_reorg pass. */
4349
4350 /* Identify VMEM instructions from their "type" attribute. */
4351
4352 static bool
4353 gcn_vmem_insn_p (attr_type type)
4354 {
4355 switch (type)
4356 {
4357 case TYPE_MUBUF:
4358 case TYPE_MTBUF:
4359 case TYPE_FLAT:
4360 return true;
4361 case TYPE_UNKNOWN:
4362 case TYPE_SOP1:
4363 case TYPE_SOP2:
4364 case TYPE_SOPK:
4365 case TYPE_SOPC:
4366 case TYPE_SOPP:
4367 case TYPE_SMEM:
4368 case TYPE_DS:
4369 case TYPE_VOP2:
4370 case TYPE_VOP1:
4371 case TYPE_VOPC:
4372 case TYPE_VOP3A:
4373 case TYPE_VOP3B:
4374 case TYPE_VOP_SDWA:
4375 case TYPE_VOP_DPP:
4376 case TYPE_MULT:
4377 case TYPE_VMULT:
4378 return false;
4379 }
4380 gcc_unreachable ();
4381 return false;
4382 }
4383
4384 /* If INSN sets the EXEC register to a constant value, return the value,
4385 otherwise return zero. */
4386
4387 static int64_t
4388 gcn_insn_exec_value (rtx_insn *insn)
4389 {
4390 if (!NONDEBUG_INSN_P (insn))
4391 return 0;
4392
4393 rtx pattern = PATTERN (insn);
4394
4395 if (GET_CODE (pattern) == SET)
4396 {
4397 rtx dest = XEXP (pattern, 0);
4398 rtx src = XEXP (pattern, 1);
4399
4400 if (GET_MODE (dest) == DImode
4401 && REG_P (dest) && REGNO (dest) == EXEC_REG
4402 && CONST_INT_P (src))
4403 return INTVAL (src);
4404 }
4405
4406 return 0;
4407 }
4408
4409 /* Sets the EXEC register before INSN to the value that it had after
4410 LAST_EXEC_DEF. The constant value of the EXEC register is returned if
4411 known, otherwise zero is returned. */
4412
4413 static int64_t
4414 gcn_restore_exec (rtx_insn *insn, rtx_insn *last_exec_def, int64_t curr_exec,
4415 bool curr_exec_known, bool &last_exec_def_saved)
4416 {
4417 rtx exec_reg = gen_rtx_REG (DImode, EXEC_REG);
4418 rtx exec;
4419
4420 int64_t exec_value = gcn_insn_exec_value (last_exec_def);
4421
4422 if (exec_value)
4423 {
4424 /* If the EXEC value is a constant and it happens to be the same as the
4425 current EXEC value, the restore can be skipped. */
4426 if (curr_exec_known && exec_value == curr_exec)
4427 return exec_value;
4428
4429 exec = GEN_INT (exec_value);
4430 }
4431 else
4432 {
4433 /* If the EXEC value is not a constant, save it in a register after the
4434 point of definition. */
4435 rtx exec_save_reg = gen_rtx_REG (DImode, EXEC_SAVE_REG);
4436
4437 if (!last_exec_def_saved)
4438 {
4439 start_sequence ();
4440 emit_move_insn (exec_save_reg, exec_reg);
4441 rtx_insn *seq = get_insns ();
4442 end_sequence ();
4443
4444 emit_insn_after (seq, last_exec_def);
4445 if (dump_file && (dump_flags & TDF_DETAILS))
4446 fprintf (dump_file, "Saving EXEC after insn %d.\n",
4447 INSN_UID (last_exec_def));
4448
4449 last_exec_def_saved = true;
4450 }
4451
4452 exec = exec_save_reg;
4453 }
4454
4455 /* Restore EXEC register before the usage. */
4456 start_sequence ();
4457 emit_move_insn (exec_reg, exec);
4458 rtx_insn *seq = get_insns ();
4459 end_sequence ();
4460 emit_insn_before (seq, insn);
4461
4462 if (dump_file && (dump_flags & TDF_DETAILS))
4463 {
4464 if (exec_value)
4465 fprintf (dump_file, "Restoring EXEC to %ld before insn %d.\n",
4466 exec_value, INSN_UID (insn));
4467 else
4468 fprintf (dump_file,
4469 "Restoring EXEC from saved value before insn %d.\n",
4470 INSN_UID (insn));
4471 }
4472
4473 return exec_value;
4474 }
4475
4476 /* Implement TARGET_MACHINE_DEPENDENT_REORG.
4477
4478 Ensure that pipeline dependencies and lane masking are set correctly. */
4479
4480 static void
4481 gcn_md_reorg (void)
4482 {
4483 basic_block bb;
4484 rtx exec_reg = gen_rtx_REG (DImode, EXEC_REG);
4485 regset_head live;
4486
4487 INIT_REG_SET (&live);
4488
4489 compute_bb_for_insn ();
4490
4491 if (!optimize)
4492 {
4493 split_all_insns ();
4494 if (dump_file && (dump_flags & TDF_DETAILS))
4495 {
4496 fprintf (dump_file, "After split:\n");
4497 print_rtl_with_bb (dump_file, get_insns (), dump_flags);
4498 }
4499
4500 /* Update data-flow information for split instructions. */
4501 df_insn_rescan_all ();
4502 }
4503
4504 df_analyze ();
4505
4506 /* This pass ensures that the EXEC register is set correctly, according
4507 to the "exec" attribute. However, care must be taken so that the
4508 value that reaches explicit uses of the EXEC register remains the
4509 same as before.
4510 */
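 /* For example, an insn using a VGPR in a scalar mode only needs bit 0 of
 EXEC, one using a VGPR in a vector mode needs all 64 bits, and an insn
 touching no VGPRs leaves EXEC alone; the auto-detection below makes
 that choice. */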
4511
4512 FOR_EACH_BB_FN (bb, cfun)
4513 {
4514 if (dump_file && (dump_flags & TDF_DETAILS))
4515 fprintf (dump_file, "BB %d:\n", bb->index);
4516
4517 rtx_insn *insn, *curr;
4518 rtx_insn *last_exec_def = BB_HEAD (bb);
4519 bool last_exec_def_saved = false;
4520 bool curr_exec_explicit = true;
4521 bool curr_exec_known = true;
4522 int64_t curr_exec = 0; /* 0 here means 'the value is that of EXEC
4523 after last_exec_def is executed'. */
4524
4525 FOR_BB_INSNS_SAFE (bb, insn, curr)
4526 {
4527 if (!NONDEBUG_INSN_P (insn))
4528 continue;
4529
4530 if (GET_CODE (PATTERN (insn)) == USE
4531 || GET_CODE (PATTERN (insn)) == CLOBBER)
4532 continue;
4533
4534 HARD_REG_SET defs, uses;
4535 CLEAR_HARD_REG_SET (defs);
4536 CLEAR_HARD_REG_SET (uses);
4537 note_stores (insn, record_hard_reg_sets, &defs);
4538 note_uses (&PATTERN (insn), record_hard_reg_uses, &uses);
4539
4540 bool exec_lo_def_p = TEST_HARD_REG_BIT (defs, EXEC_LO_REG);
4541 bool exec_hi_def_p = TEST_HARD_REG_BIT (defs, EXEC_HI_REG);
4542 bool exec_used = (hard_reg_set_intersect_p
4543 (uses, reg_class_contents[(int) EXEC_MASK_REG])
4544 || TEST_HARD_REG_BIT (uses, EXECZ_REG));
4545
4546 /* Check the instruction for implicit setting of EXEC via an
4547 attribute. */
4548 attr_exec exec_attr = get_attr_exec (insn);
4549 int64_t new_exec;
4550
4551 switch (exec_attr)
4552 {
4553 case EXEC_NONE:
4554 new_exec = 0;
4555 break;
4556
4557 case EXEC_SINGLE:
4558 /* Memory instructions need EXEC set to exactly 1, so that only one lane
4559 accesses memory; other instructions merely require bit 0 of EXEC to be set. */
4560 if (gcn_vmem_insn_p (get_attr_type (insn))
4561 || get_attr_type (insn) == TYPE_DS)
4562 new_exec = 1;
4563 else
4564 new_exec = curr_exec | 1;
4565 break;
4566
4567 case EXEC_FULL:
4568 new_exec = -1;
4569 break;
4570
4571 default: /* Auto-detect what setting is appropriate. */
4572 {
4573 new_exec = 0;
4574
4575 /* If EXEC is referenced explicitly then we don't need to do
4576 anything to set it, so we're done. */
4577 if (exec_used)
4578 break;
4579
4580 /* Scan the insn for VGPRs defs or uses. The mode determines
4581 what kind of exec is needed. */
4582 subrtx_iterator::array_type array;
4583 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
4584 {
4585 const_rtx x = *iter;
4586 if (REG_P (x) && VGPR_REGNO_P (REGNO (x)))
4587 {
4588 if (VECTOR_MODE_P (GET_MODE (x)))
4589 {
4590 new_exec = -1;
4591 break;
4592 }
4593 else
4594 new_exec = 1;
4595 }
4596 }
4597 }
4598 break;
4599 }
4600
4601 if (new_exec && (!curr_exec_known || new_exec != curr_exec))
4602 {
4603 start_sequence ();
4604 emit_move_insn (exec_reg, GEN_INT (new_exec));
4605 rtx_insn *seq = get_insns ();
4606 end_sequence ();
4607 emit_insn_before (seq, insn);
4608
4609 if (dump_file && (dump_flags & TDF_DETAILS))
4610 fprintf (dump_file, "Setting EXEC to %ld before insn %d.\n",
4611 new_exec, INSN_UID (insn));
4612
4613 curr_exec = new_exec;
4614 curr_exec_explicit = false;
4615 curr_exec_known = true;
4616 }
4617 else if (new_exec && dump_file && (dump_flags & TDF_DETAILS))
4618 {
4619 fprintf (dump_file, "Exec already is %ld before insn %d.\n",
4620 new_exec, INSN_UID (insn));
4621 }
4622
4623 /* The state of the EXEC register is unknown after a
4624 function call. */
4625 if (CALL_P (insn))
4626 curr_exec_known = false;
4627
4628 /* Handle explicit uses of EXEC. If the instruction is a partial
4629 explicit definition of EXEC, then treat it as an explicit use of
4630 EXEC as well. */
4631 if (exec_used || exec_lo_def_p != exec_hi_def_p)
4632 {
4633 /* An instruction that explicitly uses EXEC should not also
4634 implicitly define it. */
4635 gcc_assert (!exec_used || !new_exec);
4636
4637 if (!curr_exec_known || !curr_exec_explicit)
4638 {
4639 /* Restore the previous explicitly defined value. */
4640 curr_exec = gcn_restore_exec (insn, last_exec_def,
4641 curr_exec, curr_exec_known,
4642 last_exec_def_saved);
4643 curr_exec_explicit = true;
4644 curr_exec_known = true;
4645 }
4646 }
4647
4648 /* Handle explicit definitions of EXEC. */
4649 if (exec_lo_def_p || exec_hi_def_p)
4650 {
4651 last_exec_def = insn;
4652 last_exec_def_saved = false;
4653 curr_exec = gcn_insn_exec_value (insn);
4654 curr_exec_explicit = true;
4655 curr_exec_known = true;
4656
4657 if (dump_file && (dump_flags & TDF_DETAILS))
4658 fprintf (dump_file,
4659 "Found %s definition of EXEC at insn %d.\n",
4660 exec_lo_def_p == exec_hi_def_p ? "full" : "partial",
4661 INSN_UID (insn));
4662 }
4663 }
4664
4665 COPY_REG_SET (&live, DF_LR_OUT (bb));
4666 df_simulate_initialize_backwards (bb, &live);
4667
4668 /* If EXEC is live after the basic block, restore the value of EXEC
4669 at the end of the block. */
4670 if ((REGNO_REG_SET_P (&live, EXEC_LO_REG)
4671 || REGNO_REG_SET_P (&live, EXEC_HI_REG))
4672 && (!curr_exec_known || !curr_exec_explicit))
4673 {
4674 rtx_insn *end_insn = BB_END (bb);
4675
4676 /* If the instruction is not a jump instruction, do the restore
4677 after the last instruction in the basic block. */
4678 if (NONJUMP_INSN_P (end_insn))
4679 end_insn = NEXT_INSN (end_insn);
4680
4681 gcn_restore_exec (end_insn, last_exec_def, curr_exec,
4682 curr_exec_known, last_exec_def_saved);
4683 }
4684 }
4685
4686 CLEAR_REG_SET (&live);
4687
4688 /* "Manually Inserted Wait States (NOPs)."
4689
4690 GCN hardware detects most kinds of register dependencies, but there
4691 are some exceptions documented in the ISA manual. This pass
4692 detects the missed cases, and inserts the documented number of NOPs
4693 required for correct execution. */
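 /* For example, a VALU instruction writing an SGPR followed by a VMEM
 instruction reading the same SGPR requires five wait states; the checks
 below cover this and the other documented hazards. */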
4694
4695 const int max_waits = 5;
4696 struct ilist
4697 {
4698 rtx_insn *insn;
4699 attr_unit unit;
4700 attr_delayeduse delayeduse;
4701 HARD_REG_SET writes;
4702 HARD_REG_SET reads;
4703 int age;
4704 } back[max_waits];
4705 int oldest = 0;
4706 for (int i = 0; i < max_waits; i++)
4707 back[i].insn = NULL;
4708
4709 rtx_insn *insn, *last_insn = NULL;
4710 for (insn = get_insns (); insn != 0; insn = NEXT_INSN (insn))
4711 {
4712 if (!NONDEBUG_INSN_P (insn))
4713 continue;
4714
4715 if (GET_CODE (PATTERN (insn)) == USE
4716 || GET_CODE (PATTERN (insn)) == CLOBBER)
4717 continue;
4718
4719 attr_type itype = get_attr_type (insn);
4720 attr_unit iunit = get_attr_unit (insn);
4721 attr_delayeduse idelayeduse = get_attr_delayeduse (insn);
4722 HARD_REG_SET ireads, iwrites;
4723 CLEAR_HARD_REG_SET (ireads);
4724 CLEAR_HARD_REG_SET (iwrites);
4725 note_stores (insn, record_hard_reg_sets, &iwrites);
4726 note_uses (&PATTERN (insn), record_hard_reg_uses, &ireads);
4727
4728 /* Scan recent previous instructions for dependencies not handled in
4729 hardware. */
4730 int nops_rqd = 0;
4731 for (int i = oldest; i < oldest + max_waits; i++)
4732 {
4733 struct ilist *prev_insn = &back[i % max_waits];
4734
4735 if (!prev_insn->insn)
4736 continue;
4737
4738 /* VALU writes SGPR followed by VMEM reading the same SGPR
4739 requires 5 wait states. */
4740 if ((prev_insn->age + nops_rqd) < 5
4741 && prev_insn->unit == UNIT_VECTOR
4742 && gcn_vmem_insn_p (itype))
4743 {
4744 HARD_REG_SET regs = prev_insn->writes & ireads;
4745 if (hard_reg_set_intersect_p
4746 (regs, reg_class_contents[(int) SGPR_REGS]))
4747 nops_rqd = 5 - prev_insn->age;
4748 }
4749
4750 /* VALU sets VCC/EXEC followed by VALU uses VCCZ/EXECZ
4751 requires 5 wait states. */
4752 if ((prev_insn->age + nops_rqd) < 5
4753 && prev_insn->unit == UNIT_VECTOR
4754 && iunit == UNIT_VECTOR
4755 && ((hard_reg_set_intersect_p
4756 (prev_insn->writes,
4757 reg_class_contents[(int) EXEC_MASK_REG])
4758 && TEST_HARD_REG_BIT (ireads, EXECZ_REG))
4759 ||
4760 (hard_reg_set_intersect_p
4761 (prev_insn->writes,
4762 reg_class_contents[(int) VCC_CONDITIONAL_REG])
4763 && TEST_HARD_REG_BIT (ireads, VCCZ_REG))))
4764 nops_rqd = 5 - prev_insn->age;
4765
4766 /* VALU writes SGPR/VCC followed by v_{read,write}lane using
4767 SGPR/VCC as lane select requires 4 wait states. */
4768 if ((prev_insn->age + nops_rqd) < 4
4769 && prev_insn->unit == UNIT_VECTOR
4770 && get_attr_laneselect (insn) == LANESELECT_YES)
4771 {
4772 HARD_REG_SET regs = prev_insn->writes & ireads;
4773 if (hard_reg_set_intersect_p
4774 (regs, reg_class_contents[(int) SGPR_REGS])
4775 || hard_reg_set_intersect_p
4776 (regs, reg_class_contents[(int) VCC_CONDITIONAL_REG]))
4777 nops_rqd = 4 - prev_insn->age;
4778 }
4779
4780 /* VALU writes VGPR followed by VALU_DPP reading that VGPR
4781 requires 2 wait states. */
4782 if ((prev_insn->age + nops_rqd) < 2
4783 && prev_insn->unit == UNIT_VECTOR
4784 && itype == TYPE_VOP_DPP)
4785 {
4786 HARD_REG_SET regs = prev_insn->writes & ireads;
4787 if (hard_reg_set_intersect_p
4788 (regs, reg_class_contents[(int) VGPR_REGS]))
4789 nops_rqd = 2 - prev_insn->age;
4790 }
4791
4792 /* A store whose input registers must not be overwritten by the
4793 following instruction requires one wait state. */
4794 if ((prev_insn->age + nops_rqd) < 1
4795 && prev_insn->delayeduse == DELAYEDUSE_YES
4796 && ((hard_reg_set_intersect_p
4797 (prev_insn->reads, iwrites))))
4798 nops_rqd = 1 - prev_insn->age;
4799 }
4800
4801 /* Insert the required number of NOPs. */
4802 for (int i = nops_rqd; i > 0; i--)
4803 emit_insn_after (gen_nop (), last_insn);
4804
4805 /* Age the previous instructions. We can also ignore writes to
4806 registers subsequently overwritten. */
4807 HARD_REG_SET written;
4808 CLEAR_HARD_REG_SET (written);
4809 for (int i = oldest + max_waits - 1; i > oldest; i--)
4810 {
4811 struct ilist *prev_insn = &back[i % max_waits];
4812
4813 /* Assume all instructions are equivalent to one "wait", the same
4814 as s_nop. This is probably true for SALU, but not VALU (which
4815 may take longer), so this is not optimal. However, AMD do
4816 not publish the cycle times for instructions. */
4817 prev_insn->age += 1 + nops_rqd;
4818
4819 written |= iwrites;
4820 prev_insn->writes &= ~written;
4821 }
4822
4823 /* Track the current instruction as a previous instruction. */
4824 back[oldest].insn = insn;
4825 back[oldest].unit = iunit;
4826 back[oldest].delayeduse = idelayeduse;
4827 back[oldest].writes = iwrites;
4828 back[oldest].reads = ireads;
4829 back[oldest].age = 0;
4830 oldest = (oldest + 1) % max_waits;
4831
4832 last_insn = insn;
4833 }
4834 }
4835
4836 /* }}} */
4837 /* {{{ OpenACC / OpenMP. */
4838
4839 #define GCN_DEFAULT_GANGS 0 /* Choose at runtime. */
4840 #define GCN_DEFAULT_WORKERS 0 /* Choose at runtime. */
4841 #define GCN_DEFAULT_VECTORS 1 /* Use autovectorization only, for now. */
4842
4843 /* Implement TARGET_GOACC_VALIDATE_DIMS.
4844
4845 Check the launch dimensions provided for an OpenACC compute
4846 region, or routine. */
4847
4848 static bool
4849 gcn_goacc_validate_dims (tree decl, int dims[], int fn_level,
4850 unsigned /*used*/)
4851 {
4852 bool changed = false;
4853
4854 /* FIXME: remove -facc-experimental-workers when they're ready. */
4855 int max_workers = flag_worker_partitioning ? 16 : 1;
4856
4857 gcc_assert (!flag_worker_partitioning);
4858
4859 /* The vector size must appear to be 64, to the user, unless this is a
4860 SEQ routine. The real, internal value is always 1, which means use
4861 autovectorization, but the user should not see that. */
4862 if (fn_level <= GOMP_DIM_VECTOR && fn_level >= -1
4863 && dims[GOMP_DIM_VECTOR] >= 0)
4864 {
4865 if (fn_level < 0 && dims[GOMP_DIM_VECTOR] >= 0
4866 && dims[GOMP_DIM_VECTOR] != 64)
4867 warning_at (decl ? DECL_SOURCE_LOCATION (decl) : UNKNOWN_LOCATION,
4868 OPT_Wopenacc_dims,
4869 (dims[GOMP_DIM_VECTOR]
4870 ? G_("using vector_length (64), ignoring %d")
4871 : G_("using vector_length (64), "
4872 "ignoring runtime setting")),
4873 dims[GOMP_DIM_VECTOR]);
4874 dims[GOMP_DIM_VECTOR] = 1;
4875 changed = true;
4876 }
4877
4878 /* Check that num_workers is not too large. */
4879 if (dims[GOMP_DIM_WORKER] > max_workers)
4880 {
4881 warning_at (decl ? DECL_SOURCE_LOCATION (decl) : UNKNOWN_LOCATION,
4882 OPT_Wopenacc_dims,
4883 "using num_workers (%d), ignoring %d",
4884 max_workers, dims[GOMP_DIM_WORKER]);
4885 dims[GOMP_DIM_WORKER] = max_workers;
4886 changed = true;
4887 }
4888
4889 /* Set global defaults. */
4890 if (!decl)
4891 {
4892 dims[GOMP_DIM_VECTOR] = GCN_DEFAULT_VECTORS;
4893 if (dims[GOMP_DIM_WORKER] < 0)
4894 dims[GOMP_DIM_WORKER] = (flag_worker_partitioning
4895 ? GCN_DEFAULT_WORKERS : 1);
4896 if (dims[GOMP_DIM_GANG] < 0)
4897 dims[GOMP_DIM_GANG] = GCN_DEFAULT_GANGS;
4898 changed = true;
4899 }
4900
4901 return changed;
4902 }
4903
4904 /* Helper function for oacc_dim_size instruction.
4905 Also used for OpenMP, via builtin_gcn_dim_size, and the omp_gcn pass. */
4906
4907 rtx
4908 gcn_oacc_dim_size (int dim)
4909 {
4910 if (dim < 0 || dim > 2)
4911 error ("offload dimension out of range (%d)", dim);
4912
4913 /* Vectors are a special case. */
4914 if (dim == 2)
4915 return const1_rtx; /* Think of this as 1 times 64. */
4916
4917 static int offset[] = {
4918 /* Offsets into dispatch packet. */
4919 12, /* X dim = Gang / Team / Work-group. */
4920 20, /* Z dim = Worker / Thread / Wavefront. */
4921 16 /* Y dim = Vector / SIMD / Work-item. */
4922 };
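 /* These are presumably the 32-bit grid_size_x/y/z fields of the HSA kernel
 dispatch packet, at byte offsets 12, 16 and 20 respectively, read via the
 dispatch pointer below. */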
4923 rtx addr = gen_rtx_PLUS (DImode,
4924 gen_rtx_REG (DImode,
4925 cfun->machine->args.
4926 reg[DISPATCH_PTR_ARG]),
4927 GEN_INT (offset[dim]));
4928 return gen_rtx_MEM (SImode, addr);
4929 }
4930
4931 /* Helper function for oacc_dim_pos instruction.
4932 Also used for OpenMP, via builtin_gcn_dim_pos, and the omp_gcn pass. */
4933
4934 rtx
4935 gcn_oacc_dim_pos (int dim)
4936 {
4937 if (dim < 0 || dim > 2)
4938 error ("offload dimension out of range (%d)", dim);
4939
4940 static const int reg[] = {
4941 WORKGROUP_ID_X_ARG, /* Gang / Team / Work-group. */
4942 WORK_ITEM_ID_Z_ARG, /* Worker / Thread / Wavefront. */
4943 WORK_ITEM_ID_Y_ARG /* Vector / SIMD / Work-item. */
4944 };
4945
4946 int reg_num = cfun->machine->args.reg[reg[dim]];
4947
4948 /* The information must have been requested by the kernel. */
4949 gcc_assert (reg_num >= 0);
4950
4951 return gen_rtx_REG (SImode, reg_num);
4952 }
4953
4954 /* Implement TARGET_GOACC_FORK_JOIN. */
4955
4956 static bool
4957 gcn_fork_join (gcall *ARG_UNUSED (call), const int *ARG_UNUSED (dims),
4958 bool ARG_UNUSED (is_fork))
4959 {
4960 /* GCN does not use the fork/join concept invented for NVPTX.
4961 Instead we use standard autovectorization. */
4962 return false;
4963 }
4964
4965 /* Implement ???????
4966 FIXME make this a real hook.
4967
4968 Adjust FNDECL such that options inherited from the host compiler
4969 are made appropriate for the accelerator compiler. */
4970
4971 void
4972 gcn_fixup_accel_lto_options (tree fndecl)
4973 {
4974 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4975 if (!func_optimize)
4976 return;
4977
4978 tree old_optimize
4979 = build_optimization_node (&global_options, &global_options_set);
4980 tree new_optimize;
4981
4982 /* If the function changed the optimization levels as well as
4983 setting target options, start with the optimizations
4984 specified. */
4985 if (func_optimize != old_optimize)
4986 cl_optimization_restore (&global_options, &global_options_set,
4987 TREE_OPTIMIZATION (func_optimize));
4988
4989 gcn_option_override ();
4990
4991 /* The target attributes may also change some optimization flags,
4992 so update the optimization options if necessary. */
4993 new_optimize = build_optimization_node (&global_options,
4994 &global_options_set);
4995
4996 if (old_optimize != new_optimize)
4997 {
4998 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4999 cl_optimization_restore (&global_options, &global_options_set,
5000 TREE_OPTIMIZATION (old_optimize));
5001 }
5002 }
5003
5004 /* }}} */
5005 /* {{{ ASM Output. */
5006
5007 /* Implement TARGET_ASM_FILE_START.
5008
5009 Print assembler file header text. */
5010
5011 static void
5012 output_file_start (void)
5013 {
5014 const char *cpu;
5015 switch (gcn_arch)
5016 {
5017 case PROCESSOR_FIJI: cpu = "gfx803"; break;
5018 case PROCESSOR_VEGA10: cpu = "gfx900"; break;
5019 case PROCESSOR_VEGA20: cpu = "gfx906"; break;
5020 default: gcc_unreachable ();
5021 }
5022
5023 fprintf (asm_out_file, "\t.amdgcn_target \"amdgcn-unknown-amdhsa--%s\"\n", cpu);
5024 }
5025
5026 /* Implement ASM_DECLARE_FUNCTION_NAME via gcn-hsa.h.
5027
5028 Print the initial definition of a function name.
5029
5030 For GCN kernel entry points this includes all the HSA meta-data, special
5031 alignment constraints that don't apply to regular functions, and magic
5032 comments that pass information to mkoffload. */
5033
5034 void
5035 gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
5036 {
5037 int sgpr, vgpr;
5038 bool xnack_enabled = false;
5039
5040 fputs ("\n\n", file);
5041
5042 if (cfun && cfun->machine && cfun->machine->normal_function)
5043 {
5044 fputs ("\t.type\t", file);
5045 assemble_name (file, name);
5046 fputs (",@function\n", file);
5047 assemble_name (file, name);
5048 fputs (":\n", file);
5049 return;
5050 }
5051
5052 /* Determine count of sgpr/vgpr registers by looking for last
5053 one used. */
5054 for (sgpr = 101; sgpr >= 0; sgpr--)
5055 if (df_regs_ever_live_p (FIRST_SGPR_REG + sgpr))
5056 break;
5057 sgpr++;
5058 for (vgpr = 255; vgpr >= 0; vgpr--)
5059 if (df_regs_ever_live_p (FIRST_VGPR_REG + vgpr))
5060 break;
5061 vgpr++;
5062
5063 if (!leaf_function_p ())
5064 {
5065 /* We can't know how many registers function calls might use. */
5066 if (vgpr < MAX_NORMAL_VGPR_COUNT)
5067 vgpr = MAX_NORMAL_VGPR_COUNT;
5068 if (sgpr < MAX_NORMAL_SGPR_COUNT)
5069 sgpr = MAX_NORMAL_SGPR_COUNT;
5070 }
5071
5072 fputs ("\t.rodata\n"
5073 "\t.p2align\t6\n"
5074 "\t.amdhsa_kernel\t", file);
5075 assemble_name (file, name);
5076 fputs ("\n", file);
5077 int reg = FIRST_SGPR_REG;
5078 for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
5079 {
5080 int reg_first = -1;
5081 int reg_last;
5082 if ((cfun->machine->args.requested & (1 << a))
5083 && (gcn_kernel_arg_types[a].fixed_regno < 0))
5084 {
5085 reg_first = reg;
5086 reg_last = (reg_first
5087 + (GET_MODE_SIZE (gcn_kernel_arg_types[a].mode)
5088 / UNITS_PER_WORD) - 1);
5089 reg = reg_last + 1;
5090 }
5091
5092 if (gcn_kernel_arg_types[a].header_pseudo)
5093 {
5094 fprintf (file, "\t %s%s\t%i",
5095 (cfun->machine->args.requested & (1 << a)) != 0 ? "" : ";",
5096 gcn_kernel_arg_types[a].header_pseudo,
5097 (cfun->machine->args.requested & (1 << a)) != 0);
5098 if (reg_first != -1)
5099 {
5100 fprintf (file, " ; (");
5101 for (int i = reg_first; i <= reg_last; ++i)
5102 {
5103 if (i != reg_first)
5104 fprintf (file, ", ");
5105 fprintf (file, "%s", reg_names[i]);
5106 }
5107 fprintf (file, ")");
5108 }
5109 fprintf (file, "\n");
5110 }
5111 else if (gcn_kernel_arg_types[a].fixed_regno >= 0
5112 && cfun->machine->args.requested & (1 << a))
5113 fprintf (file, "\t ; %s\t%i (%s)\n",
5114 gcn_kernel_arg_types[a].name,
5115 (cfun->machine->args.requested & (1 << a)) != 0,
5116 reg_names[gcn_kernel_arg_types[a].fixed_regno]);
5117 }
5118 fprintf (file, "\t .amdhsa_system_vgpr_workitem_id\t%i\n",
5119 (cfun->machine->args.requested & (1 << WORK_ITEM_ID_Z_ARG))
5120 ? 2
5121 : cfun->machine->args.requested & (1 << WORK_ITEM_ID_Y_ARG)
5122 ? 1 : 0);
5123 fprintf (file,
5124 "\t .amdhsa_next_free_vgpr\t%i\n"
5125 "\t .amdhsa_next_free_sgpr\t%i\n"
5126 "\t .amdhsa_reserve_vcc\t1\n"
5127 "\t .amdhsa_reserve_flat_scratch\t0\n"
5128 "\t .amdhsa_reserve_xnack_mask\t%i\n"
5129 "\t .amdhsa_private_segment_fixed_size\t%i\n"
5130 "\t .amdhsa_group_segment_fixed_size\t%u\n"
5131 "\t .amdhsa_float_denorm_mode_32\t3\n"
5132 "\t .amdhsa_float_denorm_mode_16_64\t3\n",
5133 vgpr,
5134 sgpr,
5135 xnack_enabled,
5136 /* workitem_private_segment_bytes_size needs to be
5137 one 64th of the wave-front stack size. */
5138 stack_size_opt / 64,
5139 LDS_SIZE);
5140 fputs ("\t.end_amdhsa_kernel\n", file);
5141
5142 #if 1
5143 /* The following is YAML embedded in assembler; tabs are not allowed. */
5144 fputs (" .amdgpu_metadata\n"
5145 " amdhsa.version:\n"
5146 " - 1\n"
5147 " - 0\n"
5148 " amdhsa.kernels:\n"
5149 " - .name: ", file);
5150 assemble_name (file, name);
5151 fputs ("\n .symbol: ", file);
5152 assemble_name (file, name);
5153 fprintf (file,
5154 ".kd\n"
5155 " .kernarg_segment_size: %i\n"
5156 " .kernarg_segment_align: %i\n"
5157 " .group_segment_fixed_size: %u\n"
5158 " .private_segment_fixed_size: %i\n"
5159 " .wavefront_size: 64\n"
5160 " .sgpr_count: %i\n"
5161 " .vgpr_count: %i\n"
5162 " .max_flat_workgroup_size: 1024\n",
5163 cfun->machine->kernarg_segment_byte_size,
5164 cfun->machine->kernarg_segment_alignment,
5165 LDS_SIZE,
5166 stack_size_opt / 64,
5167 sgpr, vgpr);
5168 fputs (" .end_amdgpu_metadata\n", file);
5169 #endif
5170
5171 fputs ("\t.text\n", file);
5172 fputs ("\t.align\t256\n", file);
5173 fputs ("\t.type\t", file);
5174 assemble_name (file, name);
5175 fputs (",@function\n", file);
5176 assemble_name (file, name);
5177 fputs (":\n", file);
5178
5179 /* This comment is read by mkoffload. */
5180 if (flag_openacc)
5181 fprintf (file, "\t;; OPENACC-DIMS: %d, %d, %d : %s\n",
5182 oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_GANG),
5183 oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_WORKER),
5184 oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_VECTOR), name);
5185 }
5186
5187 /* Implement TARGET_ASM_SELECT_SECTION.
5188
5189 Return the section into which EXP should be placed. */
5190
5191 static section *
5192 gcn_asm_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
5193 {
5194 if (TREE_TYPE (exp) != error_mark_node
5195 && TYPE_ADDR_SPACE (TREE_TYPE (exp)) == ADDR_SPACE_LDS)
5196 {
5197 if (!DECL_P (exp))
5198 return get_section (".lds_bss",
5199 SECTION_WRITE | SECTION_BSS | SECTION_DEBUG,
5200 NULL);
5201
5202 return get_named_section (exp, ".lds_bss", reloc);
5203 }
5204
5205 return default_elf_select_section (exp, reloc, align);
5206 }
5207
5208 /* Implement TARGET_ASM_FUNCTION_PROLOGUE.
5209
5210 Emits custom text into the assembler file at the head of each function. */
5211
5212 static void
5213 gcn_target_asm_function_prologue (FILE *file)
5214 {
5215 machine_function *offsets = gcn_compute_frame_offsets ();
5216
5217 asm_fprintf (file, "\t; using %s addressing in function\n",
5218 offsets->use_flat_addressing ? "flat" : "global");
5219
5220 if (offsets->normal_function)
5221 {
5222 asm_fprintf (file, "\t; frame pointer needed: %s\n",
5223 offsets->need_frame_pointer ? "true" : "false");
5224 asm_fprintf (file, "\t; lr needs saving: %s\n",
5225 offsets->lr_needs_saving ? "true" : "false");
5226 asm_fprintf (file, "\t; outgoing args size: %wd\n",
5227 offsets->outgoing_args_size);
5228 asm_fprintf (file, "\t; pretend size: %wd\n", offsets->pretend_size);
5229 asm_fprintf (file, "\t; local vars size: %wd\n", offsets->local_vars);
5230 asm_fprintf (file, "\t; callee save size: %wd\n",
5231 offsets->callee_saves);
5232 }
5233 else
5234 {
5235 asm_fprintf (file, "\t; HSA kernel entry point\n");
5236 asm_fprintf (file, "\t; local vars size: %wd\n", offsets->local_vars);
5237 asm_fprintf (file, "\t; outgoing args size: %wd\n",
5238 offsets->outgoing_args_size);
5239 }
5240 }
5241
5242 /* Helper function for print_operand and print_operand_address.
5243
5244 Print a register as the assembler requires, according to mode and name. */
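 /* For example, a DImode value in SGPRs prints as "s[4:5]", and a TImode
 value in VGPRs prints as "v[8:11]". */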
5245
5246 static void
5247 print_reg (FILE *file, rtx x)
5248 {
5249 machine_mode mode = GET_MODE (x);
5250 if (mode == BImode || mode == QImode || mode == HImode || mode == SImode
5251 || mode == HFmode || mode == SFmode
5252 || mode == V64SFmode || mode == V64SImode
5253 || mode == V64QImode || mode == V64HImode)
5254 fprintf (file, "%s", reg_names[REGNO (x)]);
5255 else if (mode == DImode || mode == V64DImode
5256 || mode == DFmode || mode == V64DFmode)
5257 {
5258 if (SGPR_REGNO_P (REGNO (x)))
5259 fprintf (file, "s[%i:%i]", REGNO (x) - FIRST_SGPR_REG,
5260 REGNO (x) - FIRST_SGPR_REG + 1);
5261 else if (VGPR_REGNO_P (REGNO (x)))
5262 fprintf (file, "v[%i:%i]", REGNO (x) - FIRST_VGPR_REG,
5263 REGNO (x) - FIRST_VGPR_REG + 1);
5264 else if (REGNO (x) == FLAT_SCRATCH_REG)
5265 fprintf (file, "flat_scratch");
5266 else if (REGNO (x) == EXEC_REG)
5267 fprintf (file, "exec");
5268 else if (REGNO (x) == VCC_LO_REG)
5269 fprintf (file, "vcc");
5270 else
5271 fprintf (file, "[%s:%s]",
5272 reg_names[REGNO (x)], reg_names[REGNO (x) + 1]);
5273 }
5274 else if (mode == TImode)
5275 {
5276 if (SGPR_REGNO_P (REGNO (x)))
5277 fprintf (file, "s[%i:%i]", REGNO (x) - FIRST_SGPR_REG,
5278 REGNO (x) - FIRST_SGPR_REG + 3);
5279 else if (VGPR_REGNO_P (REGNO (x)))
5280 fprintf (file, "v[%i:%i]", REGNO (x) - FIRST_VGPR_REG,
5281 REGNO (x) - FIRST_VGPR_REG + 3);
5282 else
5283 gcc_unreachable ();
5284 }
5285 else
5286 gcc_unreachable ();
5287 }
5288
5289 /* Implement TARGET_SECTION_TYPE_FLAGS.
5290
5291 Return a set of section attributes for use by TARGET_ASM_NAMED_SECTION. */
5292
5293 static unsigned int
5294 gcn_section_type_flags (tree decl, const char *name, int reloc)
5295 {
5296 if (strcmp (name, ".lds_bss") == 0)
5297 return SECTION_WRITE | SECTION_BSS | SECTION_DEBUG;
5298
5299 return default_section_type_flags (decl, name, reloc);
5300 }
5301
5302 /* Helper function for gcn_asm_output_symbol_ref.
5303
5304 FIXME: If we want to have propagation blocks allocated separately and
5305 statically like this, it would be better done via symbol refs and the
5306 assembler/linker. This is a temporary hack. */
5307
5308 static void
5309 gcn_print_lds_decl (FILE *f, tree var)
5310 {
5311 int *offset;
5312 machine_function *machfun = cfun->machine;
5313
5314 if ((offset = machfun->lds_allocs->get (var)))
5315 fprintf (f, "%u", (unsigned) *offset);
5316 else
5317 {
5318 unsigned HOST_WIDE_INT align = DECL_ALIGN_UNIT (var);
5319 tree type = TREE_TYPE (var);
5320 unsigned HOST_WIDE_INT size = tree_to_uhwi (TYPE_SIZE_UNIT (type));
5321 if (size > align && size > 4 && align < 8)
5322 align = 8;
5323
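 /* Round the running LDS offset up to a multiple of ALIGN, hand out that
 offset for VAR, then advance by its size. */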
5324 machfun->lds_allocated = ((machfun->lds_allocated + align - 1)
5325 & ~(align - 1));
5326
5327 machfun->lds_allocs->put (var, machfun->lds_allocated);
5328 fprintf (f, "%u", machfun->lds_allocated);
5329 machfun->lds_allocated += size;
5330 if (machfun->lds_allocated > LDS_SIZE)
5331 error ("local data-share memory exhausted");
5332 }
5333 }
5334
5335 /* Implement ASM_OUTPUT_SYMBOL_REF via gcn-hsa.h. */
5336
5337 void
5338 gcn_asm_output_symbol_ref (FILE *file, rtx x)
5339 {
5340 tree decl;
5341 if (cfun
5342 && (decl = SYMBOL_REF_DECL (x)) != 0
5343 && TREE_CODE (decl) == VAR_DECL
5344 && AS_LDS_P (TYPE_ADDR_SPACE (TREE_TYPE (decl))))
5345 {
5346 /* LDS symbols (emitted using this hook) are only used at present
5347 to propagate worker values from an active thread to neutered
5348 threads. Use the same offset for each such block, but don't
5349 use zero because null pointers are used to identify the active
5350 thread in GOACC_single_copy_start calls. */
5351 gcn_print_lds_decl (file, decl);
5352 }
5353 else
5354 {
5355 assemble_name (file, XSTR (x, 0));
5356 /* FIXME: See above -- this condition is unreachable. */
5357 if (cfun
5358 && (decl = SYMBOL_REF_DECL (x)) != 0
5359 && TREE_CODE (decl) == VAR_DECL
5360 && AS_LDS_P (TYPE_ADDR_SPACE (TREE_TYPE (decl))))
5361 fputs ("@abs32", file);
5362 }
5363 }
5364
5365 /* Implement TARGET_CONSTANT_ALIGNMENT.
5366
5367 Returns the alignment in bits of a constant that is being placed in memory.
5368 CONSTANT is the constant and BASIC_ALIGN is the alignment that the object
5369 would ordinarily have. */
5370
5371 static HOST_WIDE_INT
5372 gcn_constant_alignment (const_tree ARG_UNUSED (constant),
5373 HOST_WIDE_INT basic_align)
5374 {
5375 return basic_align > 128 ? basic_align : 128;
5376 }
5377
5378 /* Implement PRINT_OPERAND_ADDRESS via gcn.h. */
5379
5380 void
5381 print_operand_address (FILE *file, rtx mem)
5382 {
5383 gcc_assert (MEM_P (mem));
5384
5385 rtx reg;
5386 rtx offset;
5387 addr_space_t as = MEM_ADDR_SPACE (mem);
5388 rtx addr = XEXP (mem, 0);
5389 gcc_assert (REG_P (addr) || GET_CODE (addr) == PLUS);
5390
5391 if (AS_SCRATCH_P (as))
5392 switch (GET_CODE (addr))
5393 {
5394 case REG:
5395 print_reg (file, addr);
5396 break;
5397
5398 case PLUS:
5399 reg = XEXP (addr, 0);
5400 offset = XEXP (addr, 1);
5401 print_reg (file, reg);
5402 if (GET_CODE (offset) == CONST_INT)
5403 fprintf (file, " offset:" HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
5404 else
5405 abort ();
5406 break;
5407
5408 default:
5409 debug_rtx (addr);
5410 abort ();
5411 }
5412 else if (AS_ANY_FLAT_P (as))
5413 {
5414 if (GET_CODE (addr) == REG)
5415 print_reg (file, addr);
5416 else
5417 {
5418 gcc_assert (TARGET_GCN5_PLUS);
5419 print_reg (file, XEXP (addr, 0));
5420 }
5421 }
5422 else if (AS_GLOBAL_P (as))
5423 {
5424 gcc_assert (TARGET_GCN5_PLUS);
5425
5426 rtx base = addr;
5427 rtx vgpr_offset = NULL_RTX;
5428
5429 if (GET_CODE (addr) == PLUS)
5430 {
5431 base = XEXP (addr, 0);
5432
5433 if (GET_CODE (base) == PLUS)
5434 {
5435 /* (SGPR + VGPR) + CONST */
5436 vgpr_offset = XEXP (base, 1);
5437 base = XEXP (base, 0);
5438 }
5439 else
5440 {
5441 rtx offset = XEXP (addr, 1);
5442
5443 if (REG_P (offset))
5444 /* SGPR + VGPR */
5445 vgpr_offset = offset;
5446 else if (CONST_INT_P (offset))
5447 /* VGPR + CONST or SGPR + CONST */
5448 ;
5449 else
5450 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
5451 }
5452 }
5453
5454 if (REG_P (base))
5455 {
5456 if (VGPR_REGNO_P (REGNO (base)))
5457 print_reg (file, base);
5458 else if (SGPR_REGNO_P (REGNO (base)))
5459 {
5460 /* The assembler requires a 64-bit VGPR pair here, even though
5461 the offset should be only 32-bit. */
5462 if (vgpr_offset == NULL_RTX)
5463 /* In this case, the vector offset is zero, so we use the first
5464 lane of v1, which is initialized to zero. */
5465 fprintf (file, "v[1:2]");
5466 else if (REG_P (vgpr_offset)
5467 && VGPR_REGNO_P (REGNO (vgpr_offset)))
5468 {
5469 fprintf (file, "v[%d:%d]",
5470 REGNO (vgpr_offset) - FIRST_VGPR_REG,
5471 REGNO (vgpr_offset) - FIRST_VGPR_REG + 1);
5472 }
5473 else
5474 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
5475 }
5476 }
5477 else
5478 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
5479 }
5480 else if (AS_ANY_DS_P (as))
5481 switch (GET_CODE (addr))
5482 {
5483 case REG:
5484 print_reg (file, addr);
5485 break;
5486
5487 case PLUS:
5488 reg = XEXP (addr, 0);
5489 print_reg (file, reg);
5490 break;
5491
5492 default:
5493 debug_rtx (addr);
5494 abort ();
5495 }
5496 else
5497 switch (GET_CODE (addr))
5498 {
5499 case REG:
5500 print_reg (file, addr);
5501 fprintf (file, ", 0");
5502 break;
5503
5504 case PLUS:
5505 reg = XEXP (addr, 0);
5506 offset = XEXP (addr, 1);
5507 print_reg (file, reg);
5508 fprintf (file, ", ");
5509 if (GET_CODE (offset) == REG)
5510 print_reg (file, reg);
5511 else if (GET_CODE (offset) == CONST_INT)
5512 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
5513 else
5514 abort ();
5515 break;
5516
5517 default:
5518 debug_rtx (addr);
5519 abort ();
5520 }
5521 }
5522
5523 /* Implement PRINT_OPERAND via gcn.h.
5524
5525 b - print operand size as untyped operand (b8/b16/b32/b64)
5526 B - print operand size as SI/DI untyped operand (b32/b32/b32/b64)
5527 i - print operand size as untyped operand (i16/b32/i64)
5528 I - print operand size as SI/DI untyped operand (i32/b32/i64)
5529 u - print operand size as untyped operand (u16/u32/u64)
5530 U - print operand size as SI/DI untyped operand (u32/u64)
5531 o - print operand size as memory access size for loads
5532 (ubyte/ushort/dword/dwordx2/wordx3/dwordx4)
5533 s - print operand size as memory access size for stores
5534 (byte/short/dword/dwordx2/wordx3/dwordx4)
5535 C - print conditional code for s_cbranch (_sccz/_sccnz/_vccz/_vccnz...)
5536 c - print inverse conditional code for s_cbranch
5537 D - print conditional code for s_cmp (eq_u64/lg_u64...)
5538 E - print conditional code for v_cmp (eq_u64/ne_u64...)
5539 A - print address in formatting suitable for given address space.
5540 O - print offset:n for data share operations.
5541 ^ - print "_co" suffix for GCN5 mnemonics
5542 g - print "glc", if appropriate for given MEM
5543 */
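 /* For instance, %s applied to a DImode memory operand prints "_dwordx2",
 and %A prints its address in the form required by the operand's address
 space. */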
5544
5545 void
5546 print_operand (FILE *file, rtx x, int code)
5547 {
5548 int xcode = x ? GET_CODE (x) : 0;
5549 bool invert = false;
5550 switch (code)
5551 {
5552 /* Instructions have the following suffixes.
5553 If there are two suffixes, the first is the destination type,
5554 and the second is the source type.
5555
5556 B32 Bitfield (untyped data) 32-bit
5557 B64 Bitfield (untyped data) 64-bit
5558 F16 floating-point 16-bit
5559 F32 floating-point 32-bit (IEEE 754 single-precision float)
5560 F64 floating-point 64-bit (IEEE 754 double-precision float)
5561 I16 signed 16-bit integer
5562 I32 signed 32-bit integer
5563 I64 signed 64-bit integer
5564 U16 unsigned 16-bit integer
5565 U32 unsigned 32-bit integer
5566 U64 unsigned 64-bit integer */
5567
5568 /* Print operand size as untyped suffix. */
5569 case 'b':
5570 {
5571 const char *s = "";
5572 machine_mode mode = GET_MODE (x);
5573 if (VECTOR_MODE_P (mode))
5574 mode = GET_MODE_INNER (mode);
5575 switch (GET_MODE_SIZE (mode))
5576 {
5577 case 1:
5578 s = "_b8";
5579 break;
5580 case 2:
5581 s = "_b16";
5582 break;
5583 case 4:
5584 s = "_b32";
5585 break;
5586 case 8:
5587 s = "_b64";
5588 break;
5589 default:
5590 output_operand_lossage ("invalid operand %%xn code");
5591 return;
5592 }
5593 fputs (s, file);
5594 }
5595 return;
5596 case 'B':
5597 {
5598 const char *s = "";
5599 machine_mode mode = GET_MODE (x);
5600 if (VECTOR_MODE_P (mode))
5601 mode = GET_MODE_INNER (mode);
5602 switch (GET_MODE_SIZE (mode))
5603 {
5604 case 1:
5605 case 2:
5606 case 4:
5607 s = "_b32";
5608 break;
5609 case 8:
5610 s = "_b64";
5611 break;
5612 default:
5613 output_operand_lossage ("invalid operand %%xn code");
5614 return;
5615 }
5616 fputs (s, file);
5617 }
5618 return;
5619 case 'e':
5620 fputs ("sext(", file);
5621 print_operand (file, x, 0);
5622 fputs (")", file);
5623 return;
5624 case 'i':
5625 case 'I':
5626 case 'u':
5627 case 'U':
5628 {
5629 bool signed_p = code == 'i';
5630 bool min32_p = code == 'I' || code == 'U';
5631 const char *s = "";
5632 machine_mode mode = GET_MODE (x);
5633 if (VECTOR_MODE_P (mode))
5634 mode = GET_MODE_INNER (mode);
5635 if (mode == VOIDmode)
5636 switch (GET_CODE (x))
5637 {
5638 case CONST_INT:
5639 s = signed_p ? "_i32" : "_u32";
5640 break;
5641 case CONST_DOUBLE:
5642 s = "_f64";
5643 break;
5644 default:
5645 output_operand_lossage ("invalid operand %%xn code");
5646 return;
5647 }
5648 else if (FLOAT_MODE_P (mode))
5649 switch (GET_MODE_SIZE (mode))
5650 {
5651 case 2:
5652 s = "_f16";
5653 break;
5654 case 4:
5655 s = "_f32";
5656 break;
5657 case 8:
5658 s = "_f64";
5659 break;
5660 default:
5661 output_operand_lossage ("invalid operand %%xn code");
5662 return;
5663 }
5664 else if (min32_p)
5665 switch (GET_MODE_SIZE (mode))
5666 {
5667 case 1:
5668 case 2:
5669 case 4:
5670 s = signed_p ? "_i32" : "_u32";
5671 break;
5672 case 8:
5673 s = signed_p ? "_i64" : "_u64";
5674 break;
5675 default:
5676 output_operand_lossage ("invalid operand %%xn code");
5677 return;
5678 }
5679 else
5680 switch (GET_MODE_SIZE (mode))
5681 {
5682 case 1:
5683 s = signed_p ? "_i8" : "_u8";
5684 break;
5685 case 2:
5686 s = signed_p ? "_i16" : "_u16";
5687 break;
5688 case 4:
5689 s = signed_p ? "_i32" : "_u32";
5690 break;
5691 case 8:
5692 s = signed_p ? "_i64" : "_u64";
5693 break;
5694 default:
5695 output_operand_lossage ("invalid operand %%xn code");
5696 return;
5697 }
5698 fputs (s, file);
5699 }
5700 return;
5701 /* Print operand size as a memory access size suffix (loads; stores use 's'). */
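/* E.g. a QImode load prints "_ubyte" while a QImode store ('s') prints
"_byte"; dword-sized and larger accesses share the "_dword*" suffixes. */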
5702 case 'o':
5703 {
5704 const char *s = 0;
5705 switch (GET_MODE_SIZE (GET_MODE (x)))
5706 {
5707 case 1:
5708 s = "_ubyte";
5709 break;
5710 case 2:
5711 s = "_ushort";
5712 break;
5713 /* The following are full-vector variants. */
5714 case 64:
5715 s = "_ubyte";
5716 break;
5717 case 128:
5718 s = "_ushort";
5719 break;
5720 }
5721
5722 if (s)
5723 {
5724 fputs (s, file);
5725 return;
5726 }
5727
5728 /* Fall-through - the other cases for 'o' are the same as for 's'. */
5729 gcc_fallthrough ();
5730 }
5731 case 's':
5732 {
5733 const char *s = "";
5734 switch (GET_MODE_SIZE (GET_MODE (x)))
5735 {
5736 case 1:
5737 s = "_byte";
5738 break;
5739 case 2:
5740 s = "_short";
5741 break;
5742 case 4:
5743 s = "_dword";
5744 break;
5745 case 8:
5746 s = "_dwordx2";
5747 break;
5748 case 12:
5749 s = "_dwordx3";
5750 break;
5751 case 16:
5752 s = "_dwordx4";
5753 break;
5754 case 32:
5755 s = "_dwordx8";
5756 break;
5757 case 64:
5758 s = VECTOR_MODE_P (GET_MODE (x)) ? "_byte" : "_dwordx16";
5759 break;
5760 /* The following are full-vector variants. */
5761 case 128:
5762 s = "_short";
5763 break;
5764 case 256:
5765 s = "_dword";
5766 break;
5767 case 512:
5768 s = "_dwordx2";
5769 break;
5770 default:
5771 output_operand_lossage ("invalid operand %%xn code");
5772 return;
5773 }
5774 fputs (s, file);
5775 }
5776 return;
5777 case 'A':
5778 if (xcode != MEM)
5779 {
5780 output_operand_lossage ("invalid %%xn code");
5781 return;
5782 }
5783 print_operand_address (file, x);
5784 return;
5785 case 'O':
5786 {
5787 if (xcode != MEM)
5788 {
5789 output_operand_lossage ("invalid %%xn code");
5790 return;
5791 }
5792 if (AS_GDS_P (MEM_ADDR_SPACE (x)))
5793 fprintf (file, " gds");
5794
5795 rtx x0 = XEXP (x, 0);
5796 if (AS_GLOBAL_P (MEM_ADDR_SPACE (x)))
5797 {
5798 gcc_assert (TARGET_GCN5_PLUS);
5799
5800 fprintf (file, ", ");
5801
5802 rtx base = x0;
5803 rtx const_offset = NULL_RTX;
5804
5805 if (GET_CODE (base) == PLUS)
5806 {
5807 rtx offset = XEXP (x0, 1);
5808 base = XEXP (x0, 0);
5809
5810 if (GET_CODE (base) == PLUS)
5811 /* (SGPR + VGPR) + CONST */
5812 /* Ignore the VGPR offset for this operand. */
5813 base = XEXP (base, 0);
5814
5815 if (CONST_INT_P (offset))
5816 const_offset = XEXP (x0, 1);
5817 else if (REG_P (offset))
5818 /* SGPR + VGPR */
5819 /* Ignore the VGPR offset for this operand. */
5820 ;
5821 else
5822 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
5823 }
5824
5825 if (REG_P (base))
5826 {
5827 if (VGPR_REGNO_P (REGNO (base)))
5828 /* The VGPR address is specified in the %A operand. */
5829 fprintf (file, "off");
5830 else if (SGPR_REGNO_P (REGNO (base)))
5831 print_reg (file, base);
5832 else
5833 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
5834 }
5835 else
5836 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
5837
5838 if (const_offset != NULL_RTX)
5839 fprintf (file, " offset:" HOST_WIDE_INT_PRINT_DEC,
5840 INTVAL (const_offset));
5841
5842 return;
5843 }
5844
5845 if (GET_CODE (x0) == REG)
5846 return;
5847 if (GET_CODE (x0) != PLUS)
5848 {
5849 output_operand_lossage ("invalid %%xn code");
5850 return;
5851 }
5852 rtx val = XEXP (x0, 1);
5853 if (GET_CODE (val) == CONST_VECTOR)
5854 val = CONST_VECTOR_ELT (val, 0);
5855 if (GET_CODE (val) != CONST_INT)
5856 {
5857 output_operand_lossage ("invalid %%xn code");
5858 return;
5859 }
5860 fprintf (file, " offset:" HOST_WIDE_INT_PRINT_DEC, INTVAL (val));
5861
5862 }
5863 return;
5864 case 'c':
5865 invert = true;
5866 /* Fall through. */
5867 case 'C':
5868 {
5869 const char *s;
5870 bool num = false;
5871 if ((xcode != EQ && xcode != NE) || !REG_P (XEXP (x, 0)))
5872 {
5873 output_operand_lossage ("invalid %%xn code");
5874 return;
5875 }
5876 switch (REGNO (XEXP (x, 0)))
5877 {
5878 case VCC_REG:
5879 case VCCZ_REG:
5880 s = "_vcc";
5881 break;
5882 case SCC_REG:
5883 /* For some reason llvm-mc insists on scc0 instead of sccz. */
5884 num = true;
5885 s = "_scc";
5886 break;
5887 case EXECZ_REG:
5888 s = "_exec";
5889 break;
5890 default:
5891 output_operand_lossage ("invalid %%xn code");
5892 return;
5893 }
5894 fputs (s, file);
5895 if (xcode == (invert ? NE : EQ))
5896 fputc (num ? '0' : 'z', file);
5897 else
5898 fputs (num ? "1" : "nz", file);
5899 return;
5900 }
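/* E.g. a comparison of two DImode values with NE prints "_lg_u64",
matching the s_cmp suffixes listed in the table above. */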
5901 case 'D':
5902 {
5903 const char *s;
5904 bool cmp_signed = false;
5905 switch (xcode)
5906 {
5907 case EQ:
5908 s = "_eq_";
5909 break;
5910 case NE:
5911 s = "_lg_";
5912 break;
5913 case LT:
5914 s = "_lt_";
5915 cmp_signed = true;
5916 break;
5917 case LE:
5918 s = "_le_";
5919 cmp_signed = true;
5920 break;
5921 case GT:
5922 s = "_gt_";
5923 cmp_signed = true;
5924 break;
5925 case GE:
5926 s = "_ge_";
5927 cmp_signed = true;
5928 break;
5929 case LTU:
5930 s = "_lt_";
5931 break;
5932 case LEU:
5933 s = "_le_";
5934 break;
5935 case GTU:
5936 s = "_gt_";
5937 break;
5938 case GEU:
5939 s = "_ge_";
5940 break;
5941 default:
5942 output_operand_lossage ("invalid %%xn code");
5943 return;
5944 }
5945 fputs (s, file);
5946 fputc (cmp_signed ? 'i' : 'u', file);
5947
5948 machine_mode mode = GET_MODE (XEXP (x, 0));
5949
5950 if (mode == VOIDmode)
5951 mode = GET_MODE (XEXP (x, 1));
5952
5953 /* If both sides are constants, then assume the instruction is in
5954 SImode since s_cmp can only do integer compares. */
5955 if (mode == VOIDmode)
5956 mode = SImode;
5957
5958 switch (GET_MODE_SIZE (mode))
5959 {
5960 case 4:
5961 s = "32";
5962 break;
5963 case 8:
5964 s = "64";
5965 break;
5966 default:
5967 output_operand_lossage ("invalid operand %%xn code");
5968 return;
5969 }
5970 fputs (s, file);
5971 return;
5972 }
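/* E.g. an ordered less-than comparison of V64SFmode values prints
"_lt_f32", matching the v_cmp suffixes listed in the table above. */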
5973 case 'E':
5974 {
5975 const char *s;
5976 bool cmp_signed = false;
5977 machine_mode mode = GET_MODE (XEXP (x, 0));
5978
5979 if (mode == VOIDmode)
5980 mode = GET_MODE (XEXP (x, 1));
5981
5982 /* If both sides are constants, assume the instruction is in SFmode
5983 if either operand is floating point, otherwise assume SImode. */
5984 if (mode == VOIDmode)
5985 {
5986 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
5987 || GET_CODE (XEXP (x, 1)) == CONST_DOUBLE)
5988 mode = SFmode;
5989 else
5990 mode = SImode;
5991 }
5992
5993 /* Use the same format code for vector comparisons. */
5994 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
5995 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
5996 mode = GET_MODE_INNER (mode);
5997
5998 bool float_p = GET_MODE_CLASS (mode) == MODE_FLOAT;
5999
6000 switch (xcode)
6001 {
6002 case EQ:
6003 s = "_eq_";
6004 break;
6005 case NE:
6006 s = float_p ? "_neq_" : "_ne_";
6007 break;
6008 case LT:
6009 s = "_lt_";
6010 cmp_signed = true;
6011 break;
6012 case LE:
6013 s = "_le_";
6014 cmp_signed = true;
6015 break;
6016 case GT:
6017 s = "_gt_";
6018 cmp_signed = true;
6019 break;
6020 case GE:
6021 s = "_ge_";
6022 cmp_signed = true;
6023 break;
6024 case LTU:
6025 s = "_lt_";
6026 break;
6027 case LEU:
6028 s = "_le_";
6029 break;
6030 case GTU:
6031 s = "_gt_";
6032 break;
6033 case GEU:
6034 s = "_ge_";
6035 break;
6036 case ORDERED:
6037 s = "_o_";
6038 break;
6039 case UNORDERED:
6040 s = "_u_";
6041 break;
6042 case UNEQ:
6043 s = "_nlg_";
6044 break;
6045 case UNGE:
6046 s = "_nlt_";
6047 break;
6048 case UNGT:
6049 s = "_nle_";
6050 break;
6051 case UNLE:
6052 s = "_ngt_";
6053 break;
6054 case UNLT:
6055 s = "_nge_";
6056 break;
6057 case LTGT:
6058 s = "_lg_";
6059 break;
6060 default:
6061 output_operand_lossage ("invalid %%xn code");
6062 return;
6063 }
6064 fputs (s, file);
6065 fputc (float_p ? 'f' : cmp_signed ? 'i' : 'u', file);
6066
6067 switch (GET_MODE_SIZE (mode))
6068 {
6069 case 1:
6070 output_operand_lossage ("operand %%xn code invalid for QImode");
6071 return;
6072 case 2:
6073 s = "16";
6074 break;
6075 case 4:
6076 s = "32";
6077 break;
6078 case 8:
6079 s = "64";
6080 break;
6081 default:
6082 output_operand_lossage ("invalid operand %%xn code");
6083 return;
6084 }
6085 fputs (s, file);
6086 return;
6087 }
6088 case 'L':
6089 print_operand (file, gcn_operand_part (GET_MODE (x), x, 0), 0);
6090 return;
6091 case 'H':
6092 print_operand (file, gcn_operand_part (GET_MODE (x), x, 1), 0);
6093 return;
6094 case 'R':
6095 /* Print a scalar register number as an integer. Temporary hack. */
6096 gcc_assert (REG_P (x));
6097 fprintf (file, "%u", (int) REGNO (x));
6098 return;
6099 case 'V':
6100 /* Print a vector register number as an integer. Temporary hack. */
6101 gcc_assert (REG_P (x));
6102 fprintf (file, "%u", (int) REGNO (x) - FIRST_VGPR_REG);
6103 return;
6104 case 0:
6105 if (xcode == REG)
6106 print_reg (file, x);
6107 else if (xcode == MEM)
6108 output_address (GET_MODE (x), x);
6109 else if (xcode == CONST_INT)
6110 fprintf (file, "%i", (int) INTVAL (x));
6111 else if (xcode == CONST_VECTOR)
6112 print_operand (file, CONST_VECTOR_ELT (x, 0), code);
6113 else if (xcode == CONST_DOUBLE)
6114 {
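/* Encodings 240..248 returned by gcn_inline_fp_constant_p correspond
to the hardware's inline floating-point constants; anything else is
printed as a raw integer bit pattern below. */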
6115 const char *str;
6116 switch (gcn_inline_fp_constant_p (x, false))
6117 {
6118 case 240:
6119 str = "0.5";
6120 break;
6121 case 241:
6122 str = "-0.5";
6123 break;
6124 case 242:
6125 str = "1.0";
6126 break;
6127 case 243:
6128 str = "-1.0";
6129 break;
6130 case 244:
6131 str = "2.0";
6132 break;
6133 case 245:
6134 str = "-2.0";
6135 break;
6136 case 246:
6137 str = "4.0";
6138 break;
6139 case 247:
6140 str = "-4.0";
6141 break;
6142 case 248:
6143 str = "1/pi";
6144 break;
6145 default:
6146 rtx ix = simplify_gen_subreg (GET_MODE (x) == DFmode
6147 ? DImode : SImode,
6148 x, GET_MODE (x), 0);
6149 if (ix)
6150 print_operand (file, ix, code);
6151 else
6152 output_operand_lossage ("invalid fp constant");
6153 return;
6154 break;
6155 }
6156 fputs (str, file);
6157 return;
6158 }
6159 else
6160 output_addr_const (file, x);
6161 return;
6162 case '^':
6163 if (TARGET_GCN5_PLUS)
6164 fputs ("_co", file);
6165 return;
6166 case 'g':
6167 gcc_assert (xcode == MEM);
6168 if (MEM_VOLATILE_P (x))
6169 fputs (" glc", file);
6170 return;
6171 default:
6172 output_operand_lossage ("invalid %%xn code");
6173 }
6174 gcc_unreachable ();
6175 }
6176
6177 /* Implement DBX_REGISTER_NUMBER macro.
6178
6179 Return the DWARF register number that corresponds to the GCC internal
6180 REGNO. */
6181
6182 unsigned int
6183 gcn_dwarf_register_number (unsigned int regno)
6184 {
6185 /* Registers defined in DWARF. */
6186 if (regno == EXEC_LO_REG)
6187 return 17;
6188 /* We need to use a more complex DWARF expression for this
6189 else if (regno == EXEC_HI_REG)
6190 return 17; */
6191 else if (regno == VCC_LO_REG)
6192 return 768;
6193 /* We need to use a more complex DWARF expression for this
6194 else if (regno == VCC_HI_REG)
6195 return 768; */
6196 else if (regno == SCC_REG)
6197 return 128;
6198 else if (SGPR_REGNO_P (regno))
6199 {
6200 if (regno - FIRST_SGPR_REG < 64)
6201 return (regno - FIRST_SGPR_REG + 32);
6202 else
6203 return (regno - FIRST_SGPR_REG + 1024);
6204 }
6205 else if (VGPR_REGNO_P (regno))
6206 return (regno - FIRST_VGPR_REG + 2560);
6207
6208 /* Otherwise, there's nothing sensible to do. */
6209 return regno + 100000;
6210 }
6211
6212 /* Implement TARGET_DWARF_REGISTER_SPAN.
6213
6214 DImode and Vector DImode require additional registers. */
6215
6216 static rtx
6217 gcn_dwarf_register_span (rtx rtl)
6218 {
6219 machine_mode mode = GET_MODE (rtl);
6220
6221 if (VECTOR_MODE_P (mode))
6222 mode = GET_MODE_INNER (mode);
6223
6224 if (GET_MODE_SIZE (mode) != 8)
6225 return NULL_RTX;
6226
6227 rtx p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
6228 unsigned regno = REGNO (rtl);
6229 XVECEXP (p, 0, 0) = gen_rtx_REG (SImode, regno);
6230 XVECEXP (p, 0, 1) = gen_rtx_REG (SImode, regno + 1);
6231
6232 return p;
6233 }
6234
6235 /* }}} */
6236 /* {{{ TARGET hook overrides. */
6237
6238 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
6239 #define TARGET_ADDR_SPACE_ADDRESS_MODE gcn_addr_space_address_mode
6240 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
6241 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
6242 gcn_addr_space_legitimate_address_p
6243 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
6244 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS gcn_addr_space_legitimize_address
6245 #undef TARGET_ADDR_SPACE_POINTER_MODE
6246 #define TARGET_ADDR_SPACE_POINTER_MODE gcn_addr_space_pointer_mode
6247 #undef TARGET_ADDR_SPACE_SUBSET_P
6248 #define TARGET_ADDR_SPACE_SUBSET_P gcn_addr_space_subset_p
6249 #undef TARGET_ADDR_SPACE_CONVERT
6250 #define TARGET_ADDR_SPACE_CONVERT gcn_addr_space_convert
6251 #undef TARGET_ARG_PARTIAL_BYTES
6252 #define TARGET_ARG_PARTIAL_BYTES gcn_arg_partial_bytes
6253 #undef TARGET_ASM_ALIGNED_DI_OP
6254 #define TARGET_ASM_ALIGNED_DI_OP "\t.8byte\t"
6255 #undef TARGET_ASM_FILE_START
6256 #define TARGET_ASM_FILE_START output_file_start
6257 #undef TARGET_ASM_FUNCTION_PROLOGUE
6258 #define TARGET_ASM_FUNCTION_PROLOGUE gcn_target_asm_function_prologue
6259 #undef TARGET_ASM_SELECT_SECTION
6260 #define TARGET_ASM_SELECT_SECTION gcn_asm_select_section
6261 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
6262 #define TARGET_ASM_TRAMPOLINE_TEMPLATE gcn_asm_trampoline_template
6263 #undef TARGET_ATTRIBUTE_TABLE
6264 #define TARGET_ATTRIBUTE_TABLE gcn_attribute_table
6265 #undef TARGET_BUILTIN_DECL
6266 #define TARGET_BUILTIN_DECL gcn_builtin_decl
6267 #undef TARGET_CAN_CHANGE_MODE_CLASS
6268 #define TARGET_CAN_CHANGE_MODE_CLASS gcn_can_change_mode_class
6269 #undef TARGET_CAN_ELIMINATE
6270 #define TARGET_CAN_ELIMINATE gcn_can_eliminate_p
6271 #undef TARGET_CANNOT_COPY_INSN_P
6272 #define TARGET_CANNOT_COPY_INSN_P gcn_cannot_copy_insn_p
6273 #undef TARGET_CLASS_LIKELY_SPILLED_P
6274 #define TARGET_CLASS_LIKELY_SPILLED_P gcn_class_likely_spilled_p
6275 #undef TARGET_CLASS_MAX_NREGS
6276 #define TARGET_CLASS_MAX_NREGS gcn_class_max_nregs
6277 #undef TARGET_CONDITIONAL_REGISTER_USAGE
6278 #define TARGET_CONDITIONAL_REGISTER_USAGE gcn_conditional_register_usage
6279 #undef TARGET_CONSTANT_ALIGNMENT
6280 #define TARGET_CONSTANT_ALIGNMENT gcn_constant_alignment
6281 #undef TARGET_DEBUG_UNWIND_INFO
6282 #define TARGET_DEBUG_UNWIND_INFO gcn_debug_unwind_info
6283 #undef TARGET_DWARF_REGISTER_SPAN
6284 #define TARGET_DWARF_REGISTER_SPAN gcn_dwarf_register_span
6285 #undef TARGET_EMUTLS_VAR_INIT
6286 #define TARGET_EMUTLS_VAR_INIT gcn_emutls_var_init
6287 #undef TARGET_EXPAND_BUILTIN
6288 #define TARGET_EXPAND_BUILTIN gcn_expand_builtin
6289 #undef TARGET_FUNCTION_ARG
6290 #define TARGET_FUNCTION_ARG gcn_function_arg
6291 #undef TARGET_FUNCTION_ARG_ADVANCE
6292 #define TARGET_FUNCTION_ARG_ADVANCE gcn_function_arg_advance
6293 #undef TARGET_FUNCTION_VALUE
6294 #define TARGET_FUNCTION_VALUE gcn_function_value
6295 #undef TARGET_FUNCTION_VALUE_REGNO_P
6296 #define TARGET_FUNCTION_VALUE_REGNO_P gcn_function_value_regno_p
6297 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
6298 #define TARGET_GIMPLIFY_VA_ARG_EXPR gcn_gimplify_va_arg_expr
6299 #undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
6300 #define TARGET_OMP_DEVICE_KIND_ARCH_ISA gcn_omp_device_kind_arch_isa
6301 #undef TARGET_GOACC_ADJUST_PROPAGATION_RECORD
6302 #define TARGET_GOACC_ADJUST_PROPAGATION_RECORD \
6303 gcn_goacc_adjust_propagation_record
6304 #undef TARGET_GOACC_ADJUST_GANGPRIVATE_DECL
6305 #define TARGET_GOACC_ADJUST_GANGPRIVATE_DECL gcn_goacc_adjust_gangprivate_decl
6306 #undef TARGET_GOACC_FORK_JOIN
6307 #define TARGET_GOACC_FORK_JOIN gcn_fork_join
6308 #undef TARGET_GOACC_REDUCTION
6309 #define TARGET_GOACC_REDUCTION gcn_goacc_reduction
6310 #undef TARGET_GOACC_VALIDATE_DIMS
6311 #define TARGET_GOACC_VALIDATE_DIMS gcn_goacc_validate_dims
6312 #undef TARGET_HARD_REGNO_MODE_OK
6313 #define TARGET_HARD_REGNO_MODE_OK gcn_hard_regno_mode_ok
6314 #undef TARGET_HARD_REGNO_NREGS
6315 #define TARGET_HARD_REGNO_NREGS gcn_hard_regno_nregs
6316 #undef TARGET_HAVE_SPECULATION_SAFE_VALUE
6317 #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
6318 #undef TARGET_INIT_BUILTINS
6319 #define TARGET_INIT_BUILTINS gcn_init_builtins
6320 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
6321 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
6322 gcn_ira_change_pseudo_allocno_class
6323 #undef TARGET_LEGITIMATE_CONSTANT_P
6324 #define TARGET_LEGITIMATE_CONSTANT_P gcn_legitimate_constant_p
6325 #undef TARGET_LRA_P
6326 #define TARGET_LRA_P hook_bool_void_true
6327 #undef TARGET_MACHINE_DEPENDENT_REORG
6328 #define TARGET_MACHINE_DEPENDENT_REORG gcn_md_reorg
6329 #undef TARGET_MEMORY_MOVE_COST
6330 #define TARGET_MEMORY_MOVE_COST gcn_memory_move_cost
6331 #undef TARGET_MODES_TIEABLE_P
6332 #define TARGET_MODES_TIEABLE_P gcn_modes_tieable_p
6333 #undef TARGET_OPTION_OVERRIDE
6334 #define TARGET_OPTION_OVERRIDE gcn_option_override
6335 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
6336 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED \
6337 gcn_pretend_outgoing_varargs_named
6338 #undef TARGET_PROMOTE_FUNCTION_MODE
6339 #define TARGET_PROMOTE_FUNCTION_MODE gcn_promote_function_mode
6340 #undef TARGET_REGISTER_MOVE_COST
6341 #define TARGET_REGISTER_MOVE_COST gcn_register_move_cost
6342 #undef TARGET_RETURN_IN_MEMORY
6343 #define TARGET_RETURN_IN_MEMORY gcn_return_in_memory
6344 #undef TARGET_RTX_COSTS
6345 #define TARGET_RTX_COSTS gcn_rtx_costs
6346 #undef TARGET_SECONDARY_RELOAD
6347 #define TARGET_SECONDARY_RELOAD gcn_secondary_reload
6348 #undef TARGET_SECTION_TYPE_FLAGS
6349 #define TARGET_SECTION_TYPE_FLAGS gcn_section_type_flags
6350 #undef TARGET_SCALAR_MODE_SUPPORTED_P
6351 #define TARGET_SCALAR_MODE_SUPPORTED_P gcn_scalar_mode_supported_p
6352 #undef TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P
6353 #define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \
6354 gcn_small_register_classes_for_mode_p
6355 #undef TARGET_SPILL_CLASS
6356 #define TARGET_SPILL_CLASS gcn_spill_class
6357 #undef TARGET_STRICT_ARGUMENT_NAMING
6358 #define TARGET_STRICT_ARGUMENT_NAMING gcn_strict_argument_naming
6359 #undef TARGET_TRAMPOLINE_INIT
6360 #define TARGET_TRAMPOLINE_INIT gcn_trampoline_init
6361 #undef TARGET_TRULY_NOOP_TRUNCATION
6362 #define TARGET_TRULY_NOOP_TRUNCATION gcn_truly_noop_truncation
6363 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
6364 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST gcn_vectorization_cost
6365 #undef TARGET_VECTORIZE_GET_MASK_MODE
6366 #define TARGET_VECTORIZE_GET_MASK_MODE gcn_vectorize_get_mask_mode
6367 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
6368 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE gcn_vectorize_preferred_simd_mode
6369 #undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
6370 #define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
6371 gcn_preferred_vector_alignment
6372 #undef TARGET_VECTORIZE_RELATED_MODE
6373 #define TARGET_VECTORIZE_RELATED_MODE gcn_related_vector_mode
6374 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
6375 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
6376 gcn_vectorize_support_vector_misalignment
6377 #undef TARGET_VECTORIZE_VEC_PERM_CONST
6378 #define TARGET_VECTORIZE_VEC_PERM_CONST gcn_vectorize_vec_perm_const
6379 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
6380 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
6381 gcn_vector_alignment_reachable
6382 #undef TARGET_VECTOR_MODE_SUPPORTED_P
6383 #define TARGET_VECTOR_MODE_SUPPORTED_P gcn_vector_mode_supported_p
6384
6385 struct gcc_target targetm = TARGET_INITIALIZER;
6386
6387 #include "gt-gcn.h"
6388 /* }}} */