gcc/config/gcn/gcn.c
1/* Copyright (C) 2016-2020 Free Software Foundation, Inc.
2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
16
17/* {{{ Includes. */
18
19/* We want GET_MODE_SIZE et al to return integers, please. */
20#define IN_TARGET_CODE 1
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "backend.h"
26#include "target.h"
27#include "memmodel.h"
28#include "rtl.h"
29#include "tree.h"
30#include "df.h"
31#include "tm_p.h"
32#include "stringpool.h"
33#include "optabs.h"
34#include "regs.h"
35#include "emit-rtl.h"
36#include "recog.h"
37#include "diagnostic-core.h"
38#include "insn-attr.h"
39#include "fold-const.h"
40#include "calls.h"
41#include "explow.h"
42#include "expr.h"
43#include "output.h"
44#include "cfgrtl.h"
45#include "langhooks.h"
46#include "builtins.h"
47#include "omp-general.h"
48#include "print-rtl.h"
49#include "attribs.h"
50#include "varasm.h"
51#include "intl.h"
52#include "rtl-iter.h"
53
54/* This file should be included last. */
55#include "target-def.h"
56
57/* }}} */
58/* {{{ Global variables. */
59
60/* Constants used by FP instructions. */
61
62static REAL_VALUE_TYPE dconst4, dconst1over2pi;
63static bool ext_gcn_constants_init = 0;
64
65/* Holds the ISA variant, derived from the command line parameters. */
66
67int gcn_isa = 3; /* Default to GCN3. */
68
69/* Reserve this much space for LDS (for propagating variables from
70 worker-single mode to worker-partitioned mode), per workgroup. Global
71 analysis could calculate an exact bound, but we don't do that yet.
72
73 We want to permit full occupancy, so size accordingly. */
74
75#define OMP_LDS_SIZE 0x600 /* 0x600 is 1/40 total, rounded down. */
76#define ACC_LDS_SIZE 32768 /* Half of the total should be fine. */
77#define OTHER_LDS_SIZE 65536 /* If in doubt, reserve all of it. */
78
79#define LDS_SIZE (flag_openacc ? ACC_LDS_SIZE \
80 : flag_openmp ? OMP_LDS_SIZE \
81 : OTHER_LDS_SIZE)
82
83/* The number of registers usable by normal non-kernel functions.
84 The SGPR count includes any special extra registers such as VCC. */
85
86#define MAX_NORMAL_SGPR_COUNT 64
87#define MAX_NORMAL_VGPR_COUNT 24
88
89/* }}} */
90/* {{{ Initialization and options. */
91
92/* Initialize machine_function. */
93
94static struct machine_function *
95gcn_init_machine_status (void)
96{
97 struct machine_function *f;
98
99 f = ggc_cleared_alloc<machine_function> ();
100
101 /* Set up LDS allocation for broadcasting for this function. */
102 f->lds_allocated = 32;
103 f->lds_allocs = hash_map<tree, int>::create_ggc (64);
104
105 /* And LDS temporary decls for worker reductions. */
106 vec_alloc (f->reduc_decls, 0);
107
108 if (TARGET_GCN3)
109 f->use_flat_addressing = true;
110
111 return f;
112}
113
114/* Implement TARGET_OPTION_OVERRIDE.
115
116 Override option settings where defaults are variable, or we have specific
117 needs to consider. */
118
119static void
120gcn_option_override (void)
121{
122 init_machine_status = gcn_init_machine_status;
123
124 /* The HSA runtime does not respect ELF load addresses, so force PIE. */
125 if (!flag_pie)
126 flag_pie = 2;
127 if (!flag_pic)
128 flag_pic = flag_pie;
129
130 gcn_isa = gcn_arch == PROCESSOR_VEGA ? 5 : 3;
131
132 /* The default stack size needs to be small for offload kernels because
133 there may be many, many threads. Also, a smaller stack gives a
134 measurable performance boost. But, a small stack is insufficient
135 for running the testsuite, so we use a larger default for the
136 stand-alone case. */
137 if (stack_size_opt == -1)
138 {
139 if (flag_openacc || flag_openmp)
140 /* 512 bytes per work item = 32kB total. */
141 stack_size_opt = 512 * 64;
142 else
143 /* 1MB total. */
144 stack_size_opt = 1048576;
145 }
146}
147
148/* }}} */
149/* {{{ Attributes. */
150
151/* This table defines the arguments that are permitted in
152 __attribute__ ((amdgpu_hsa_kernel (...))).
153
154 The names and values correspond to the HSA metadata that is encoded
155 into the assembler file and binary. */
156
157static const struct gcn_kernel_arg_type
158{
159 const char *name;
160 const char *header_pseudo;
161 machine_mode mode;
162
163 /* This should be set to -1 or -2 for a dynamically allocated register
164 number. Use -1 if this argument contributes to the user_sgpr_count,
165 -2 otherwise. */
166 int fixed_regno;
167} gcn_kernel_arg_types[] = {
168 {"exec", NULL, DImode, EXEC_REG},
169#define PRIVATE_SEGMENT_BUFFER_ARG 1
170 {"private_segment_buffer",
171 "enable_sgpr_private_segment_buffer", TImode, -1},
172#define DISPATCH_PTR_ARG 2
173 {"dispatch_ptr", "enable_sgpr_dispatch_ptr", DImode, -1},
174#define QUEUE_PTR_ARG 3
175 {"queue_ptr", "enable_sgpr_queue_ptr", DImode, -1},
176#define KERNARG_SEGMENT_PTR_ARG 4
177 {"kernarg_segment_ptr", "enable_sgpr_kernarg_segment_ptr", DImode, -1},
178 {"dispatch_id", "enable_sgpr_dispatch_id", DImode, -1},
179#define FLAT_SCRATCH_INIT_ARG 6
180 {"flat_scratch_init", "enable_sgpr_flat_scratch_init", DImode, -1},
181#define FLAT_SCRATCH_SEGMENT_SIZE_ARG 7
182 {"private_segment_size", "enable_sgpr_private_segment_size", SImode, -1},
183 {"grid_workgroup_count_X",
184 "enable_sgpr_grid_workgroup_count_x", SImode, -1},
185 {"grid_workgroup_count_Y",
186 "enable_sgpr_grid_workgroup_count_y", SImode, -1},
187 {"grid_workgroup_count_Z",
188 "enable_sgpr_grid_workgroup_count_z", SImode, -1},
189#define WORKGROUP_ID_X_ARG 11
190 {"workgroup_id_X", "enable_sgpr_workgroup_id_x", SImode, -2},
191 {"workgroup_id_Y", "enable_sgpr_workgroup_id_y", SImode, -2},
192 {"workgroup_id_Z", "enable_sgpr_workgroup_id_z", SImode, -2},
193 {"workgroup_info", "enable_sgpr_workgroup_info", SImode, -1},
194#define PRIVATE_SEGMENT_WAVE_OFFSET_ARG 15
195 {"private_segment_wave_offset",
196 "enable_sgpr_private_segment_wave_byte_offset", SImode, -2},
197#define WORK_ITEM_ID_X_ARG 16
198 {"work_item_id_X", NULL, V64SImode, FIRST_VGPR_REG},
199#define WORK_ITEM_ID_Y_ARG 17
200 {"work_item_id_Y", NULL, V64SImode, FIRST_VGPR_REG + 1},
201#define WORK_ITEM_ID_Z_ARG 18
202 {"work_item_id_Z", NULL, V64SImode, FIRST_VGPR_REG + 2}
203};
204
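/* Kernel arguments that are requested implicitly for every kernel; the
   attribute handler below may add further entries to this set. */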
205static const long default_requested_args
206 = (1 << PRIVATE_SEGMENT_BUFFER_ARG)
207 | (1 << DISPATCH_PTR_ARG)
208 | (1 << QUEUE_PTR_ARG)
209 | (1 << KERNARG_SEGMENT_PTR_ARG)
210 | (1 << PRIVATE_SEGMENT_WAVE_OFFSET_ARG)
211 | (1 << WORKGROUP_ID_X_ARG)
212 | (1 << WORK_ITEM_ID_X_ARG)
213 | (1 << WORK_ITEM_ID_Y_ARG)
214 | (1 << WORK_ITEM_ID_Z_ARG);
215
216/* Extract parameter settings from __attribute__((amdgpu_hsa_kernel ())).
217 This function also sets the default values for some arguments.
218
219 Return true if an error was diagnosed while parsing; false on success,
220 with ARGS populated. */
220
221static bool
222gcn_parse_amdgpu_hsa_kernel_attribute (struct gcn_kernel_args *args,
223 tree list)
224{
225 bool err = false;
226 args->requested = default_requested_args;
227 args->nargs = 0;
228
229 for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
230 args->reg[a] = -1;
231
232 for (; list; list = TREE_CHAIN (list))
233 {
234 const char *str;
235 if (TREE_CODE (TREE_VALUE (list)) != STRING_CST)
236 {
237 error ("amdgpu_hsa_kernel attribute requires string constant "
238 "arguments");
239 break;
240 }
241 str = TREE_STRING_POINTER (TREE_VALUE (list));
242 int a;
243 for (a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
244 {
245 if (!strcmp (str, gcn_kernel_arg_types[a].name))
246 break;
247 }
248 if (a == GCN_KERNEL_ARG_TYPES)
249 {
250 error ("unknown specifier %s in amdgpu_hsa_kernel attribute", str);
251 err = true;
252 break;
253 }
254 if (args->requested & (1 << a))
255 {
256 error ("duplicated parameter specifier %s in amdgpu_hsa_kernel "
257 "attribute", str);
258 err = true;
259 break;
260 }
261 args->requested |= (1 << a);
262 args->order[args->nargs++] = a;
263 }
264
265 /* Requesting WORK_ITEM_ID_Z_ARG implies requesting WORK_ITEM_ID_X_ARG and
266 WORK_ITEM_ID_Y_ARG. Similarly, requesting WORK_ITEM_ID_Y_ARG implies
267 requesting WORK_ITEM_ID_X_ARG. */
268 if (args->requested & (1 << WORK_ITEM_ID_Z_ARG))
269 args->requested |= (1 << WORK_ITEM_ID_Y_ARG);
270 if (args->requested & (1 << WORK_ITEM_ID_Y_ARG))
271 args->requested |= (1 << WORK_ITEM_ID_X_ARG);
272
273 int sgpr_regno = FIRST_SGPR_REG;
274 args->nsgprs = 0;
275 for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
276 {
277 if (!(args->requested & (1 << a)))
278 continue;
279
280 if (gcn_kernel_arg_types[a].fixed_regno >= 0)
281 args->reg[a] = gcn_kernel_arg_types[a].fixed_regno;
282 else
283 {
284 int reg_count;
285
286 switch (gcn_kernel_arg_types[a].mode)
287 {
288 case E_SImode:
289 reg_count = 1;
290 break;
291 case E_DImode:
292 reg_count = 2;
293 break;
294 case E_TImode:
295 reg_count = 4;
296 break;
297 default:
298 gcc_unreachable ();
299 }
300 args->reg[a] = sgpr_regno;
301 sgpr_regno += reg_count;
302 if (gcn_kernel_arg_types[a].fixed_regno == -1)
303 args->nsgprs += reg_count;
304 }
305 }
306 if (sgpr_regno > FIRST_SGPR_REG + 16)
307 {
308 error ("too many arguments passed in sgpr registers");
309 }
310 return err;
311}
312
313/* Referenced by TARGET_ATTRIBUTE_TABLE.
314
315 Validates target specific attributes. */
316
317static tree
318gcn_handle_amdgpu_hsa_kernel_attribute (tree *node, tree name,
319 tree args, int, bool *no_add_attrs)
320{
321 if (!FUNC_OR_METHOD_TYPE_P (*node))
322 {
323 warning (OPT_Wattributes, "%qE attribute only applies to functions",
324 name);
325 *no_add_attrs = true;
326 return NULL_TREE;
327 }
328
329 /* Can combine regparm with all attributes but fastcall, and thiscall. */
330 if (is_attribute_p ("gcnhsa_kernel", name))
331 {
332 struct gcn_kernel_args kernelarg;
333
334 if (gcn_parse_amdgpu_hsa_kernel_attribute (&kernelarg, args))
335 *no_add_attrs = true;
336
337 return NULL_TREE;
338 }
339
340 return NULL_TREE;
341}
342
343/* Implement TARGET_ATTRIBUTE_TABLE.
344
345 Create target-specific __attribute__ types. */
346
347static const struct attribute_spec gcn_attribute_table[] = {
348 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
349 affects_type_identity } */
350 {"amdgpu_hsa_kernel", 0, GCN_KERNEL_ARG_TYPES, false, true,
351 true, true, gcn_handle_amdgpu_hsa_kernel_attribute, NULL},
352 /* End element. */
353 {NULL, 0, 0, false, false, false, false, NULL, NULL}
354};
355
356/* }}} */
357/* {{{ Registers and modes. */
358
359/* Implement TARGET_CLASS_MAX_NREGS.
360
361 Return the number of hard registers needed to hold a value of MODE in
362 a register of class RCLASS. */
363
364static unsigned char
365gcn_class_max_nregs (reg_class_t rclass, machine_mode mode)
366{
367 /* Scalar registers are 32bit, vector registers are in fact tuples of
368 64 lanes. */
369 if (rclass == VGPR_REGS)
370 {
371 if (vgpr_1reg_mode_p (mode))
372 return 1;
373 if (vgpr_2reg_mode_p (mode))
374 return 2;
375 /* TImode is used by DImode compare_and_swap. */
376 if (mode == TImode)
377 return 4;
378 }
379 else if (rclass == VCC_CONDITIONAL_REG && mode == BImode)
380 return 2;
381 return CEIL (GET_MODE_SIZE (mode), 4);
382}
383
384/* Implement TARGET_HARD_REGNO_NREGS.
385
386 Return the number of hard registers needed to hold a value of MODE in
387 REGNO. */
388
389unsigned int
390gcn_hard_regno_nregs (unsigned int regno, machine_mode mode)
391{
392 return gcn_class_max_nregs (REGNO_REG_CLASS (regno), mode);
393}
394
395/* Implement TARGET_HARD_REGNO_MODE_OK.
396
397 Return true if REGNO can hold value in MODE. */
398
399bool
400gcn_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
401{
402 /* Treat a complex mode as if it were a scalar mode of the same overall
403 size for the purposes of allocating hard registers. */
404 if (COMPLEX_MODE_P (mode))
405 switch (mode)
406 {
407 case E_CQImode:
408 case E_CHImode:
409 mode = SImode;
410 break;
411 case E_CSImode:
412 mode = DImode;
413 break;
414 case E_CDImode:
415 mode = TImode;
416 break;
417 case E_HCmode:
418 mode = SFmode;
419 break;
420 case E_SCmode:
421 mode = DFmode;
422 break;
423 default:
424 /* Not supported. */
425 return false;
426 }
427
428 switch (regno)
429 {
430 case FLAT_SCRATCH_LO_REG:
431 case XNACK_MASK_LO_REG:
432 case TBA_LO_REG:
433 case TMA_LO_REG:
434 return (mode == SImode || mode == DImode);
435 case VCC_LO_REG:
436 case EXEC_LO_REG:
437 return (mode == BImode || mode == SImode || mode == DImode);
438 case M0_REG:
439 case FLAT_SCRATCH_HI_REG:
440 case XNACK_MASK_HI_REG:
441 case TBA_HI_REG:
442 case TMA_HI_REG:
443 return mode == SImode;
444 case VCC_HI_REG:
445 return false;
446 case EXEC_HI_REG:
447 return mode == SImode /*|| mode == V32BImode */ ;
448 case SCC_REG:
449 case VCCZ_REG:
450 case EXECZ_REG:
451 return mode == BImode;
452 }
453 if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
454 return true;
455 if (SGPR_REGNO_P (regno))
456 /* We restrict double register values to aligned registers. */
457 return (sgpr_1reg_mode_p (mode)
458 || (!((regno - FIRST_SGPR_REG) & 1) && sgpr_2reg_mode_p (mode))
459 || (((regno - FIRST_SGPR_REG) & 3) == 0 && mode == TImode));
460 if (VGPR_REGNO_P (regno))
461 /* Vector instructions do not care about the alignment of register
462 pairs, but where there is no 64-bit instruction, many of the
463 define_split do not work if the input and output registers partially
464 overlap. We tried to fix this with early clobber and match
465 constraints, but it was bug prone, added complexity, and conflicts
466 with the 'U0' constraints on vec_merge.
467 Therefore, we restrict ourselves to aligned registers. */
468 return (vgpr_1reg_mode_p (mode)
469 || (!((regno - FIRST_VGPR_REG) & 1) && vgpr_2reg_mode_p (mode))
470 /* TImode is used by DImode compare_and_swap. */
471 || mode == TImode);
472 return false;
473}
474
475/* Implement REGNO_REG_CLASS via gcn.h.
476
477 Return smallest class containing REGNO. */
478
479enum reg_class
480gcn_regno_reg_class (int regno)
481{
482 switch (regno)
483 {
484 case SCC_REG:
485 return SCC_CONDITIONAL_REG;
486 case VCC_LO_REG:
487 case VCC_HI_REG:
488 return VCC_CONDITIONAL_REG;
489 case VCCZ_REG:
490 return VCCZ_CONDITIONAL_REG;
491 case EXECZ_REG:
492 return EXECZ_CONDITIONAL_REG;
493 case EXEC_LO_REG:
494 case EXEC_HI_REG:
495 return EXEC_MASK_REG;
496 }
497 if (VGPR_REGNO_P (regno))
498 return VGPR_REGS;
499 if (SGPR_REGNO_P (regno))
500 return SGPR_REGS;
501 if (regno < FIRST_VGPR_REG)
502 return GENERAL_REGS;
503 if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
504 return AFP_REGS;
505 return ALL_REGS;
506}
507
508/* Implement TARGET_CAN_CHANGE_MODE_CLASS.
509
510 GCC assumes that the lowpart contains the first part of the value as stored in memory.
511 This is not the case for vector registers. */
512
513bool
514gcn_can_change_mode_class (machine_mode from, machine_mode to,
515 reg_class_t regclass)
516{
517 if (!vgpr_vector_mode_p (from) && !vgpr_vector_mode_p (to))
518 return true;
519 return (gcn_class_max_nregs (regclass, from)
520 == gcn_class_max_nregs (regclass, to));
521}
522
523/* Implement TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P.
524
525 When this hook returns true for MODE, the compiler allows
526 registers explicitly used in the rtl to be used as spill registers
527 but prevents the compiler from extending the lifetime of these
528 registers. */
529
530bool
531gcn_small_register_classes_for_mode_p (machine_mode mode)
532{
533 /* We allocate into exec and vcc regs. Those make a small register class. */
534 return mode == DImode || mode == SImode;
535}
536
537/* Implement TARGET_CLASS_LIKELY_SPILLED_P.
538
539 Returns true if pseudos that have been assigned to registers of class RCLASS
540 would likely be spilled because registers of RCLASS are needed for spill
541 registers. */
542
543static bool
544gcn_class_likely_spilled_p (reg_class_t rclass)
545{
546 return (rclass == EXEC_MASK_REG
547 || reg_classes_intersect_p (ALL_CONDITIONAL_REGS, rclass));
548}
549
550/* Implement TARGET_MODES_TIEABLE_P.
551
552 Returns true if a value of MODE1 is accessible in MODE2 without
553 copying. */
554
555bool
556gcn_modes_tieable_p (machine_mode mode1, machine_mode mode2)
557{
558 return (GET_MODE_BITSIZE (mode1) <= MAX_FIXED_MODE_SIZE
559 && GET_MODE_BITSIZE (mode2) <= MAX_FIXED_MODE_SIZE);
560}
561
562/* Implement TARGET_TRULY_NOOP_TRUNCATION.
563
564 Returns true if it is safe to “convert” a value of INPREC bits to one of
565 OUTPREC bits (where OUTPREC is smaller than INPREC) by merely operating on
566 it as if it had only OUTPREC bits. */
567
568bool
569gcn_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec)
570{
571 return ((inprec <= 32) && (outprec <= inprec));
572}
573
574/* Return N-th part of value occupying multiple registers. */
575
576rtx
577gcn_operand_part (machine_mode mode, rtx op, int n)
578{
579 if (GET_MODE_SIZE (mode) >= 256)
580 {
581 /*gcc_assert (GET_MODE_SIZE (mode) == 256 || n == 0); */
582
583 if (REG_P (op))
584 {
585 gcc_assert (REGNO (op) + n < FIRST_PSEUDO_REGISTER);
586 return gen_rtx_REG (V64SImode, REGNO (op) + n);
587 }
588 if (GET_CODE (op) == CONST_VECTOR)
589 {
590 int units = GET_MODE_NUNITS (mode);
591 rtvec v = rtvec_alloc (units);
592
593 for (int i = 0; i < units; ++i)
594 RTVEC_ELT (v, i) = gcn_operand_part (GET_MODE_INNER (mode),
595 CONST_VECTOR_ELT (op, i), n);
596
597 return gen_rtx_CONST_VECTOR (V64SImode, v);
598 }
599 if (GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_VECTOR)
600 return gcn_gen_undef (V64SImode);
601 gcc_unreachable ();
602 }
603 else if (GET_MODE_SIZE (mode) == 8 && REG_P (op))
604 {
605 gcc_assert (REGNO (op) + n < FIRST_PSEUDO_REGISTER);
606 return gen_rtx_REG (SImode, REGNO (op) + n);
607 }
608 else
609 {
610 if (GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_VECTOR)
611 return gcn_gen_undef (SImode);
612
613 /* If it's a constant then let's assume it is of the largest mode
614 available, otherwise simplify_gen_subreg will fail. */
615 if (mode == VOIDmode && CONST_INT_P (op))
616 mode = DImode;
617 return simplify_gen_subreg (SImode, op, mode, n * 4);
618 }
619}
620
621/* Return N-th part of value occupying multiple registers. */
622
623rtx
624gcn_operand_doublepart (machine_mode mode, rtx op, int n)
625{
626 return simplify_gen_subreg (DImode, op, mode, n * 8);
627}
628
629/* Return true if OP can be split into subregs or high/low parts.
630 This is always true for scalars, but not normally true for vectors.
631 However, for vectors in hardregs we can use the low and high registers. */
632
633bool
634gcn_can_split_p (machine_mode, rtx op)
635{
636 if (vgpr_vector_mode_p (GET_MODE (op)))
637 {
638 if (GET_CODE (op) == SUBREG)
639 op = SUBREG_REG (op);
640 if (!REG_P (op))
641 return true;
642 return REGNO (op) <= FIRST_PSEUDO_REGISTER;
643 }
644 return true;
645}
646
647/* Implement TARGET_SPILL_CLASS.
648
649 Return class of registers which could be used for pseudo of MODE
650 and of class RCLASS for spilling instead of memory. Return NO_REGS
651 if it is not possible or non-profitable. */
652
653static reg_class_t
654gcn_spill_class (reg_class_t c, machine_mode /*mode */ )
655{
656 if (reg_classes_intersect_p (ALL_CONDITIONAL_REGS, c)
657 || c == VCC_CONDITIONAL_REG)
658 return SGPR_REGS;
659 else
660 return NO_REGS;
661}
662
663/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
664
665 Change allocno class for given pseudo from allocno and best class
666 calculated by IRA. */
667
668static reg_class_t
669gcn_ira_change_pseudo_allocno_class (int regno, reg_class_t cl,
670 reg_class_t best_cl)
671{
672 /* Avoid returning classes that contain both vgpr and sgpr registers. */
673 if (cl != ALL_REGS && cl != SRCDST_REGS && cl != ALL_GPR_REGS)
674 return cl;
675 if (best_cl != ALL_REGS && best_cl != SRCDST_REGS
676 && best_cl != ALL_GPR_REGS)
677 return best_cl;
678
679 machine_mode mode = PSEUDO_REGNO_MODE (regno);
680 if (vgpr_vector_mode_p (mode))
681 return VGPR_REGS;
682
683 return GENERAL_REGS;
684}
685
686/* Create a new DImode pseudo reg and emit an instruction to initialize
687 it to VAL. */
688
689static rtx
690get_exec (int64_t val)
691{
692 rtx reg = gen_reg_rtx (DImode);
693 emit_insn (gen_rtx_SET (reg, gen_int_mode (val, DImode)));
694 return reg;
695}
696
697/* Return value of scalar exec register. */
698
699rtx
700gcn_scalar_exec ()
701{
702 return const1_rtx;
703}
704
705/* Return pseudo holding scalar exec register. */
706
707rtx
708gcn_scalar_exec_reg ()
709{
710 return get_exec (1);
711}
712
713/* Return value of full exec register. */
714
715rtx
716gcn_full_exec ()
717{
718 return constm1_rtx;
719}
720
721/* Return pseudo holding full exec register. */
722
723rtx
724gcn_full_exec_reg ()
725{
726 return get_exec (-1);
727}
728
729/* }}} */
730/* {{{ Immediate constants. */
731
732/* Initialize shared numeric constants. */
733
734static void
735init_ext_gcn_constants (void)
736{
737 real_from_integer (&dconst4, DFmode, 4, SIGNED);
738
739 /* FIXME: this constant probably does not match what hardware really loads.
740 Reality check it eventually. */
741 real_from_string (&dconst1over2pi,
742 "0.1591549430918953357663423455968866839");
743 real_convert (&dconst1over2pi, SFmode, &dconst1over2pi);
744
745 ext_gcn_constants_init = 1;
746}
747
748/* Return non-zero if X is a constant that can appear as an inline operand.
749 This is 0, 0.5, -0.5, 1, -1, 2, -2, 4, -4, 1/(2*pi)
750 Or a vector of those.
751 The value returned should be the encoding of this constant. */
752
753int
754gcn_inline_fp_constant_p (rtx x, bool allow_vector)
755{
756 machine_mode mode = GET_MODE (x);
757
758 if ((mode == V64HFmode || mode == V64SFmode || mode == V64DFmode)
759 && allow_vector)
760 {
761 int n;
762 if (GET_CODE (x) != CONST_VECTOR)
763 return 0;
764 n = gcn_inline_fp_constant_p (CONST_VECTOR_ELT (x, 0), false);
765 if (!n)
766 return 0;
767 for (int i = 1; i < 64; i++)
768 if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
769 return 0;
770 return 1;
771 }
772
773 if (mode != HFmode && mode != SFmode && mode != DFmode)
774 return 0;
775
776 const REAL_VALUE_TYPE *r;
777
778 if (x == CONST0_RTX (mode))
779 return 128;
780 if (x == CONST1_RTX (mode))
781 return 242;
782
783 r = CONST_DOUBLE_REAL_VALUE (x);
784
785 if (real_identical (r, &dconstm1))
786 return 243;
787
788 if (real_identical (r, &dconsthalf))
789 return 240;
790 if (real_identical (r, &dconstm1))
791 return 243;
792 if (real_identical (r, &dconst2))
793 return 244;
794 if (real_identical (r, &dconst4))
795 return 246;
796 if (real_identical (r, &dconst1over2pi))
797 return 248;
798 if (!ext_gcn_constants_init)
799 init_ext_gcn_constants ();
800 real_value_negate (r);
801 if (real_identical (r, &dconsthalf))
802 return 241;
803 if (real_identical (r, &dconst2))
804 return 245;
805 if (real_identical (r, &dconst4))
806 return 247;
807
808 /* FIXME: add 4, -4 and 1/(2*PI). */
809
810 return 0;
811}
812
813/* Return non-zero if X is a constant that can appear as an immediate operand.
814 This is 0, 0.5, -0.5, 1, -1, 2, -2, 4, -4, 1/(2*pi)
815 Or a vector of those.
816 The value returned should be the encoding of this constant. */
817
818bool
819gcn_fp_constant_p (rtx x, bool allow_vector)
820{
821 machine_mode mode = GET_MODE (x);
822
823 if ((mode == V64HFmode || mode == V64SFmode || mode == V64DFmode)
824 && allow_vector)
825 {
826 int n;
827 if (GET_CODE (x) != CONST_VECTOR)
828 return false;
829 n = gcn_fp_constant_p (CONST_VECTOR_ELT (x, 0), false);
830 if (!n)
831 return false;
832 for (int i = 1; i < 64; i++)
833 if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
834 return false;
835 return true;
836 }
837 if (mode != HFmode && mode != SFmode && mode != DFmode)
838 return false;
839
840 if (gcn_inline_fp_constant_p (x, false))
841 return true;
842 /* FIXME: It is not clear how 32bit immediates are interpreted here. */
843 return (mode != DFmode);
844}
845
846/* Return true if X is a constant representable as an inline immediate
847 constant in a 32-bit instruction encoding. */
848
849bool
850gcn_inline_constant_p (rtx x)
851{
852 if (GET_CODE (x) == CONST_INT)
853 return INTVAL (x) >= -16 && INTVAL (x) <= 64;
854 if (GET_CODE (x) == CONST_DOUBLE)
855 return gcn_inline_fp_constant_p (x, false);
856 if (GET_CODE (x) == CONST_VECTOR)
857 {
858 int n;
859 if (!vgpr_vector_mode_p (GET_MODE (x)))
860 return false;
861 n = gcn_inline_constant_p (CONST_VECTOR_ELT (x, 0));
862 if (!n)
863 return false;
864 for (int i = 1; i < 64; i++)
865 if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
866 return false;
867 return 1;
868 }
869 return false;
870}
871
872/* Return true if X is a constant representable as an immediate constant
873 in a 32 or 64-bit instruction encoding. */
874
875bool
876gcn_constant_p (rtx x)
877{
878 switch (GET_CODE (x))
879 {
880 case CONST_INT:
881 return true;
882
883 case CONST_DOUBLE:
884 return gcn_fp_constant_p (x, false);
885
886 case CONST_VECTOR:
887 {
888 int n;
889 if (!vgpr_vector_mode_p (GET_MODE (x)))
890 return false;
891 n = gcn_constant_p (CONST_VECTOR_ELT (x, 0));
892 if (!n)
893 return false;
894 for (int i = 1; i < 64; i++)
895 if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
896 return false;
897 return true;
898 }
899
900 case SYMBOL_REF:
901 case LABEL_REF:
902 return true;
903
904 default:
905 ;
906 }
907
908 return false;
909}
910
911/* Return true if X is a constant representable as two inline immediate
912 constants in a 64-bit instruction that is split into two 32-bit
913 instructions.
914 When MIXED is set, the low-part is permitted to use the full 32-bits. */
915
916bool
917gcn_inline_constant64_p (rtx x, bool mixed)
918{
919 if (GET_CODE (x) == CONST_VECTOR)
920 {
921 if (!vgpr_vector_mode_p (GET_MODE (x)))
922 return false;
923 if (!gcn_inline_constant64_p (CONST_VECTOR_ELT (x, 0), mixed))
924 return false;
925 for (int i = 1; i < 64; i++)
926 if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
927 return false;
928
929 return true;
930 }
931
932 if (GET_CODE (x) != CONST_INT)
933 return false;
934
935 rtx val_lo = gcn_operand_part (DImode, x, 0);
936 rtx val_hi = gcn_operand_part (DImode, x, 1);
937 return ((mixed || gcn_inline_constant_p (val_lo))
938 && gcn_inline_constant_p (val_hi));
939}
940
941/* Return true if X is a constant representable as an immediate constant
942 in a 32 or 64-bit instruction encoding where the hardware will
943 extend the immediate to 64-bits. */
944
945bool
946gcn_constant64_p (rtx x)
947{
948 if (!gcn_constant_p (x))
949 return false;
950
951 if (GET_CODE (x) != CONST_INT)
952 return true;
953
954 /* Negative numbers are only allowed if they can be encoded within src0,
955 because the 32-bit immediates do not get sign-extended.
956 Unsigned numbers must not be encodable as 32-bit -1..-16, because the
957 assembler will use a src0 inline immediate and that will get
958 sign-extended. */
959 HOST_WIDE_INT val = INTVAL (x);
960 return (((val & 0xffffffff) == val /* Positive 32-bit. */
961 && (val & 0xfffffff0) != 0xfffffff0) /* Not -1..-16. */
962 || gcn_inline_constant_p (x)); /* Src0. */
963}
964
965/* Implement TARGET_LEGITIMATE_CONSTANT_P.
966
967 Returns true if X is a legitimate constant for a MODE immediate operand. */
968
969bool
970gcn_legitimate_constant_p (machine_mode, rtx x)
971{
972 return gcn_constant_p (x);
973}
974
975/* Return true if X is a CONST_VECTOR of a single, duplicated constant. */
976
977static bool
978single_cst_vector_p (rtx x)
979{
980 if (GET_CODE (x) != CONST_VECTOR)
981 return false;
982 for (int i = 1; i < 64; i++)
983 if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
984 return false;
985 return true;
986}
987
988/* Create a CONST_VECTOR of duplicated value A. */
989
990rtx
991gcn_vec_constant (machine_mode mode, int a)
992{
993 /*if (!a)
994 return CONST0_RTX (mode);
995 if (a == -1)
996 return CONSTM1_RTX (mode);
997 if (a == 1)
998 return CONST1_RTX (mode);
999 if (a == 2)
1000 return CONST2_RTX (mode);*/
1001
1002 int units = GET_MODE_NUNITS (mode);
1003 machine_mode innermode = GET_MODE_INNER (mode);
1004
1005 rtx tem;
1006 if (FLOAT_MODE_P (innermode))
1007 {
1008 REAL_VALUE_TYPE rv;
1009 real_from_integer (&rv, NULL, a, SIGNED);
1010 tem = const_double_from_real_value (rv, innermode);
1011 }
1012 else
1013 tem = gen_int_mode (a, innermode);
1014
1015 rtvec v = rtvec_alloc (units);
1016 for (int i = 0; i < units; ++i)
1017 RTVEC_ELT (v, i) = tem;
1018
1019 return gen_rtx_CONST_VECTOR (mode, v);
1020}
1021
1022/* Create a CONST_VECTOR of duplicated value A. */
1023
1024rtx
1025gcn_vec_constant (machine_mode mode, rtx a)
1026{
1027 int units = GET_MODE_NUNITS (mode);
1028 rtvec v = rtvec_alloc (units);
1029
1030 for (int i = 0; i < units; ++i)
1031 RTVEC_ELT (v, i) = a;
1032
1033 return gen_rtx_CONST_VECTOR (mode, v);
1034}
1035
1036/* Create an undefined vector value, used where an insn operand is
1037 optional. */
1038
1039rtx
1040gcn_gen_undef (machine_mode mode)
1041{
1042 return gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), UNSPEC_VECTOR);
1043}
1044
1045/* }}} */
1046/* {{{ Addresses, pointers and moves. */
1047
1048/* Return true if REG is a valid place to store a pointer,
1049 for instructions that require an SGPR.
1050 FIXME rename. */
1051
1052static bool
1053gcn_address_register_p (rtx reg, machine_mode mode, bool strict)
1054{
1055 if (GET_CODE (reg) == SUBREG)
1056 reg = SUBREG_REG (reg);
1057
1058 if (!REG_P (reg))
1059 return false;
1060
1061 if (GET_MODE (reg) != mode)
1062 return false;
1063
1064 int regno = REGNO (reg);
1065
1066 if (regno >= FIRST_PSEUDO_REGISTER)
1067 {
1068 if (!strict)
1069 return true;
1070
1071 if (!reg_renumber)
1072 return false;
1073
1074 regno = reg_renumber[regno];
1075 }
1076
1077 return (SGPR_REGNO_P (regno) || regno == M0_REG
1078 || regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM);
1079}
1080
1081/* Return true if REG is a valid place to store a pointer,
1082 for instructions that require a VGPR. */
1083
1084static bool
1085gcn_vec_address_register_p (rtx reg, machine_mode mode, bool strict)
1086{
1087 if (GET_CODE (reg) == SUBREG)
1088 reg = SUBREG_REG (reg);
1089
1090 if (!REG_P (reg))
1091 return false;
1092
1093 if (GET_MODE (reg) != mode)
1094 return false;
1095
1096 int regno = REGNO (reg);
1097
1098 if (regno >= FIRST_PSEUDO_REGISTER)
1099 {
1100 if (!strict)
1101 return true;
1102
1103 if (!reg_renumber)
1104 return false;
1105
1106 regno = reg_renumber[regno];
1107 }
1108
1109 return VGPR_REGNO_P (regno);
1110}
1111
1112/* Return true if X would be valid inside a MEM using the Flat address
1113 space. */
1114
1115bool
1116gcn_flat_address_p (rtx x, machine_mode mode)
1117{
1118 bool vec_mode = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1119 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1120
1121 if (vec_mode && gcn_address_register_p (x, DImode, false))
1122 return true;
1123
1124 if (!vec_mode && gcn_vec_address_register_p (x, DImode, false))
1125 return true;
1126
1127 if (TARGET_GCN5_PLUS
1128 && GET_CODE (x) == PLUS
1129 && gcn_vec_address_register_p (XEXP (x, 0), DImode, false)
1130 && CONST_INT_P (XEXP (x, 1)))
1131 return true;
1132
1133 return false;
1134}
1135
1136/* Return true if X would be valid inside a MEM using the Scalar Flat
1137 address space. */
1138
1139bool
1140gcn_scalar_flat_address_p (rtx x)
1141{
1142 if (gcn_address_register_p (x, DImode, false))
1143 return true;
1144
1145 if (GET_CODE (x) == PLUS
1146 && gcn_address_register_p (XEXP (x, 0), DImode, false)
1147 && CONST_INT_P (XEXP (x, 1)))
1148 return true;
1149
1150 return false;
1151}
1152
1153/* Return true if MEM X would be valid for the Scalar Flat address space. */
1154
1155bool
1156gcn_scalar_flat_mem_p (rtx x)
1157{
1158 if (!MEM_P (x))
1159 return false;
1160
1161 if (GET_MODE_SIZE (GET_MODE (x)) < 4)
1162 return false;
1163
1164 return gcn_scalar_flat_address_p (XEXP (x, 0));
1165}
1166
1167/* Return true if X would be valid inside a MEM using the LDS or GDS
1168 address spaces. */
1169
1170bool
1171gcn_ds_address_p (rtx x)
1172{
1173 if (gcn_vec_address_register_p (x, SImode, false))
1174 return true;
1175
1176 if (GET_CODE (x) == PLUS
1177 && gcn_vec_address_register_p (XEXP (x, 0), SImode, false)
1178 && CONST_INT_P (XEXP (x, 1)))
1179 return true;
1180
1181 return false;
1182}
1183
1184/* Return true if ADDR would be valid inside a MEM using the Global
1185 address space. */
1186
1187bool
1188gcn_global_address_p (rtx addr)
1189{
1190 if (gcn_address_register_p (addr, DImode, false)
1191 || gcn_vec_address_register_p (addr, DImode, false))
1192 return true;
1193
1194 if (GET_CODE (addr) == PLUS)
1195 {
1196 rtx base = XEXP (addr, 0);
1197 rtx offset = XEXP (addr, 1);
1198 bool immediate_p = (CONST_INT_P (offset)
1199 && INTVAL (offset) >= -(1 << 12)
1200 && INTVAL (offset) < (1 << 12));
1201
1202 if ((gcn_address_register_p (base, DImode, false)
1203 || gcn_vec_address_register_p (base, DImode, false))
1204 && immediate_p)
1205 /* SGPR + CONST or VGPR + CONST */
1206 return true;
1207
1208 if (gcn_address_register_p (base, DImode, false)
1209 && gcn_vgpr_register_operand (offset, SImode))
1210 /* SGPR + VGPR */
1211 return true;
1212
1213 if (GET_CODE (base) == PLUS
1214 && gcn_address_register_p (XEXP (base, 0), DImode, false)
1215 && gcn_vgpr_register_operand (XEXP (base, 1), SImode)
1216 && immediate_p)
1217 /* (SGPR + VGPR) + CONST */
1218 return true;
1219 }
1220
1221 return false;
1222}
1223
1224/* Implement TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P.
1225
1226 Recognizes RTL expressions that are valid memory addresses for an
1227 instruction. The MODE argument is the machine mode for the MEM
1228 expression that wants to use this address.
1229
1230 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
1231 convert common non-canonical forms to canonical form so that they will
1232 be recognized. */
1233
1234static bool
1235gcn_addr_space_legitimate_address_p (machine_mode mode, rtx x, bool strict,
1236 addr_space_t as)
1237{
1238 /* All vector instructions need to work on addresses in registers. */
1239 if (!TARGET_GCN5_PLUS && (vgpr_vector_mode_p (mode) && !REG_P (x)))
1240 return false;
1241
1242 if (AS_SCALAR_FLAT_P (as))
1243 {
1244 if (mode == QImode || mode == HImode)
1245 return 0;
1246
1247 switch (GET_CODE (x))
1248 {
1249 case REG:
1250 return gcn_address_register_p (x, DImode, strict);
1251 /* Addresses are in the form BASE+OFFSET
1252 OFFSET is either 20bit unsigned immediate, SGPR or M0.
1253 Writes and atomics do not accept SGPR. */
1254 case PLUS:
1255 {
1256 rtx x0 = XEXP (x, 0);
1257 rtx x1 = XEXP (x, 1);
1258 if (!gcn_address_register_p (x0, DImode, strict))
1259 return false;
1260 /* FIXME: This is disabled because of the mode mismatch between
1261 SImode (for the address or m0 register) and the DImode PLUS.
1262 We'll need a zero_extend or similar.
1263
1264 if (gcn_m0_register_p (x1, SImode, strict)
1265 || gcn_address_register_p (x1, SImode, strict))
1266 return true;
1267 else*/
1268 if (GET_CODE (x1) == CONST_INT)
1269 {
1270 if (INTVAL (x1) >= 0 && INTVAL (x1) < (1 << 20)
1271 /* The low bits of the offset are ignored, even when
1272 they're meant to realign the pointer. */
1273 && !(INTVAL (x1) & 0x3))
1274 return true;
1275 }
1276 return false;
1277 }
1278
1279 default:
1280 break;
1281 }
1282 }
1283 else if (AS_SCRATCH_P (as))
1284 return gcn_address_register_p (x, SImode, strict);
1285 else if (AS_FLAT_P (as) || AS_FLAT_SCRATCH_P (as))
1286 {
1287 if (TARGET_GCN3 || GET_CODE (x) == REG)
1288 return ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1289 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
1290 ? gcn_address_register_p (x, DImode, strict)
1291 : gcn_vec_address_register_p (x, DImode, strict));
1292 else
1293 {
1294 gcc_assert (TARGET_GCN5_PLUS);
1295
1296 if (GET_CODE (x) == PLUS)
1297 {
1298 rtx x1 = XEXP (x, 1);
1299
1300 if (VECTOR_MODE_P (mode)
1301 ? !gcn_address_register_p (x, DImode, strict)
1302 : !gcn_vec_address_register_p (x, DImode, strict))
1303 return false;
1304
1305 if (GET_CODE (x1) == CONST_INT)
1306 {
1307 if (INTVAL (x1) >= 0 && INTVAL (x1) < (1 << 12)
1308 /* The low bits of the offset are ignored, even when
1309 they're meant to realign the pointer. */
1310 && !(INTVAL (x1) & 0x3))
1311 return true;
1312 }
1313 }
1314 return false;
1315 }
1316 }
1317 else if (AS_GLOBAL_P (as))
1318 {
1319 gcc_assert (TARGET_GCN5_PLUS);
1320
1321 if (GET_CODE (x) == REG)
1322 return (gcn_address_register_p (x, DImode, strict)
1323 || (!VECTOR_MODE_P (mode)
1324 && gcn_vec_address_register_p (x, DImode, strict)));
1325 else if (GET_CODE (x) == PLUS)
1326 {
1327 rtx base = XEXP (x, 0);
1328 rtx offset = XEXP (x, 1);
1329
1330 bool immediate_p = (GET_CODE (offset) == CONST_INT
1331 /* Signed 13-bit immediate. */
1332 && INTVAL (offset) >= -(1 << 12)
1333 && INTVAL (offset) < (1 << 12)
1334 /* The low bits of the offset are ignored, even
1335 when they're meant to realign the pointer. */
1336 && !(INTVAL (offset) & 0x3));
1337
1338 if (!VECTOR_MODE_P (mode))
1339 {
1340 if ((gcn_address_register_p (base, DImode, strict)
1341 || gcn_vec_address_register_p (base, DImode, strict))
1342 && immediate_p)
1343 /* SGPR + CONST or VGPR + CONST */
1344 return true;
1345
1346 if (gcn_address_register_p (base, DImode, strict)
1347 && gcn_vgpr_register_operand (offset, SImode))
1348 /* SGPR + VGPR */
1349 return true;
1350
1351 if (GET_CODE (base) == PLUS
1352 && gcn_address_register_p (XEXP (base, 0), DImode, strict)
1353 && gcn_vgpr_register_operand (XEXP (base, 1), SImode)
1354 && immediate_p)
1355 /* (SGPR + VGPR) + CONST */
1356 return true;
1357 }
1358 else
1359 {
1360 if (gcn_address_register_p (base, DImode, strict)
1361 && immediate_p)
1362 /* SGPR + CONST */
1363 return true;
1364 }
1365 }
1366 else
1367 return false;
1368 }
1369 else if (AS_ANY_DS_P (as))
1370 switch (GET_CODE (x))
1371 {
1372 case REG:
1373 return (VECTOR_MODE_P (mode)
1374 ? gcn_address_register_p (x, SImode, strict)
1375 : gcn_vec_address_register_p (x, SImode, strict));
1376 /* Addresses are in the form BASE+OFFSET
1377 OFFSET is either 20bit unsigned immediate, SGPR or M0.
1378 Writes and atomics do not accept SGPR. */
1379 case PLUS:
1380 {
1381 rtx x0 = XEXP (x, 0);
1382 rtx x1 = XEXP (x, 1);
1383 if (!gcn_vec_address_register_p (x0, DImode, strict))
1384 return false;
1385 if (GET_CODE (x1) == REG)
1386 {
1387 if (GET_CODE (x1) != REG
1388 || (REGNO (x1) <= FIRST_PSEUDO_REGISTER
1389 && !gcn_ssrc_register_operand (x1, DImode)))
1390 return false;
1391 }
1392 else if (GET_CODE (x1) == CONST_VECTOR
1393 && GET_CODE (CONST_VECTOR_ELT (x1, 0)) == CONST_INT
1394 && single_cst_vector_p (x1))
1395 {
1396 x1 = CONST_VECTOR_ELT (x1, 0);
1397 if (INTVAL (x1) >= 0 && INTVAL (x1) < (1 << 20))
1398 return true;
1399 }
1400 return false;
1401 }
1402
1403 default:
1404 break;
1405 }
1406 else
1407 gcc_unreachable ();
1408 return false;
1409}
1410
1411/* Implement TARGET_ADDR_SPACE_POINTER_MODE.
1412
1413 Return the appropriate mode for a named address pointer. */
1414
1415static scalar_int_mode
1416gcn_addr_space_pointer_mode (addr_space_t addrspace)
1417{
1418 switch (addrspace)
1419 {
1420 case ADDR_SPACE_SCRATCH:
1421 case ADDR_SPACE_LDS:
1422 case ADDR_SPACE_GDS:
1423 return SImode;
1424 case ADDR_SPACE_DEFAULT:
1425 case ADDR_SPACE_FLAT:
1426 case ADDR_SPACE_FLAT_SCRATCH:
1427 case ADDR_SPACE_SCALAR_FLAT:
1428 return DImode;
1429 default:
1430 gcc_unreachable ();
1431 }
1432}
1433
1434/* Implement TARGET_ADDR_SPACE_ADDRESS_MODE.
1435
1436 Return the appropriate mode for a named address space address. */
1437
1438static scalar_int_mode
1439gcn_addr_space_address_mode (addr_space_t addrspace)
1440{
1441 return gcn_addr_space_pointer_mode (addrspace);
1442}
1443
1444/* Implement TARGET_ADDR_SPACE_SUBSET_P.
1445
1446 Determine if one named address space is a subset of another. */
1447
1448static bool
1449gcn_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
1450{
1451 if (subset == superset)
1452 return true;
1453 /* FIXME is this true? */
1454 if (AS_FLAT_P (superset) || AS_SCALAR_FLAT_P (superset))
1455 return true;
1456 return false;
1457}
1458
1459/* Convert from one address space to another. */
1460
1461static rtx
1462gcn_addr_space_convert (rtx op, tree from_type, tree to_type)
1463{
1464 gcc_assert (POINTER_TYPE_P (from_type));
1465 gcc_assert (POINTER_TYPE_P (to_type));
1466
1467 addr_space_t as_from = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
1468 addr_space_t as_to = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
1469
1470 if (AS_LDS_P (as_from) && AS_FLAT_P (as_to))
1471 {
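/* An LDS address is converted to a flat address by placing the group
   segment aperture (loaded from offset 64 in the HSA queue object) in
   the high 32 bits, with the LDS offset in the low 32 bits. */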
1472 rtx queue = gen_rtx_REG (DImode,
1473 cfun->machine->args.reg[QUEUE_PTR_ARG]);
1474 rtx group_seg_aperture_hi = gen_rtx_MEM (SImode,
1475 gen_rtx_PLUS (DImode, queue,
1476 gen_int_mode (64, SImode)));
1477 rtx tmp = gen_reg_rtx (DImode);
1478
1479 emit_move_insn (gen_lowpart (SImode, tmp), op);
1480 emit_move_insn (gen_highpart_mode (SImode, DImode, tmp),
1481 group_seg_aperture_hi);
1482
1483 return tmp;
1484 }
1485 else if (as_from == as_to)
1486 return op;
1487 else
1488 gcc_unreachable ();
1489}
1490
1491
1492/* Implement REGNO_MODE_CODE_OK_FOR_BASE_P via gcn.h
1493
1494 Return true if REGNO is OK for memory addressing. */
1495
1496bool
1497gcn_regno_mode_code_ok_for_base_p (int regno,
1498 machine_mode, addr_space_t as, int, int)
1499{
1500 if (regno >= FIRST_PSEUDO_REGISTER)
1501 {
1502 if (reg_renumber)
1503 regno = reg_renumber[regno];
1504 else
1505 return true;
1506 }
1507 if (AS_FLAT_P (as))
1508 return (VGPR_REGNO_P (regno)
1509 || regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM);
1510 else if (AS_SCALAR_FLAT_P (as))
1511 return (SGPR_REGNO_P (regno)
1512 || regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM);
1513 else if (AS_GLOBAL_P (as))
1514 {
1515 return (SGPR_REGNO_P (regno)
1516 || VGPR_REGNO_P (regno)
1517 || regno == ARG_POINTER_REGNUM
1518 || regno == FRAME_POINTER_REGNUM);
1519 }
1520 else
1521 /* For now. */
1522 return false;
1523}
1524
1525/* Implement MODE_CODE_BASE_REG_CLASS via gcn.h.
1526
1527 Return a suitable register class for memory addressing. */
1528
1529reg_class
1530gcn_mode_code_base_reg_class (machine_mode mode, addr_space_t as, int oc,
1531 int ic)
1532{
1533 switch (as)
1534 {
1535 case ADDR_SPACE_DEFAULT:
1536 return gcn_mode_code_base_reg_class (mode, DEFAULT_ADDR_SPACE, oc, ic);
1537 case ADDR_SPACE_SCALAR_FLAT:
1538 case ADDR_SPACE_SCRATCH:
1539 return SGPR_REGS;
1540 break;
1541 case ADDR_SPACE_FLAT:
1542 case ADDR_SPACE_FLAT_SCRATCH:
1543 case ADDR_SPACE_LDS:
1544 case ADDR_SPACE_GDS:
1545 return ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1546 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
1547 ? SGPR_REGS : VGPR_REGS);
1548 case ADDR_SPACE_GLOBAL:
1549 return ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1550 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
1551 ? SGPR_REGS : ALL_GPR_REGS);
1552 }
1553 gcc_unreachable ();
1554}
1555
1556/* Implement REGNO_OK_FOR_INDEX_P via gcn.h.
1557
1558 Return true if REGNO is OK for index of memory addressing. */
1559
1560bool
1561regno_ok_for_index_p (int regno)
1562{
1563 if (regno >= FIRST_PSEUDO_REGISTER)
1564 {
1565 if (reg_renumber)
1566 regno = reg_renumber[regno];
1567 else
1568 return true;
1569 }
1570 return regno == M0_REG || VGPR_REGNO_P (regno);
1571}
1572
1573/* Generate move which uses the exec flags. If EXEC is NULL, then it is
1574 assumed that all lanes normally relevant to the mode of the move are
1575 affected. If PREV is NULL, then a sensible default is supplied for
1576 the inactive lanes. */
1577
1578static rtx
1579gen_mov_with_exec (rtx op0, rtx op1, rtx exec = NULL, rtx prev = NULL)
1580{
1581 machine_mode mode = GET_MODE (op0);
1582
1583 if (vgpr_vector_mode_p (mode))
1584 {
1585 if (exec && exec != CONSTM1_RTX (DImode))
1586 {
1587 if (!prev)
1588 prev = op0;
1589 }
1590 else
1591 {
1592 if (!prev)
1593 prev = gcn_gen_undef (mode);
1594 exec = gcn_full_exec_reg ();
1595 }
1596
1597 rtx set = gen_rtx_SET (op0, gen_rtx_VEC_MERGE (mode, op1, prev, exec));
1598
1599 return gen_rtx_PARALLEL (VOIDmode,
1600 gen_rtvec (2, set,
1601 gen_rtx_CLOBBER (VOIDmode,
1602 gen_rtx_SCRATCH (V64DImode))));
1603 }
1604
1605 return (gen_rtx_PARALLEL
1606 (VOIDmode,
1607 gen_rtvec (2, gen_rtx_SET (op0, op1),
1608 gen_rtx_USE (VOIDmode,
1609 exec ? exec : gcn_scalar_exec ()))));
1610}
1611
1612/* Generate masked move. */
1613
1614static rtx
1615gen_duplicate_load (rtx op0, rtx op1, rtx op2 = NULL, rtx exec = NULL)
1616{
1617 if (exec)
1618 return (gen_rtx_SET (op0,
1619 gen_rtx_VEC_MERGE (GET_MODE (op0),
1620 gen_rtx_VEC_DUPLICATE (GET_MODE
1621 (op0), op1),
1622 op2, exec)));
1623 else
1624 return (gen_rtx_SET (op0, gen_rtx_VEC_DUPLICATE (GET_MODE (op0), op1)));
1625}
1626
1627/* Expand vector init of OP0 by VEC.
1628 Implements vec_init instruction pattern. */
1629
1630void
1631gcn_expand_vector_init (rtx op0, rtx vec)
1632{
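/* Write every lane that shares lane 0's value with one full-exec move,
   then loop over the remaining lanes, covering each distinct value and
   all lanes that repeat it with a single masked move. */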
1633 int64_t initialized_mask = 0;
1634 int64_t curr_mask = 1;
1635 machine_mode mode = GET_MODE (op0);
1636
1637 rtx val = XVECEXP (vec, 0, 0);
1638
1639 for (int i = 1; i < 64; i++)
1640 if (rtx_equal_p (val, XVECEXP (vec, 0, i)))
1641 curr_mask |= (int64_t) 1 << i;
1642
1643 if (gcn_constant_p (val))
1644 emit_move_insn (op0, gcn_vec_constant (mode, val));
1645 else
1646 {
1647 val = force_reg (GET_MODE_INNER (mode), val);
1648 emit_insn (gen_duplicate_load (op0, val));
1649 }
1650 initialized_mask |= curr_mask;
1651 for (int i = 1; i < 64; i++)
1652 if (!(initialized_mask & ((int64_t) 1 << i)))
1653 {
1654 curr_mask = (int64_t) 1 << i;
1655 rtx val = XVECEXP (vec, 0, i);
1656
1657 for (int j = i + 1; j < 64; j++)
1658 if (rtx_equal_p (val, XVECEXP (vec, 0, j)))
1659 curr_mask |= (int64_t) 1 << j;
1660 if (gcn_constant_p (val))
1661 emit_insn (gen_mov_with_exec (op0, gcn_vec_constant (mode, val),
1662 get_exec (curr_mask)));
1663 else
1664 {
1665 val = force_reg (GET_MODE_INNER (mode), val);
1666 emit_insn (gen_duplicate_load (op0, val, op0,
1667 get_exec (curr_mask)));
1668 }
1669 initialized_mask |= curr_mask;
1670 }
1671}
1672
1673/* Load vector constant where n-th lane contains BASE+n*VAL. */
1674
1675static rtx
1676strided_constant (machine_mode mode, int base, int val)
1677{
1678 rtx x = gen_reg_rtx (mode);
1679 emit_move_insn (x, gcn_vec_constant (mode, base));
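/* Each of the six masked adds below adds VAL*2^k to exactly those lanes
   whose index has bit k set, so afterwards lane n holds BASE + n*VAL. */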
1680 emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 32),
1681 x, get_exec (0xffffffff00000000)));
1682 emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 16),
1683 x, get_exec (0xffff0000ffff0000)));
1684 emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 8),
1685 x, get_exec (0xff00ff00ff00ff00)));
1686 emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 4),
1687 x, get_exec (0xf0f0f0f0f0f0f0f0)));
1688 emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 2),
1689 x, get_exec (0xcccccccccccccccc)));
1690 emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 1),
1691 x, get_exec (0xaaaaaaaaaaaaaaaa)));
1692 return x;
1693}
1694
1695/* Implement TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS. */
1696
1697static rtx
1698gcn_addr_space_legitimize_address (rtx x, rtx old, machine_mode mode,
1699 addr_space_t as)
1700{
1701 switch (as)
1702 {
1703 case ADDR_SPACE_DEFAULT:
1704 return gcn_addr_space_legitimize_address (x, old, mode,
1705 DEFAULT_ADDR_SPACE);
1706 case ADDR_SPACE_SCALAR_FLAT:
1707 case ADDR_SPACE_SCRATCH:
1708 /* Instructions working on vectors need the address to be in
1709 a register. */
1710 if (vgpr_vector_mode_p (mode))
1711 return force_reg (GET_MODE (x), x);
1712
1713 return x;
1714 case ADDR_SPACE_FLAT:
1715 case ADDR_SPACE_FLAT_SCRATCH:
1716 case ADDR_SPACE_GLOBAL:
1717 return TARGET_GCN3 ? force_reg (DImode, x) : x;
1718 case ADDR_SPACE_LDS:
1719 case ADDR_SPACE_GDS:
1720 /* FIXME: LDS supports offsets, handle them! */
1721 if (vgpr_vector_mode_p (mode) && GET_MODE (x) != V64SImode)
1722 {
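/* Build a vector of per-lane addresses: broadcast the scalar base and
   add a per-lane stride of 0, size, 2*size, ... */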
1723 rtx addrs = gen_reg_rtx (V64SImode);
1724 rtx base = force_reg (SImode, x);
1725 rtx offsets = strided_constant (V64SImode, 0,
1726 GET_MODE_UNIT_SIZE (mode));
1727
1728 emit_insn (gen_vec_duplicatev64si (addrs, base));
1729 emit_insn (gen_addv64si3 (addrs, offsets, addrs));
1730 return addrs;
1731 }
1732 return x;
1733 }
1734 gcc_unreachable ();
1735}
1736
1737/* Convert a (mem:<MODE> (reg:DI)) to (mem:<MODE> (reg:V64DI)) with the
1738 proper vector of stepped addresses.
1739
1740 MEM will be a DImode address of a vector in an SGPR.
1741 TMP will be a V64DImode VGPR pair or (scratch:V64DI). */
1742
1743rtx
1744gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem,
1745 rtx tmp)
1746{
1747 gcc_assert (MEM_P (mem));
1748 rtx mem_base = XEXP (mem, 0);
1749 rtx mem_index = NULL_RTX;
1750
1751 if (!TARGET_GCN5_PLUS)
1752 {
1753 /* gcn_addr_space_legitimize_address should have put the address in a
1754 register. If not, it is too late to do anything about it. */
1755 gcc_assert (REG_P (mem_base));
1756 }
1757
1758 if (GET_CODE (mem_base) == PLUS)
1759 {
1760 mem_index = XEXP (mem_base, 1);
1761 mem_base = XEXP (mem_base, 0);
1762 }
1763
1764 /* RF and RM base registers for vector modes should always be an SGPR. */
1765 gcc_assert (SGPR_REGNO_P (REGNO (mem_base))
1766 || REGNO (mem_base) >= FIRST_PSEUDO_REGISTER);
1767
1768 machine_mode inner = GET_MODE_INNER (mode);
1769 int shift = exact_log2 (GET_MODE_SIZE (inner));
1770 rtx ramp = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
1771 rtx undef_v64si = gcn_gen_undef (V64SImode);
1772 rtx new_base = NULL_RTX;
1773 addr_space_t as = MEM_ADDR_SPACE (mem);
1774
1775 rtx tmplo = (REG_P (tmp)
1776 ? gcn_operand_part (V64DImode, tmp, 0)
1777 : gen_reg_rtx (V64SImode));
1778
1779 /* tmplo[:] = ramp[:] << shift */
1780 if (exec)
1781 emit_insn (gen_ashlv64si3_exec (tmplo, ramp,
1782 gen_int_mode (shift, SImode),
1783 undef_v64si, exec));
1784 else
1785 emit_insn (gen_ashlv64si3 (tmplo, ramp, gen_int_mode (shift, SImode)));
1786
1787 if (AS_FLAT_P (as))
1788 {
1789 if (REG_P (tmp))
1790 {
1791 rtx vcc = gen_rtx_REG (DImode, CC_SAVE_REG);
1792 rtx mem_base_lo = gcn_operand_part (DImode, mem_base, 0);
1793 rtx mem_base_hi = gcn_operand_part (DImode, mem_base, 1);
1794 rtx tmphi = gcn_operand_part (V64DImode, tmp, 1);
1795
1796 /* tmphi[:] = mem_base_hi */
1797 if (exec)
1798 emit_insn (gen_vec_duplicatev64si_exec (tmphi, mem_base_hi,
1799 undef_v64si, exec));
1800 else
1801 emit_insn (gen_vec_duplicatev64si (tmphi, mem_base_hi));
1802
1803 /* tmp[:] += zext (mem_base) */
1804 if (exec)
1805 {
1806 emit_insn (gen_addv64si3_vcc_dup_exec (tmplo, mem_base_lo, tmplo,
1807 vcc, undef_v64si, exec));
1808 emit_insn (gen_addcv64si3_exec (tmphi, tmphi, const0_rtx,
1809 vcc, vcc, undef_v64si, exec));
1810 }
1811 else
1812 emit_insn (gen_addv64di3_zext_dup (tmp, mem_base_lo, tmp));
1813 }
1814 else
1815 {
1816 tmp = gen_reg_rtx (V64DImode);
1817 if (exec)
1818 emit_insn (gen_addv64di3_zext_dup2_exec (tmp, tmplo, mem_base,
1819 gcn_gen_undef (V64DImode),
1820 exec));
1821 else
1822 emit_insn (gen_addv64di3_zext_dup2 (tmp, tmplo, mem_base));
1823 }
1824
1825 new_base = tmp;
1826 }
1827 else if (AS_ANY_DS_P (as))
1828 {
1829 if (!exec)
1830 emit_insn (gen_addv64si3_dup (tmplo, tmplo, mem_base));
1831 else
1832 emit_insn (gen_addv64si3_dup_exec (tmplo, tmplo, mem_base,
1833 gcn_gen_undef (V64SImode), exec));
1834 new_base = tmplo;
1835 }
1836 else
1837 {
1838 mem_base = gen_rtx_VEC_DUPLICATE (V64DImode, mem_base);
1839 new_base = gen_rtx_PLUS (V64DImode, mem_base,
1840 gen_rtx_SIGN_EXTEND (V64DImode, tmplo));
1841 }
1842
1843 return gen_rtx_PLUS (GET_MODE (new_base), new_base,
1844 gen_rtx_VEC_DUPLICATE (GET_MODE (new_base),
1845 (mem_index ? mem_index
1846 : const0_rtx)));
1847}
1848
1849/* Convert a BASE address, a vector of OFFSETS, and a SCALE, to addresses
1851 suitable for the given address space. This is intended for use in
1851 gather/scatter patterns.
1852
1853 The offsets may be signed or unsigned, according to UNSIGNED_P.
1854 If EXEC is set then _exec patterns will be used, otherwise plain.
1855
1856 Return values.
1857 ADDR_SPACE_FLAT - return V64DImode vector of absolute addresses.
1858 ADDR_SPACE_GLOBAL - return V64SImode vector of offsets. */
1859
1860rtx
1861gcn_expand_scaled_offsets (addr_space_t as, rtx base, rtx offsets, rtx scale,
1862 bool unsigned_p, rtx exec)
1863{
1864 rtx tmpsi = gen_reg_rtx (V64SImode);
1865 rtx tmpdi = gen_reg_rtx (V64DImode);
1866 rtx undefsi = exec ? gcn_gen_undef (V64SImode) : NULL;
1867 rtx undefdi = exec ? gcn_gen_undef (V64DImode) : NULL;
1868
1869 if (CONST_INT_P (scale)
1870 && INTVAL (scale) > 0
1871 && exact_log2 (INTVAL (scale)) >= 0)
1872 emit_insn (gen_ashlv64si3 (tmpsi, offsets,
1873 GEN_INT (exact_log2 (INTVAL (scale)))));
1874 else
1875 (exec
1876 ? emit_insn (gen_mulv64si3_dup_exec (tmpsi, offsets, scale, undefsi,
1877 exec))
1878 : emit_insn (gen_mulv64si3_dup (tmpsi, offsets, scale)));
1879
1880 /* "Global" instructions do not support negative register offsets. */
1881 if (as == ADDR_SPACE_FLAT || !unsigned_p)
1882 {
1883 if (unsigned_p)
1884 (exec
1885 ? emit_insn (gen_addv64di3_zext_dup2_exec (tmpdi, tmpsi, base,
1886 undefdi, exec))
1887 : emit_insn (gen_addv64di3_zext_dup2 (tmpdi, tmpsi, base)));
1888 else
1889 (exec
1890 ? emit_insn (gen_addv64di3_sext_dup2_exec (tmpdi, tmpsi, base,
1891 undefdi, exec))
1892 : emit_insn (gen_addv64di3_sext_dup2 (tmpdi, tmpsi, base)));
1893 return tmpdi;
1894 }
1895 else if (as == ADDR_SPACE_GLOBAL)
1896 return tmpsi;
1897
1898 gcc_unreachable ();
1899}
1900
1901/* Return true if move from OP0 to OP1 is known to be executed in vector
1902 unit. */
1903
1904bool
1905gcn_vgpr_move_p (rtx op0, rtx op1)
1906{
1907 if (MEM_P (op0) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op0)))
1908 return true;
1909 if (MEM_P (op1) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op1)))
1910 return true;
1911 return ((REG_P (op0) && VGPR_REGNO_P (REGNO (op0)))
1912 || (REG_P (op1) && VGPR_REGNO_P (REGNO (op1)))
1913 || vgpr_vector_mode_p (GET_MODE (op0)));
1914}
1915
1916/* Return true if move from OP0 to OP1 is known to be executed in scalar
1917 unit. Used in the machine description. */
1918
1919bool
1920gcn_sgpr_move_p (rtx op0, rtx op1)
1921{
1922 if (MEM_P (op0) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op0)))
1923 return true;
1924 if (MEM_P (op1) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op1)))
1925 return true;
1926 if (!REG_P (op0) || REGNO (op0) >= FIRST_PSEUDO_REGISTER
1927 || VGPR_REGNO_P (REGNO (op0)))
1928 return false;
1929 if (REG_P (op1)
1930 && REGNO (op1) < FIRST_PSEUDO_REGISTER
1931 && !VGPR_REGNO_P (REGNO (op1)))
1932 return true;
1933 return immediate_operand (op1, VOIDmode) || memory_operand (op1, VOIDmode);
1934}
1935
1936/* Implement TARGET_SECONDARY_RELOAD.
1937
1938 The address space determines which registers can be used for loads and
1939 stores. */
1940
1941static reg_class_t
1942gcn_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
1943 machine_mode reload_mode, secondary_reload_info *sri)
1944{
1945 reg_class_t result = NO_REGS;
1946 bool spilled_pseudo =
1947 (REG_P (x) || GET_CODE (x) == SUBREG) && true_regnum (x) == -1;
1948
1949 if (dump_file && (dump_flags & TDF_DETAILS))
1950 {
1951 fprintf (dump_file, "gcn_secondary_reload: ");
1952 dump_value_slim (dump_file, x, 1);
1953 fprintf (dump_file, " %s %s:%s", (in_p ? "->" : "<-"),
1954 reg_class_names[rclass], GET_MODE_NAME (reload_mode));
1955 if (REG_P (x) || GET_CODE (x) == SUBREG)
1956 fprintf (dump_file, " (true regnum: %d \"%s\")", true_regnum (x),
1957 (true_regnum (x) >= 0
1958 && true_regnum (x) < FIRST_PSEUDO_REGISTER
1959 ? reg_names[true_regnum (x)]
1960 : (spilled_pseudo ? "stack spill" : "??")));
1961 fprintf (dump_file, "\n");
1962 }
1963
1964 /* Some callers don't use or initialize icode. */
1965 sri->icode = CODE_FOR_nothing;
1966
1967 if (MEM_P (x) || spilled_pseudo)
1968 {
1969 addr_space_t as = DEFAULT_ADDR_SPACE;
1970
1971 /* If we have a spilled pseudo, we can't find the address space
 1972	 directly, but we know it's in ADDR_SPACE_FLAT for GCN3 or
1973 ADDR_SPACE_GLOBAL for GCN5. */
1974 if (MEM_P (x))
1975 as = MEM_ADDR_SPACE (x);
1976
1977 if (as == ADDR_SPACE_DEFAULT)
1978 as = DEFAULT_ADDR_SPACE;
1979
1980 switch (as)
1981 {
1982 case ADDR_SPACE_SCALAR_FLAT:
1983 result =
1984 ((!MEM_P (x) || rclass == SGPR_REGS) ? NO_REGS : SGPR_REGS);
1985 break;
1986 case ADDR_SPACE_FLAT:
1987 case ADDR_SPACE_FLAT_SCRATCH:
1988 case ADDR_SPACE_GLOBAL:
1989 if (GET_MODE_CLASS (reload_mode) == MODE_VECTOR_INT
1990 || GET_MODE_CLASS (reload_mode) == MODE_VECTOR_FLOAT)
1991 {
1992 if (in_p)
1993 switch (reload_mode)
1994 {
1995 case E_V64SImode:
1996 sri->icode = CODE_FOR_reload_inv64si;
1997 break;
1998 case E_V64SFmode:
1999 sri->icode = CODE_FOR_reload_inv64sf;
2000 break;
2001 case E_V64HImode:
2002 sri->icode = CODE_FOR_reload_inv64hi;
2003 break;
2004 case E_V64HFmode:
2005 sri->icode = CODE_FOR_reload_inv64hf;
2006 break;
2007 case E_V64QImode:
2008 sri->icode = CODE_FOR_reload_inv64qi;
2009 break;
2010 case E_V64DImode:
2011 sri->icode = CODE_FOR_reload_inv64di;
2012 break;
2013 case E_V64DFmode:
2014 sri->icode = CODE_FOR_reload_inv64df;
2015 break;
2016 default:
2017 gcc_unreachable ();
2018 }
2019 else
2020 switch (reload_mode)
2021 {
2022 case E_V64SImode:
2023 sri->icode = CODE_FOR_reload_outv64si;
2024 break;
2025 case E_V64SFmode:
2026 sri->icode = CODE_FOR_reload_outv64sf;
2027 break;
2028 case E_V64HImode:
2029 sri->icode = CODE_FOR_reload_outv64hi;
2030 break;
2031 case E_V64HFmode:
2032 sri->icode = CODE_FOR_reload_outv64hf;
2033 break;
2034 case E_V64QImode:
2035 sri->icode = CODE_FOR_reload_outv64qi;
2036 break;
2037 case E_V64DImode:
2038 sri->icode = CODE_FOR_reload_outv64di;
2039 break;
2040 case E_V64DFmode:
2041 sri->icode = CODE_FOR_reload_outv64df;
2042 break;
2043 default:
2044 gcc_unreachable ();
2045 }
2046 break;
2047 }
2048 /* Fallthrough. */
2049 case ADDR_SPACE_LDS:
2050 case ADDR_SPACE_GDS:
2051 case ADDR_SPACE_SCRATCH:
2052 result = (rclass == VGPR_REGS ? NO_REGS : VGPR_REGS);
2053 break;
2054 }
2055 }
2056
2057 if (dump_file && (dump_flags & TDF_DETAILS))
2058 fprintf (dump_file, " <= %s (icode: %s)\n", reg_class_names[result],
2059 get_insn_name (sri->icode));
2060
2061 return result;
2062}
2063
2064/* Update register usage after having seen the compiler flags and kernel
2065 attributes. We typically want to fix registers that contain values
2066 set by the HSA runtime. */
2067
2068static void
2069gcn_conditional_register_usage (void)
2070{
342f9464
KCY
2071 if (!cfun || !cfun->machine)
2072 return;
5326695a 2073
342f9464
KCY
2074 if (cfun->machine->normal_function)
2075 {
2076 /* Restrict the set of SGPRs and VGPRs used by non-kernel functions. */
87fdbe69
KCY
2077 for (int i = SGPR_REGNO (MAX_NORMAL_SGPR_COUNT - 2);
2078 i <= LAST_SGPR_REG; i++)
342f9464 2079 fixed_regs[i] = 1, call_used_regs[i] = 1;
5326695a 2080
87fdbe69
KCY
2081 for (int i = VGPR_REGNO (MAX_NORMAL_VGPR_COUNT);
2082 i <= LAST_VGPR_REG; i++)
342f9464 2083 fixed_regs[i] = 1, call_used_regs[i] = 1;
5326695a 2084
5326695a
AS
2085 return;
2086 }
2087
342f9464
KCY
2088 /* If the set of requested args is the default set, nothing more needs to
2089 be done. */
2090 if (cfun->machine->args.requested == default_requested_args)
2091 return;
2092
2093 /* Requesting a set of args different from the default violates the ABI. */
2094 if (!leaf_function_p ())
 2095     warning (0, "a non-default set of initial values has been requested, "
 2096 	     "which violates the ABI");
2097
2098 for (int i = SGPR_REGNO (0); i < SGPR_REGNO (14); i++)
2099 fixed_regs[i] = 0;
2100
5326695a
AS
 2101   /* Fix the runtime argument registers containing values that may be
2102 needed later. DISPATCH_PTR_ARG and FLAT_SCRATCH_* should not be
2103 needed after the prologue so there's no need to fix them. */
2104 if (cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG] >= 0)
2105 fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]] = 1;
2106 if (cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] >= 0)
2107 {
342f9464
KCY
2108 /* The upper 32-bits of the 64-bit descriptor are not used, so allow
2109 the containing registers to be used for other purposes. */
5326695a
AS
2110 fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG]] = 1;
2111 fixed_regs[cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] + 1] = 1;
5326695a
AS
2112 }
2113 if (cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG] >= 0)
2114 {
2115 fixed_regs[cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG]] = 1;
2116 fixed_regs[cfun->machine->args.reg[KERNARG_SEGMENT_PTR_ARG] + 1] = 1;
2117 }
2118 if (cfun->machine->args.reg[DISPATCH_PTR_ARG] >= 0)
2119 {
2120 fixed_regs[cfun->machine->args.reg[DISPATCH_PTR_ARG]] = 1;
2121 fixed_regs[cfun->machine->args.reg[DISPATCH_PTR_ARG] + 1] = 1;
2122 }
2123 if (cfun->machine->args.reg[WORKGROUP_ID_X_ARG] >= 0)
2124 fixed_regs[cfun->machine->args.reg[WORKGROUP_ID_X_ARG]] = 1;
2125 if (cfun->machine->args.reg[WORK_ITEM_ID_X_ARG] >= 0)
2126 fixed_regs[cfun->machine->args.reg[WORK_ITEM_ID_X_ARG]] = 1;
2127 if (cfun->machine->args.reg[WORK_ITEM_ID_Y_ARG] >= 0)
2128 fixed_regs[cfun->machine->args.reg[WORK_ITEM_ID_Y_ARG]] = 1;
2129 if (cfun->machine->args.reg[WORK_ITEM_ID_Z_ARG] >= 0)
2130 fixed_regs[cfun->machine->args.reg[WORK_ITEM_ID_Z_ARG]] = 1;
2131
2132 if (TARGET_GCN5_PLUS)
 2133     /* v0 always holds zero, for use as the zero offset in global instructions. */
2134 fixed_regs[VGPR_REGNO (0)] = 1;
2135}
2136
2137/* Determine if a load or store is valid, according to the register classes
2138 and address space. Used primarily by the machine description to decide
2139 when to split a move into two steps. */
2140
2141bool
2142gcn_valid_move_p (machine_mode mode, rtx dest, rtx src)
2143{
2144 if (!MEM_P (dest) && !MEM_P (src))
2145 return true;
2146
2147 if (MEM_P (dest)
2148 && AS_FLAT_P (MEM_ADDR_SPACE (dest))
2149 && (gcn_flat_address_p (XEXP (dest, 0), mode)
2150 || GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
2151 || GET_CODE (XEXP (dest, 0)) == LABEL_REF)
2152 && gcn_vgpr_register_operand (src, mode))
2153 return true;
2154 else if (MEM_P (src)
2155 && AS_FLAT_P (MEM_ADDR_SPACE (src))
2156 && (gcn_flat_address_p (XEXP (src, 0), mode)
2157 || GET_CODE (XEXP (src, 0)) == SYMBOL_REF
2158 || GET_CODE (XEXP (src, 0)) == LABEL_REF)
2159 && gcn_vgpr_register_operand (dest, mode))
2160 return true;
2161
2162 if (MEM_P (dest)
2163 && AS_GLOBAL_P (MEM_ADDR_SPACE (dest))
2164 && (gcn_global_address_p (XEXP (dest, 0))
2165 || GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
2166 || GET_CODE (XEXP (dest, 0)) == LABEL_REF)
2167 && gcn_vgpr_register_operand (src, mode))
2168 return true;
2169 else if (MEM_P (src)
2170 && AS_GLOBAL_P (MEM_ADDR_SPACE (src))
2171 && (gcn_global_address_p (XEXP (src, 0))
2172 || GET_CODE (XEXP (src, 0)) == SYMBOL_REF
2173 || GET_CODE (XEXP (src, 0)) == LABEL_REF)
2174 && gcn_vgpr_register_operand (dest, mode))
2175 return true;
2176
2177 if (MEM_P (dest)
2178 && MEM_ADDR_SPACE (dest) == ADDR_SPACE_SCALAR_FLAT
2179 && (gcn_scalar_flat_address_p (XEXP (dest, 0))
2180 || GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
2181 || GET_CODE (XEXP (dest, 0)) == LABEL_REF)
2182 && gcn_ssrc_register_operand (src, mode))
2183 return true;
2184 else if (MEM_P (src)
2185 && MEM_ADDR_SPACE (src) == ADDR_SPACE_SCALAR_FLAT
2186 && (gcn_scalar_flat_address_p (XEXP (src, 0))
2187 || GET_CODE (XEXP (src, 0)) == SYMBOL_REF
2188 || GET_CODE (XEXP (src, 0)) == LABEL_REF)
2189 && gcn_sdst_register_operand (dest, mode))
2190 return true;
2191
2192 if (MEM_P (dest)
2193 && AS_ANY_DS_P (MEM_ADDR_SPACE (dest))
2194 && gcn_ds_address_p (XEXP (dest, 0))
2195 && gcn_vgpr_register_operand (src, mode))
2196 return true;
2197 else if (MEM_P (src)
2198 && AS_ANY_DS_P (MEM_ADDR_SPACE (src))
2199 && gcn_ds_address_p (XEXP (src, 0))
2200 && gcn_vgpr_register_operand (dest, mode))
2201 return true;
2202
2203 return false;
2204}
2205
2206/* }}} */
2207/* {{{ Functions and ABI. */
2208
2209/* Implement TARGET_FUNCTION_VALUE.
2210
2211 Define how to find the value returned by a function.
2212 The register location is always the same, but the mode depends on
2213 VALTYPE. */
2214
2215static rtx
2216gcn_function_value (const_tree valtype, const_tree, bool)
2217{
2218 machine_mode mode = TYPE_MODE (valtype);
2219
2220 if (INTEGRAL_TYPE_P (valtype)
2221 && GET_MODE_CLASS (mode) == MODE_INT
2222 && GET_MODE_SIZE (mode) < 4)
2223 mode = SImode;
2224
2225 return gen_rtx_REG (mode, SGPR_REGNO (RETURN_VALUE_REG));
2226}
2227
2228/* Implement TARGET_FUNCTION_VALUE_REGNO_P.
2229
2230 Return true if N is a possible register number for the function return
2231 value. */
2232
2233static bool
2234gcn_function_value_regno_p (const unsigned int n)
2235{
2236 return n == RETURN_VALUE_REG;
2237}
2238
0ffef200
RS
2239/* Calculate the number of registers required to hold function argument
2240 ARG. */
5326695a
AS
2241
2242static int
0ffef200 2243num_arg_regs (const function_arg_info &arg)
5326695a 2244{
0ffef200 2245 if (targetm.calls.must_pass_in_stack (arg))
5326695a
AS
2246 return 0;
2247
0ffef200 2248 int size = arg.promoted_size_in_bytes ();
5326695a
AS
2249 return (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2250}
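/* A worked example for num_arg_regs (illustrative; assumes UNITS_PER_WORD
   is 4, i.e. 32-bit registers): an argument promoted to 8 bytes (DImode)
   needs (8 + 4 - 1) / 4 = 2 registers, a 12-byte by-value struct that is
   not forced onto the stack needs 3, and anything that must_pass_in_stack
   reports 0.  */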
2251
2252/* Implement TARGET_STRICT_ARGUMENT_NAMING.
2253
2254 Return true if the location where a function argument is passed
 2255    depends on whether or not it is a named argument.
2256
2257 For gcn, we know how to handle functions declared as stdarg: by
2258 passing an extra pointer to the unnamed arguments. However, the
2259 Fortran frontend can produce a different situation, where a
2260 function pointer is declared with no arguments, but the actual
2261 function and calls to it take more arguments. In that case, we
2262 want to ensure the call matches the definition of the function. */
2263
2264static bool
2265gcn_strict_argument_naming (cumulative_args_t cum_v)
2266{
2267 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2268
2269 return cum->fntype == NULL_TREE || stdarg_p (cum->fntype);
2270}
2271
2272/* Implement TARGET_PRETEND_OUTGOING_VARARGS_NAMED.
2273
2274 See comment on gcn_strict_argument_naming. */
2275
2276static bool
2277gcn_pretend_outgoing_varargs_named (cumulative_args_t cum_v)
2278{
2279 return !gcn_strict_argument_naming (cum_v);
2280}
2281
2282/* Implement TARGET_FUNCTION_ARG.
2283
2284 Return an RTX indicating whether a function argument is passed in a register
2285 and if so, which register. */
2286
2287static rtx
6783fdb7 2288gcn_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
5326695a
AS
2289{
2290 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2291 if (cum->normal_function)
2292 {
6783fdb7 2293 if (!arg.named || arg.end_marker_p ())
5326695a
AS
2294 return 0;
2295
0ffef200 2296 if (targetm.calls.must_pass_in_stack (arg))
5326695a
AS
2297 return 0;
2298
2299 int reg_num = FIRST_PARM_REG + cum->num;
0ffef200 2300 int num_regs = num_arg_regs (arg);
5326695a
AS
2301 if (num_regs > 0)
2302 while (reg_num % num_regs != 0)
2303 reg_num++;
2304 if (reg_num + num_regs <= FIRST_PARM_REG + NUM_PARM_REGS)
6783fdb7 2305 return gen_rtx_REG (arg.mode, reg_num);
5326695a
AS
2306 }
2307 else
2308 {
2309 if (cum->num >= cum->args.nargs)
2310 {
6783fdb7
RS
2311 cum->offset = (cum->offset + TYPE_ALIGN (arg.type) / 8 - 1)
2312 & -(TYPE_ALIGN (arg.type) / 8);
5326695a
AS
2313 cfun->machine->kernarg_segment_alignment
2314 = MAX ((unsigned) cfun->machine->kernarg_segment_alignment,
6783fdb7 2315 TYPE_ALIGN (arg.type) / 8);
5326695a
AS
2316 rtx addr = gen_rtx_REG (DImode,
2317 cum->args.reg[KERNARG_SEGMENT_PTR_ARG]);
2318 if (cum->offset)
2319 addr = gen_rtx_PLUS (DImode, addr,
2320 gen_int_mode (cum->offset, DImode));
6783fdb7
RS
2321 rtx mem = gen_rtx_MEM (arg.mode, addr);
2322 set_mem_attributes (mem, arg.type, 1);
5326695a
AS
2323 set_mem_addr_space (mem, ADDR_SPACE_SCALAR_FLAT);
2324 MEM_READONLY_P (mem) = 1;
2325 return mem;
2326 }
2327
2328 int a = cum->args.order[cum->num];
6783fdb7 2329 if (arg.mode != gcn_kernel_arg_types[a].mode)
5326695a
AS
2330 {
2331 error ("wrong type of argument %s", gcn_kernel_arg_types[a].name);
2332 return 0;
2333 }
2334 return gen_rtx_REG ((machine_mode) gcn_kernel_arg_types[a].mode,
2335 cum->args.reg[a]);
2336 }
2337 return 0;
2338}
2339
2340/* Implement TARGET_FUNCTION_ARG_ADVANCE.
2341
2342 Updates the summarizer variable pointed to by CUM_V to advance past an
2343 argument in the argument list. */
2344
2345static void
6930c98c
RS
2346gcn_function_arg_advance (cumulative_args_t cum_v,
2347 const function_arg_info &arg)
5326695a
AS
2348{
2349 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2350
2351 if (cum->normal_function)
2352 {
6930c98c 2353 if (!arg.named)
5326695a
AS
2354 return;
2355
0ffef200 2356 int num_regs = num_arg_regs (arg);
5326695a
AS
2357 if (num_regs > 0)
2358 while ((FIRST_PARM_REG + cum->num) % num_regs != 0)
2359 cum->num++;
2360 cum->num += num_regs;
2361 }
2362 else
2363 {
2364 if (cum->num < cum->args.nargs)
2365 cum->num++;
2366 else
2367 {
6930c98c 2368 cum->offset += tree_to_uhwi (TYPE_SIZE_UNIT (arg.type));
5326695a
AS
2369 cfun->machine->kernarg_segment_byte_size = cum->offset;
2370 }
2371 }
2372}
2373
2374/* Implement TARGET_ARG_PARTIAL_BYTES.
2375
2376 Returns the number of bytes at the beginning of an argument that must be put
2377 in registers. The value must be zero for arguments that are passed entirely
2378 in registers or that are entirely pushed on the stack. */
2379
2380static int
a7c81bc1 2381gcn_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
5326695a
AS
2382{
2383 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2384
a7c81bc1 2385 if (!arg.named)
5326695a
AS
2386 return 0;
2387
0ffef200 2388 if (targetm.calls.must_pass_in_stack (arg))
5326695a
AS
2389 return 0;
2390
2391 if (cum->num >= NUM_PARM_REGS)
2392 return 0;
2393
2394 /* If the argument fits entirely in registers, return 0. */
0ffef200 2395 if (cum->num + num_arg_regs (arg) <= NUM_PARM_REGS)
5326695a
AS
2396 return 0;
2397
2398 return (NUM_PARM_REGS - cum->num) * UNITS_PER_WORD;
2399}
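/* A worked example for gcn_arg_partial_bytes (hypothetical numbers): if
   NUM_PARM_REGS were 12 and a call had already consumed cum->num = 10
   registers, a 4-register argument would be split; the function reports
   (12 - 10) * UNITS_PER_WORD = 8 bytes passed in registers, and the
   remainder goes on the stack.  */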
2400
2401/* A normal function which takes a pointer argument (to a scalar) may be
2402 passed a pointer to LDS space (via a high-bits-set aperture), and that only
2403 works with FLAT addressing, not GLOBAL. Force FLAT addressing if the
2404 function has an incoming pointer-to-scalar parameter. */
2405
2406static void
2407gcn_detect_incoming_pointer_arg (tree fndecl)
2408{
2409 gcc_assert (cfun && cfun->machine);
2410
2411 for (tree arg = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
2412 arg;
2413 arg = TREE_CHAIN (arg))
2414 if (POINTER_TYPE_P (TREE_VALUE (arg))
2415 && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_VALUE (arg))))
2416 cfun->machine->use_flat_addressing = true;
2417}
2418
2419/* Implement INIT_CUMULATIVE_ARGS, via gcn.h.
2420
2421 Initialize a variable CUM of type CUMULATIVE_ARGS for a call to a function
2422 whose data type is FNTYPE. For a library call, FNTYPE is 0. */
2423
2424void
2425gcn_init_cumulative_args (CUMULATIVE_ARGS *cum /* Argument info to init */ ,
2426 tree fntype /* tree ptr for function decl */ ,
2427 rtx libname /* SYMBOL_REF of library name or 0 */ ,
2428 tree fndecl, int caller)
2429{
2430 memset (cum, 0, sizeof (*cum));
2431 cum->fntype = fntype;
2432 if (libname)
2433 {
2434 gcc_assert (cfun && cfun->machine);
2435 cum->normal_function = true;
2436 if (!caller)
2437 {
2438 cfun->machine->normal_function = true;
2439 gcn_detect_incoming_pointer_arg (fndecl);
2440 }
2441 return;
2442 }
2443 tree attr = NULL;
2444 if (fndecl)
2445 attr = lookup_attribute ("amdgpu_hsa_kernel", DECL_ATTRIBUTES (fndecl));
2446 if (fndecl && !attr)
2447 attr = lookup_attribute ("amdgpu_hsa_kernel",
2448 TYPE_ATTRIBUTES (TREE_TYPE (fndecl)));
2449 if (!attr && fntype)
2450 attr = lookup_attribute ("amdgpu_hsa_kernel", TYPE_ATTRIBUTES (fntype));
 2451   /* Handle main () as a kernel, so we can run the testsuite.
2452 Handle OpenACC kernels similarly to main. */
2453 if (!attr && !caller && fndecl
2454 && (MAIN_NAME_P (DECL_NAME (fndecl))
2455 || lookup_attribute ("omp target entrypoint",
2456 DECL_ATTRIBUTES (fndecl)) != NULL_TREE))
2457 gcn_parse_amdgpu_hsa_kernel_attribute (&cum->args, NULL_TREE);
2458 else
2459 {
2460 if (!attr || caller)
2461 {
2462 gcc_assert (cfun && cfun->machine);
2463 cum->normal_function = true;
2464 if (!caller)
2465 cfun->machine->normal_function = true;
2466 }
2467 gcn_parse_amdgpu_hsa_kernel_attribute
2468 (&cum->args, attr ? TREE_VALUE (attr) : NULL_TREE);
2469 }
2470 cfun->machine->args = cum->args;
2471 if (!caller && cfun->machine->normal_function)
2472 gcn_detect_incoming_pointer_arg (fndecl);
3ed8f692
KCY
2473
2474 reinit_regs ();
5326695a
AS
2475}
2476
2477static bool
2478gcn_return_in_memory (const_tree type, const_tree ARG_UNUSED (fntype))
2479{
2480 machine_mode mode = TYPE_MODE (type);
2481 HOST_WIDE_INT size = int_size_in_bytes (type);
2482
2483 if (AGGREGATE_TYPE_P (type))
2484 return true;
2485
2486 if (mode == BLKmode)
2487 return true;
2488
2489 if (size > 2 * UNITS_PER_WORD)
2490 return true;
2491
2492 return false;
2493}
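/* Illustrative consequences of gcn_return_in_memory (assuming
   UNITS_PER_WORD is 4): every struct or union is returned in memory
   because AGGREGATE_TYPE_P is true; a scalar 'long long' (8 bytes, exactly
   2 * UNITS_PER_WORD) still comes back in the return registers; any scalar
   wider than that is returned in memory.  */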
2494
2495/* Implement TARGET_PROMOTE_FUNCTION_MODE.
2496
2497 Return the mode to use for outgoing function arguments. */
2498
2499machine_mode
2500gcn_promote_function_mode (const_tree ARG_UNUSED (type), machine_mode mode,
2501 int *ARG_UNUSED (punsignedp),
2502 const_tree ARG_UNUSED (funtype),
2503 int ARG_UNUSED (for_return))
2504{
2505 if (GET_MODE_CLASS (mode) == MODE_INT && GET_MODE_SIZE (mode) < 4)
2506 return SImode;
2507
2508 return mode;
2509}
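/* Illustrative effect of the promotion above: a call such as f ('a') or
   f ((short) 1) passes a full 32-bit SImode register, so callees never see
   partially-defined QImode or HImode argument registers; wider integer and
   floating-point modes pass through unchanged.  */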
2510
2511/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.
2512
2513 Derived from hppa_gimplify_va_arg_expr. The generic routine doesn't handle
2514 ARGS_GROW_DOWNWARDS. */
2515
2516static tree
2517gcn_gimplify_va_arg_expr (tree valist, tree type,
2518 gimple_seq *ARG_UNUSED (pre_p),
2519 gimple_seq *ARG_UNUSED (post_p))
2520{
2521 tree ptr = build_pointer_type (type);
2522 tree valist_type;
2523 tree t, u;
2524 bool indirect;
2525
fde65a89 2526 indirect = pass_va_arg_by_reference (type);
5326695a
AS
2527 if (indirect)
2528 {
2529 type = ptr;
2530 ptr = build_pointer_type (type);
2531 }
2532 valist_type = TREE_TYPE (valist);
2533
2534 /* Args grow down. Not handled by generic routines. */
2535
2536 u = fold_convert (sizetype, size_in_bytes (type));
2537 u = fold_build1 (NEGATE_EXPR, sizetype, u);
2538 t = fold_build_pointer_plus (valist, u);
2539
2540 /* Align to 8 byte boundary. */
2541
2542 u = build_int_cst (TREE_TYPE (t), -8);
2543 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, u);
2544 t = fold_convert (valist_type, t);
2545
2546 t = build2 (MODIFY_EXPR, valist_type, valist, t);
2547
2548 t = fold_convert (ptr, t);
2549 t = build_va_arg_indirect_ref (t);
2550
2551 if (indirect)
2552 t = build_va_arg_indirect_ref (t);
2553
2554 return t;
2555}
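/* A sketch of the va_arg sequence built above, written as pseudo-C with a
   hypothetical element type T and va_list pointer 'ap':

     ap = (char *) (((uintptr_t) ap - sizeof (T)) & -8);  // step *down*, 8-byte aligned
     result = *(T *) ap;                                  // dereference (twice if indirect)

   reflecting the fact that arguments grow downwards on this target.  */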
2556
955cd057
TB
2557/* Return 1 if TRAIT NAME is present in the OpenMP context's
2558 device trait set, return 0 if not present in any OpenMP context in the
2559 whole translation unit, or -1 if not present in the current OpenMP context
2560 but might be present in another OpenMP context in the same TU. */
2561
2562int
2563gcn_omp_device_kind_arch_isa (enum omp_device_kind_arch_isa trait,
2564 const char *name)
2565{
2566 switch (trait)
2567 {
2568 case omp_device_kind:
2569 return strcmp (name, "gpu") == 0;
2570 case omp_device_arch:
2571 return strcmp (name, "gcn") == 0;
2572 case omp_device_isa:
955cd057
TB
2573 if (strcmp (name, "fiji") == 0)
2574 return gcn_arch == PROCESSOR_FIJI;
2575 if (strcmp (name, "gfx900") == 0)
2576 return gcn_arch == PROCESSOR_VEGA;
2577 if (strcmp (name, "gfx906") == 0)
2578 return gcn_arch == PROCESSOR_VEGA;
2579 return 0;
2580 default:
2581 gcc_unreachable ();
2582 }
2583}
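/* A hypothetical usage sketch of these traits from user code (function
   names invented for illustration):

     #pragma omp declare variant (vega_impl) \
             match (device = {arch("gcn"), isa("gfx900")})
     void base_impl (void);

   The hook above answers 1 when the named kind/arch/isa matches this
   compilation and 0 when it does not.  */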
2584
5326695a
AS
2585/* Calculate stack offsets needed to create prologues and epilogues. */
2586
2587static struct machine_function *
2588gcn_compute_frame_offsets (void)
2589{
2590 machine_function *offsets = cfun->machine;
2591
2592 if (reload_completed)
2593 return offsets;
2594
2595 offsets->need_frame_pointer = frame_pointer_needed;
2596
2597 offsets->outgoing_args_size = crtl->outgoing_args_size;
2598 offsets->pretend_size = crtl->args.pretend_args_size;
2599
2600 offsets->local_vars = get_frame_size ();
2601
2602 offsets->lr_needs_saving = (!leaf_function_p ()
2603 || df_regs_ever_live_p (LR_REGNUM)
2604 || df_regs_ever_live_p (LR_REGNUM + 1));
2605
2606 offsets->callee_saves = offsets->lr_needs_saving ? 8 : 0;
2607
2608 for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
a365fa06 2609 if ((df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
5326695a
AS
2610 || ((regno & ~1) == HARD_FRAME_POINTER_REGNUM
2611 && frame_pointer_needed))
2612 offsets->callee_saves += (VGPR_REGNO_P (regno) ? 256 : 4);
2613
2614 /* Round up to 64-bit boundary to maintain stack alignment. */
2615 offsets->callee_saves = (offsets->callee_saves + 7) & ~7;
2616
2617 return offsets;
2618}
2619
2620/* Insert code into the prologue or epilogue to store or load any
2621 callee-save register to/from the stack.
2622
2623 Helper function for gcn_expand_prologue and gcn_expand_epilogue. */
2624
2625static void
2626move_callee_saved_registers (rtx sp, machine_function *offsets,
2627 bool prologue)
2628{
2629 int regno, offset, saved_scalars;
2630 rtx exec = gen_rtx_REG (DImode, EXEC_REG);
2631 rtx vcc = gen_rtx_REG (DImode, VCC_LO_REG);
2632 rtx offreg = gen_rtx_REG (SImode, SGPR_REGNO (22));
2633 rtx as = gen_rtx_CONST_INT (VOIDmode, STACK_ADDR_SPACE);
2634 HOST_WIDE_INT exec_set = 0;
2635 int offreg_set = 0;
2636
2637 start_sequence ();
2638
2639 /* Move scalars into two vector registers. */
2640 for (regno = 0, saved_scalars = 0; regno < FIRST_VGPR_REG; regno++)
a365fa06 2641 if ((df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
5326695a
AS
2642 || ((regno & ~1) == LINK_REGNUM && offsets->lr_needs_saving)
2643 || ((regno & ~1) == HARD_FRAME_POINTER_REGNUM
2644 && offsets->need_frame_pointer))
2645 {
2646 rtx reg = gen_rtx_REG (SImode, regno);
2647 rtx vreg = gen_rtx_REG (V64SImode,
2648 VGPR_REGNO (6 + (saved_scalars / 64)));
2649 int lane = saved_scalars % 64;
2650
2651 if (prologue)
2652 emit_insn (gen_vec_setv64si (vreg, reg, GEN_INT (lane)));
2653 else
2654 emit_insn (gen_vec_extractv64sisi (reg, vreg, GEN_INT (lane)));
2655
2656 saved_scalars++;
2657 }
2658
2659 rtx move_scalars = get_insns ();
2660 end_sequence ();
2661 start_sequence ();
2662
2663 /* Ensure that all vector lanes are moved. */
2664 exec_set = -1;
2665 emit_move_insn (exec, GEN_INT (exec_set));
2666
2667 /* Set up a vector stack pointer. */
2668 rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
2669 rtx _0_4_8_12 = gen_rtx_REG (V64SImode, VGPR_REGNO (3));
2670 emit_insn (gen_ashlv64si3_exec (_0_4_8_12, _0_1_2_3, GEN_INT (2),
2671 gcn_gen_undef (V64SImode), exec));
2672 rtx vsp = gen_rtx_REG (V64DImode, VGPR_REGNO (4));
2673 emit_insn (gen_vec_duplicatev64di_exec (vsp, sp, gcn_gen_undef (V64DImode),
2674 exec));
2675 emit_insn (gen_addv64si3_vcc_exec (gcn_operand_part (V64SImode, vsp, 0),
2676 gcn_operand_part (V64SImode, vsp, 0),
2677 _0_4_8_12, vcc, gcn_gen_undef (V64SImode),
2678 exec));
2679 emit_insn (gen_addcv64si3_exec (gcn_operand_part (V64SImode, vsp, 1),
2680 gcn_operand_part (V64SImode, vsp, 1),
2681 const0_rtx, vcc, vcc,
2682 gcn_gen_undef (V64SImode), exec));
2683
2684 /* Move vectors. */
2685 for (regno = FIRST_VGPR_REG, offset = offsets->pretend_size;
2686 regno < FIRST_PSEUDO_REGISTER; regno++)
a365fa06 2687 if ((df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
5326695a
AS
2688 || (regno == VGPR_REGNO (6) && saved_scalars > 0)
2689 || (regno == VGPR_REGNO (7) && saved_scalars > 63))
2690 {
2691 rtx reg = gen_rtx_REG (V64SImode, regno);
2692 int size = 256;
2693
2694 if (regno == VGPR_REGNO (6) && saved_scalars < 64)
2695 size = saved_scalars * 4;
2696 else if (regno == VGPR_REGNO (7) && saved_scalars < 128)
2697 size = (saved_scalars - 64) * 4;
2698
2699 if (size != 256 || exec_set != -1)
2700 {
2701 exec_set = ((unsigned HOST_WIDE_INT) 1 << (size / 4)) - 1;
2702 emit_move_insn (exec, gen_int_mode (exec_set, DImode));
2703 }
2704
2705 if (prologue)
2706 emit_insn (gen_scatterv64si_insn_1offset_exec (vsp, const0_rtx, reg,
2707 as, const0_rtx, exec));
2708 else
2709 emit_insn (gen_gatherv64si_insn_1offset_exec
2710 (reg, vsp, const0_rtx, as, const0_rtx,
2711 gcn_gen_undef (V64SImode), exec));
2712
2713 /* Move our VSP to the next stack entry. */
2714 if (offreg_set != size)
2715 {
2716 offreg_set = size;
2717 emit_move_insn (offreg, GEN_INT (size));
2718 }
2719 if (exec_set != -1)
2720 {
2721 exec_set = -1;
2722 emit_move_insn (exec, GEN_INT (exec_set));
2723 }
2724 emit_insn (gen_addv64si3_vcc_dup_exec
2725 (gcn_operand_part (V64SImode, vsp, 0),
2726 offreg, gcn_operand_part (V64SImode, vsp, 0),
2727 vcc, gcn_gen_undef (V64SImode), exec));
2728 emit_insn (gen_addcv64si3_exec
2729 (gcn_operand_part (V64SImode, vsp, 1),
2730 gcn_operand_part (V64SImode, vsp, 1),
2731 const0_rtx, vcc, vcc, gcn_gen_undef (V64SImode), exec));
2732
2733 offset += size;
2734 }
2735
2736 rtx move_vectors = get_insns ();
2737 end_sequence ();
2738
2739 if (prologue)
2740 {
2741 emit_insn (move_scalars);
2742 emit_insn (move_vectors);
2743 }
2744 else
2745 {
2746 emit_insn (move_vectors);
2747 emit_insn (move_scalars);
2748 }
2749}
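/* A worked example of the exec-mask arithmetic above (illustrative): with
   10 live scalar registers to save, the partial vector store covers
   size = 10 * 4 = 40 bytes, so exec_set becomes
   ((unsigned HOST_WIDE_INT) 1 << (40 / 4)) - 1 = 0x3ff, enabling exactly
   lanes 0-9 of the V64SImode save register.  */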
2750
2751/* Generate prologue. Called from gen_prologue during pro_and_epilogue pass.
2752
2753 For a non-kernel function, the stack layout looks like this (interim),
2754 growing *upwards*:
2755
2756 hi | + ...
2757 |__________________| <-- current SP
2758 | outgoing args |
2759 |__________________|
2760 | (alloca space) |
2761 |__________________|
2762 | local vars |
2763 |__________________| <-- FP/hard FP
2764 | callee-save regs |
2765 |__________________| <-- soft arg pointer
2766 | pretend args |
2767 |__________________| <-- incoming SP
2768 | incoming args |
2769 lo |..................|
2770
2771 This implies arguments (beyond the first N in registers) must grow
2772 downwards (as, apparently, PA has them do).
2773
2774 For a kernel function we have the simpler:
2775
2776 hi | + ...
2777 |__________________| <-- current SP
2778 | outgoing args |
2779 |__________________|
2780 | (alloca space) |
2781 |__________________|
2782 | local vars |
2783 lo |__________________| <-- FP/hard FP
2784
2785*/
2786
2787void
2788gcn_expand_prologue ()
2789{
2790 machine_function *offsets = gcn_compute_frame_offsets ();
2791
2792 if (!cfun || !cfun->machine || cfun->machine->normal_function)
2793 {
2794 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2795 rtx fp = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2796
2797 start_sequence ();
2798
2799 if (offsets->pretend_size > 0)
2800 {
2801 /* FIXME: Do the actual saving of register pretend args to the stack.
2802 Register order needs consideration. */
2803 }
2804
2805 /* Save callee-save regs. */
2806 move_callee_saved_registers (sp, offsets, true);
2807
2808 HOST_WIDE_INT sp_adjust = offsets->pretend_size
2809 + offsets->callee_saves
2810 + offsets->local_vars + offsets->outgoing_args_size;
2811 if (sp_adjust > 0)
2812 emit_insn (gen_adddi3_scc (sp, sp, gen_int_mode (sp_adjust, DImode)));
2813
2814 if (offsets->need_frame_pointer)
2815 emit_insn (gen_adddi3_scc (fp, sp,
2816 gen_int_mode
2817 (-(offsets->local_vars +
2818 offsets->outgoing_args_size),
2819 DImode)));
2820
2821 rtx_insn *seq = get_insns ();
2822 end_sequence ();
2823
2824 /* FIXME: Prologue insns should have this flag set for debug output, etc.
2825 but it causes issues for now.
2826 for (insn = seq; insn; insn = NEXT_INSN (insn))
2827 if (INSN_P (insn))
2828 RTX_FRAME_RELATED_P (insn) = 1;*/
2829
2830 emit_insn (seq);
2831 }
2832 else
2833 {
2834 rtx wave_offset = gen_rtx_REG (SImode,
2835 cfun->machine->args.
2836 reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]);
2837
5326695a
AS
2838 if (cfun->machine->args.requested & (1 << FLAT_SCRATCH_INIT_ARG))
2839 {
2840 rtx fs_init_lo =
2841 gen_rtx_REG (SImode,
2842 cfun->machine->args.reg[FLAT_SCRATCH_INIT_ARG]);
2843 rtx fs_init_hi =
2844 gen_rtx_REG (SImode,
2845 cfun->machine->args.reg[FLAT_SCRATCH_INIT_ARG] + 1);
2846 rtx fs_reg_lo = gen_rtx_REG (SImode, FLAT_SCRATCH_REG);
2847 rtx fs_reg_hi = gen_rtx_REG (SImode, FLAT_SCRATCH_REG + 1);
2848
2849 /*rtx queue = gen_rtx_REG(DImode,
2850 cfun->machine->args.reg[QUEUE_PTR_ARG]);
2851 rtx aperture = gen_rtx_MEM (SImode,
2852 gen_rtx_PLUS (DImode, queue,
2853 gen_int_mode (68, SImode)));
2854 set_mem_addr_space (aperture, ADDR_SPACE_SCALAR_FLAT);*/
2855
2856 /* Set up flat_scratch. */
2857 emit_insn (gen_addsi3_scc (fs_reg_hi, fs_init_lo, wave_offset));
2858 emit_insn (gen_lshrsi3_scc (fs_reg_hi, fs_reg_hi,
2859 gen_int_mode (8, SImode)));
2860 emit_move_insn (fs_reg_lo, fs_init_hi);
2861 }
2862
2863 /* Set up frame pointer and stack pointer. */
2864 rtx sp = gen_rtx_REG (DImode, STACK_POINTER_REGNUM);
2865 rtx fp = gen_rtx_REG (DImode, HARD_FRAME_POINTER_REGNUM);
2866 rtx fp_hi = simplify_gen_subreg (SImode, fp, DImode, 4);
2867 rtx fp_lo = simplify_gen_subreg (SImode, fp, DImode, 0);
2868
2869 HOST_WIDE_INT sp_adjust = (offsets->local_vars
2870 + offsets->outgoing_args_size);
2871
2872 /* Initialise FP and SP from the buffer descriptor in s[0:3]. */
2873 emit_move_insn (fp_lo, gen_rtx_REG (SImode, 0));
2874 emit_insn (gen_andsi3_scc (fp_hi, gen_rtx_REG (SImode, 1),
2875 gen_int_mode (0xffff, SImode)));
3258c2d6
AS
2876 rtx scc = gen_rtx_REG (BImode, SCC_REG);
2877 emit_insn (gen_addsi3_scalar_carry (fp_lo, fp_lo, wave_offset, scc));
2878 emit_insn (gen_addcsi3_scalar_zero (fp_hi, fp_hi, scc));
5326695a
AS
2879
2880 if (sp_adjust > 0)
2881 emit_insn (gen_adddi3_scc (sp, fp, gen_int_mode (sp_adjust, DImode)));
2882 else
2883 emit_move_insn (sp, fp);
2884
2885 /* Make sure the flat scratch reg doesn't get optimised away. */
2886 emit_insn (gen_prologue_use (gen_rtx_REG (DImode, FLAT_SCRATCH_REG)));
2887 }
2888
2889 /* Ensure that the scheduler doesn't do anything unexpected. */
2890 emit_insn (gen_blockage ());
2891
86b0eb81
AS
 2892   /* m0 is initialized for the usual case of LDS access via DS and FLAT instructions.
2893 The low-part is the address of the topmost addressable byte, which is
2894 size-1. The high-part is an offset and should be zero. */
5326695a 2895 emit_move_insn (gen_rtx_REG (SImode, M0_REG),
86b0eb81 2896 gen_int_mode (LDS_SIZE-1, SImode));
5326695a
AS
2897
2898 emit_insn (gen_prologue_use (gen_rtx_REG (SImode, M0_REG)));
5326695a
AS
2899
2900 if (cfun && cfun->machine && !cfun->machine->normal_function && flag_openmp)
2901 {
2902 /* OpenMP kernels have an implicit call to gomp_gcn_enter_kernel. */
2903 rtx fn_reg = gen_rtx_REG (Pmode, FIRST_PARM_REG);
2904 emit_move_insn (fn_reg, gen_rtx_SYMBOL_REF (Pmode,
2905 "gomp_gcn_enter_kernel"));
2906 emit_call_insn (gen_gcn_indirect_call (fn_reg, const0_rtx));
2907 }
2908}
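/* Illustrative frame arithmetic for a normal function (hypothetical
   sizes): with pretend_size = 0, callee_saves = 8, local_vars = 32 and
   outgoing_args_size = 16, the prologue above advances SP by
   0 + 8 + 32 + 16 = 56 bytes and then sets FP = SP - (32 + 16) = SP - 48,
   matching the stack diagram before gcn_expand_prologue.  */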
2909
2910/* Generate epilogue. Called from gen_epilogue during pro_and_epilogue pass.
2911
2912 See gcn_expand_prologue for stack details. */
2913
2914void
2915gcn_expand_epilogue (void)
2916{
2917 /* Ensure that the scheduler doesn't do anything unexpected. */
2918 emit_insn (gen_blockage ());
2919
2920 if (!cfun || !cfun->machine || cfun->machine->normal_function)
2921 {
2922 machine_function *offsets = gcn_compute_frame_offsets ();
2923 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2924 rtx fp = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2925
2926 HOST_WIDE_INT sp_adjust = offsets->callee_saves + offsets->pretend_size;
2927
2928 if (offsets->need_frame_pointer)
2929 {
2930 /* Restore old SP from the frame pointer. */
2931 if (sp_adjust > 0)
2932 emit_insn (gen_subdi3 (sp, fp, gen_int_mode (sp_adjust, DImode)));
2933 else
2934 emit_move_insn (sp, fp);
2935 }
2936 else
2937 {
2938 /* Restore old SP from current SP. */
2939 sp_adjust += offsets->outgoing_args_size + offsets->local_vars;
2940
2941 if (sp_adjust > 0)
2942 emit_insn (gen_subdi3 (sp, sp, gen_int_mode (sp_adjust, DImode)));
2943 }
2944
2945 move_callee_saved_registers (sp, offsets, false);
2946
2947 /* There's no explicit use of the link register on the return insn. Emit
2948 one here instead. */
2949 if (offsets->lr_needs_saving)
2950 emit_use (gen_rtx_REG (DImode, LINK_REGNUM));
2951
2952 /* Similar for frame pointer. */
2953 if (offsets->need_frame_pointer)
2954 emit_use (gen_rtx_REG (DImode, HARD_FRAME_POINTER_REGNUM));
2955 }
2956 else if (flag_openmp)
2957 {
2958 /* OpenMP kernels have an implicit call to gomp_gcn_exit_kernel. */
2959 rtx fn_reg = gen_rtx_REG (Pmode, FIRST_PARM_REG);
2960 emit_move_insn (fn_reg,
2961 gen_rtx_SYMBOL_REF (Pmode, "gomp_gcn_exit_kernel"));
2962 emit_call_insn (gen_gcn_indirect_call (fn_reg, const0_rtx));
2963 }
2964 else if (TREE_CODE (TREE_TYPE (DECL_RESULT (cfun->decl))) != VOID_TYPE)
2965 {
2966 /* Assume that an exit value compatible with gcn-run is expected.
2967 That is, the third input parameter is an int*.
2968
2969 We can't allocate any new registers, but the kernarg_reg is
2970 dead after this, so we'll use that. */
2971 rtx kernarg_reg = gen_rtx_REG (DImode, cfun->machine->args.reg
2972 [KERNARG_SEGMENT_PTR_ARG]);
2973 rtx retptr_mem = gen_rtx_MEM (DImode,
2974 gen_rtx_PLUS (DImode, kernarg_reg,
2975 GEN_INT (16)));
2976 set_mem_addr_space (retptr_mem, ADDR_SPACE_SCALAR_FLAT);
2977 emit_move_insn (kernarg_reg, retptr_mem);
2978
2979 rtx retval_mem = gen_rtx_MEM (SImode, kernarg_reg);
2980 set_mem_addr_space (retval_mem, ADDR_SPACE_SCALAR_FLAT);
2981 emit_move_insn (retval_mem,
2982 gen_rtx_REG (SImode, SGPR_REGNO (RETURN_VALUE_REG)));
2983 }
2984
2985 emit_jump_insn (gen_gcn_return ());
2986}
2987
2988/* Implement TARGET_CAN_ELIMINATE.
2989
2990 Return true if the compiler is allowed to try to replace register number
2991 FROM_REG with register number TO_REG.
2992
2993 FIXME: is the default "true" not enough? Should this be a negative set? */
2994
2995bool
2996gcn_can_eliminate_p (int /*from_reg */ , int to_reg)
2997{
2998 return (to_reg == HARD_FRAME_POINTER_REGNUM
2999 || to_reg == STACK_POINTER_REGNUM);
3000}
3001
3002/* Implement INITIAL_ELIMINATION_OFFSET.
3003
3004 Returns the initial difference between the specified pair of registers, in
3005 terms of stack position. */
3006
3007HOST_WIDE_INT
3008gcn_initial_elimination_offset (int from, int to)
3009{
3010 machine_function *offsets = gcn_compute_frame_offsets ();
3011
3012 switch (from)
3013 {
3014 case ARG_POINTER_REGNUM:
3015 if (to == STACK_POINTER_REGNUM)
3016 return -(offsets->callee_saves + offsets->local_vars
3017 + offsets->outgoing_args_size);
3018 else if (to == FRAME_POINTER_REGNUM || to == HARD_FRAME_POINTER_REGNUM)
3019 return -offsets->callee_saves;
3020 else
3021 gcc_unreachable ();
3022 break;
3023
3024 case FRAME_POINTER_REGNUM:
3025 if (to == STACK_POINTER_REGNUM)
3026 return -(offsets->local_vars + offsets->outgoing_args_size);
3027 else if (to == HARD_FRAME_POINTER_REGNUM)
3028 return 0;
3029 else
3030 gcc_unreachable ();
3031 break;
3032
3033 default:
3034 gcc_unreachable ();
3035 }
3036}
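/* Worked offsets for the same hypothetical frame (callee_saves = 8,
   local_vars = 32, outgoing_args_size = 16):
     ARG_POINTER -> STACK_POINTER        : -(8 + 32 + 16) = -56
     ARG_POINTER -> (HARD_)FRAME_POINTER : -8
     FRAME_POINTER -> STACK_POINTER      : -(32 + 16)     = -48  */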
3037
3038/* Implement HARD_REGNO_RENAME_OK.
3039
3040 Return true if it is permissible to rename a hard register from
3041 FROM_REG to TO_REG. */
3042
3043bool
3044gcn_hard_regno_rename_ok (unsigned int from_reg, unsigned int to_reg)
3045{
3046 if (from_reg == SCC_REG
3047 || from_reg == VCC_LO_REG || from_reg == VCC_HI_REG
3048 || from_reg == EXEC_LO_REG || from_reg == EXEC_HI_REG
3049 || to_reg == SCC_REG
3050 || to_reg == VCC_LO_REG || to_reg == VCC_HI_REG
3051 || to_reg == EXEC_LO_REG || to_reg == EXEC_HI_REG)
3052 return false;
3053
3054 /* Allow the link register to be used if it was saved. */
3055 if ((to_reg & ~1) == LINK_REGNUM)
3056 return !cfun || cfun->machine->lr_needs_saving;
3057
3058 /* Allow the registers used for the static chain to be used if the chain is
3059 not in active use. */
3060 if ((to_reg & ~1) == STATIC_CHAIN_REGNUM)
3061 return !cfun
3062 || !(cfun->static_chain_decl
3063 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
3064 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM + 1));
3065
3066 return true;
3067}
3068
3069/* Implement HARD_REGNO_CALLER_SAVE_MODE.
3070
3071 Which mode is required for saving NREGS of a pseudo-register in
3072 call-clobbered hard register REGNO. */
3073
3074machine_mode
3075gcn_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
3076 machine_mode regmode)
3077{
737d6a1a 3078 machine_mode result = choose_hard_reg_mode (regno, nregs, NULL);
5326695a
AS
3079
3080 if (VECTOR_MODE_P (result) && !VECTOR_MODE_P (regmode))
3081 result = (nregs == 1 ? SImode : DImode);
3082
3083 return result;
3084}
3085
3086/* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE.
3087
3088 Output assembler code for a block containing the constant parts
3089 of a trampoline, leaving space for the variable parts. */
3090
3091static void
3092gcn_asm_trampoline_template (FILE *f)
3093{
3094 /* The source operand of the move instructions must be a 32-bit
3095 constant following the opcode. */
3096 asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", STATIC_CHAIN_REGNUM);
3097 asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", STATIC_CHAIN_REGNUM + 1);
3098 asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", CC_SAVE_REG);
3099 asm_fprintf (f, "\ts_mov_b32\ts%i, 0xffff\n", CC_SAVE_REG + 1);
3100 asm_fprintf (f, "\ts_setpc_b64\ts[%i:%i]\n", CC_SAVE_REG, CC_SAVE_REG + 1);
3101}
3102
3103/* Implement TARGET_TRAMPOLINE_INIT.
3104
3105 Emit RTL insns to initialize the variable parts of a trampoline.
3106 FNDECL is the decl of the target address, M_TRAMP is a MEM for
3107 the trampoline, and CHAIN_VALUE is an RTX for the static chain
3108 to be passed to the target function. */
3109
3110static void
3111gcn_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3112{
b7c28a47
AS
3113 if (TARGET_GCN5_PLUS)
3114 sorry ("nested function trampolines not supported on GCN5 due to"
3115 " non-executable stacks");
3116
5326695a
AS
3117 emit_block_move (m_tramp, assemble_trampoline_template (),
3118 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3119
3120 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
3121 rtx chain_value_reg = copy_to_reg (chain_value);
3122 rtx fnaddr_reg = copy_to_reg (fnaddr);
3123
3124 for (int i = 0; i < 4; i++)
3125 {
3126 rtx mem = adjust_address (m_tramp, SImode, i * 8 + 4);
3127 rtx reg = i < 2 ? chain_value_reg : fnaddr_reg;
3128 emit_move_insn (mem, gen_rtx_SUBREG (SImode, reg, (i % 2) * 4));
3129 }
3130
3131 rtx tramp_addr = XEXP (m_tramp, 0);
3132 emit_insn (gen_clear_icache (tramp_addr,
3133 plus_constant (ptr_mode, tramp_addr,
3134 TRAMPOLINE_SIZE)));
3135}
3136
3137/* }}} */
3138/* {{{ Miscellaneous. */
3139
3140/* Implement TARGET_CANNOT_COPY_INSN_P.
3141
3142 Return true if INSN must not be duplicated. */
3143
3144static bool
3145gcn_cannot_copy_insn_p (rtx_insn *insn)
3146{
3147 if (recog_memoized (insn) == CODE_FOR_gcn_wavefront_barrier)
3148 return true;
3149
3150 return false;
3151}
3152
3153/* Implement TARGET_DEBUG_UNWIND_INFO.
3154
3155 Defines the mechanism that will be used for describing frame unwind
3156 information to the debugger. */
3157
3158static enum unwind_info_type
3159gcn_debug_unwind_info ()
3160{
3161 /* No support for debug info, yet. */
3162 return UI_NONE;
3163}
3164
3165/* Determine if there is a suitable hardware conversion instruction.
3166 Used primarily by the machine description. */
3167
3168bool
3169gcn_valid_cvt_p (machine_mode from, machine_mode to, enum gcn_cvt_t op)
3170{
3171 if (VECTOR_MODE_P (from) != VECTOR_MODE_P (to))
3172 return false;
3173
3174 if (VECTOR_MODE_P (from))
3175 {
3176 from = GET_MODE_INNER (from);
3177 to = GET_MODE_INNER (to);
3178 }
3179
3180 switch (op)
3181 {
3182 case fix_trunc_cvt:
3183 case fixuns_trunc_cvt:
3184 if (GET_MODE_CLASS (from) != MODE_FLOAT
3185 || GET_MODE_CLASS (to) != MODE_INT)
3186 return false;
3187 break;
3188 case float_cvt:
3189 case floatuns_cvt:
3190 if (GET_MODE_CLASS (from) != MODE_INT
3191 || GET_MODE_CLASS (to) != MODE_FLOAT)
3192 return false;
3193 break;
3194 case extend_cvt:
3195 if (GET_MODE_CLASS (from) != MODE_FLOAT
3196 || GET_MODE_CLASS (to) != MODE_FLOAT
3197 || GET_MODE_SIZE (from) >= GET_MODE_SIZE (to))
3198 return false;
3199 break;
3200 case trunc_cvt:
3201 if (GET_MODE_CLASS (from) != MODE_FLOAT
3202 || GET_MODE_CLASS (to) != MODE_FLOAT
3203 || GET_MODE_SIZE (from) <= GET_MODE_SIZE (to))
3204 return false;
3205 break;
3206 }
3207
3208 return ((to == HImode && from == HFmode)
3209 || (to == SImode && (from == SFmode || from == DFmode))
3210 || (to == HFmode && (from == HImode || from == SFmode))
3211 || (to == SFmode && (from == SImode || from == HFmode
3212 || from == DFmode))
3213 || (to == DFmode && (from == SImode || from == SFmode)));
3214}
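/* Illustrative reading of the table above: SFmode <-> HFmode,
   HImode <-> HFmode, SImode <-> SFmode, SImode <-> DFmode and
   SFmode <-> DFmode all have single-instruction conversions, whereas
   e.g. DFmode -> HFmode is rejected here and has to be done in two steps
   (DFmode -> SFmode -> HFmode) by the caller.  */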
3215
76d46331
KCY
3216/* Implement TARGET_EMUTLS_VAR_INIT.
3217
3218 Disable emutls (gthr-gcn.h does not support it, yet). */
3219
3220tree
3221gcn_emutls_var_init (tree, tree decl, tree)
3222{
 3223   sorry_at (DECL_SOURCE_LOCATION (decl), "TLS is not implemented for GCN");
6349e4f0 3224 return NULL_TREE;
76d46331
KCY
3225}
3226
5326695a
AS
3227/* }}} */
3228/* {{{ Costs. */
3229
3230/* Implement TARGET_RTX_COSTS.
3231
3232 Compute a (partial) cost for rtx X. Return true if the complete
3233 cost has been computed, and false if subexpressions should be
3234 scanned. In either case, *TOTAL contains the cost result. */
3235
3236static bool
3237gcn_rtx_costs (rtx x, machine_mode, int, int, int *total, bool)
3238{
3239 enum rtx_code code = GET_CODE (x);
3240 switch (code)
3241 {
3242 case CONST:
3243 case CONST_DOUBLE:
3244 case CONST_VECTOR:
3245 case CONST_INT:
3246 if (gcn_inline_constant_p (x))
3247 *total = 0;
3248 else if (code == CONST_INT
3249 && ((unsigned HOST_WIDE_INT) INTVAL (x) + 0x8000) < 0x10000)
3250 *total = 1;
3251 else if (gcn_constant_p (x))
3252 *total = 2;
3253 else
3254 *total = vgpr_vector_mode_p (GET_MODE (x)) ? 64 : 4;
3255 return true;
3256
3257 case DIV:
3258 *total = 100;
3259 return false;
3260
3261 default:
3262 *total = 3;
3263 return false;
3264 }
3265}
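/* Illustrative costs from the switch above: an inline constant such as
   (const_int 1) costs 0, a 16-bit-representable value such as
   (const_int 1000) costs 1, other scalar constants cost 2 or 4, and a
   V64SImode constant vector that cannot be encoded inline costs 64.  */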
3266
3267/* Implement TARGET_MEMORY_MOVE_COST.
3268
3269 Return the cost of moving data of mode M between a
3270 register and memory. A value of 2 is the default; this cost is
3271 relative to those in `REGISTER_MOVE_COST'.
3272
3273 This function is used extensively by register_move_cost that is used to
3274 build tables at startup. Make it inline in this case.
3275 When IN is 2, return maximum of in and out move cost.
3276
3277 If moving between registers and memory is more expensive than
3278 between two registers, you should define this macro to express the
3279 relative cost.
 3280    relative cost.  */
3283
3284#define LOAD_COST 32
3285#define STORE_COST 32
3286static int
3287gcn_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
3288{
3289 int nregs = CEIL (GET_MODE_SIZE (mode), 4);
3290 switch (regclass)
3291 {
3292 case SCC_CONDITIONAL_REG:
3293 case VCCZ_CONDITIONAL_REG:
3294 case VCC_CONDITIONAL_REG:
3295 case EXECZ_CONDITIONAL_REG:
3296 case ALL_CONDITIONAL_REGS:
3297 case SGPR_REGS:
3298 case SGPR_EXEC_REGS:
3299 case EXEC_MASK_REG:
3300 case SGPR_VOP_SRC_REGS:
3301 case SGPR_MEM_SRC_REGS:
3302 case SGPR_SRC_REGS:
3303 case SGPR_DST_REGS:
3304 case GENERAL_REGS:
3305 case AFP_REGS:
3306 if (!in)
3307 return (STORE_COST + 2) * nregs;
3308 return LOAD_COST * nregs;
3309 case VGPR_REGS:
3310 if (in)
3311 return (LOAD_COST + 2) * nregs;
3312 return STORE_COST * nregs;
3313 case ALL_REGS:
3314 case ALL_GPR_REGS:
3315 case SRCDST_REGS:
3316 if (in)
3317 return (LOAD_COST + 2) * nregs;
3318 return (STORE_COST + 2) * nregs;
3319 default:
3320 gcc_unreachable ();
3321 }
3322}
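/* A worked example (illustrative): a V64SImode value occupies
   64 * 4 = 256 bytes, so nregs = 64; loading it into VGPR_REGS is costed
   at (LOAD_COST + 2) * 64 and storing it back at STORE_COST * 64, which
   keeps vector spills expensive relative to register-register moves.  */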
3323
3324/* Implement TARGET_REGISTER_MOVE_COST.
3325
3326 Return the cost of moving data from a register in class CLASS1 to
3327 one in class CLASS2. Base value is 2. */
3328
3329static int
3330gcn_register_move_cost (machine_mode, reg_class_t dst, reg_class_t src)
3331{
3332 /* Increase cost of moving from and to vector registers. While this is
3333 fast in hardware (I think), it has hidden cost of setting up the exec
3334 flags. */
3335 if ((src < VGPR_REGS) != (dst < VGPR_REGS))
3336 return 4;
3337 return 2;
3338}
3339
3340/* }}} */
3341/* {{{ Builtins. */
3342
3343/* Type codes used by GCN built-in definitions. */
3344
3345enum gcn_builtin_type_index
3346{
3347 GCN_BTI_END_OF_PARAMS,
3348
3349 GCN_BTI_VOID,
3350 GCN_BTI_BOOL,
3351 GCN_BTI_INT,
3352 GCN_BTI_UINT,
3353 GCN_BTI_SIZE_T,
3354 GCN_BTI_LLINT,
3355 GCN_BTI_LLUINT,
3356 GCN_BTI_EXEC,
3357
3358 GCN_BTI_SF,
3359 GCN_BTI_V64SI,
3360 GCN_BTI_V64SF,
3361 GCN_BTI_V64PTR,
3362 GCN_BTI_SIPTR,
3363 GCN_BTI_SFPTR,
3364 GCN_BTI_VOIDPTR,
3365
3366 GCN_BTI_LDS_VOIDPTR,
3367
3368 GCN_BTI_MAX
3369};
3370
3371static GTY(()) tree gcn_builtin_types[GCN_BTI_MAX];
3372
3373#define exec_type_node (gcn_builtin_types[GCN_BTI_EXEC])
3374#define sf_type_node (gcn_builtin_types[GCN_BTI_SF])
3375#define v64si_type_node (gcn_builtin_types[GCN_BTI_V64SI])
3376#define v64sf_type_node (gcn_builtin_types[GCN_BTI_V64SF])
3377#define v64ptr_type_node (gcn_builtin_types[GCN_BTI_V64PTR])
3378#define siptr_type_node (gcn_builtin_types[GCN_BTI_SIPTR])
3379#define sfptr_type_node (gcn_builtin_types[GCN_BTI_SFPTR])
3380#define voidptr_type_node (gcn_builtin_types[GCN_BTI_VOIDPTR])
3381#define size_t_type_node (gcn_builtin_types[GCN_BTI_SIZE_T])
3382
3383static rtx gcn_expand_builtin_1 (tree, rtx, rtx, machine_mode, int,
3384 struct gcn_builtin_description *);
3385static rtx gcn_expand_builtin_binop (tree, rtx, rtx, machine_mode, int,
3386 struct gcn_builtin_description *);
3387
3388struct gcn_builtin_description;
3389typedef rtx (*gcn_builtin_expander) (tree, rtx, rtx, machine_mode, int,
3390 struct gcn_builtin_description *);
3391
3392enum gcn_builtin_type
3393{
 3394   B_UNIMPLEMENTED,		/* Not implemented; expansion emits a sorry () */
3395 B_INSN, /* Emit a pattern */
3396 B_OVERLOAD /* Placeholder for an overloaded function */
3397};
3398
3399struct gcn_builtin_description
3400{
3401 int fcode;
3402 int icode;
3403 const char *name;
3404 enum gcn_builtin_type type;
3405 /* The first element of parm is always the return type. The rest
3406 are a zero terminated list of parameters. */
3407 int parm[6];
3408 gcn_builtin_expander expander;
3409};
3410
3411/* Read in the GCN builtins from gcn-builtins.def. */
3412
3413extern GTY(()) struct gcn_builtin_description gcn_builtins[GCN_BUILTIN_MAX];
3414
3415struct gcn_builtin_description gcn_builtins[] = {
3416#define DEF_BUILTIN(fcode, icode, name, type, params, expander) \
3417 {GCN_BUILTIN_ ## fcode, icode, name, type, params, expander},
3418
3419#define DEF_BUILTIN_BINOP_INT_FP(fcode, ic, name) \
3420 {GCN_BUILTIN_ ## fcode ## _V64SI, \
3421 CODE_FOR_ ## ic ##v64si3_exec, name "_v64int", B_INSN, \
3422 {GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_V64SI, GCN_BTI_V64SI, \
3423 GCN_BTI_V64SI, GCN_BTI_END_OF_PARAMS}, gcn_expand_builtin_binop}, \
3424 {GCN_BUILTIN_ ## fcode ## _V64SI_unspec, \
3425 CODE_FOR_ ## ic ##v64si3_exec, name "_v64int_unspec", B_INSN, \
3426 {GCN_BTI_V64SI, GCN_BTI_EXEC, GCN_BTI_V64SI, GCN_BTI_V64SI, \
3427 GCN_BTI_END_OF_PARAMS}, gcn_expand_builtin_binop},
3428
3429#include "gcn-builtins.def"
3430#undef DEF_BUILTIN_BINOP_INT_FP
3431#undef DEF_BUILTIN
3432};
3433
3434static GTY(()) tree gcn_builtin_decls[GCN_BUILTIN_MAX];
3435
3436/* Implement TARGET_BUILTIN_DECL.
3437
3438 Return the GCN builtin for CODE. */
3439
3440tree
3441gcn_builtin_decl (unsigned code, bool ARG_UNUSED (initialize_p))
3442{
3443 if (code >= GCN_BUILTIN_MAX)
3444 return error_mark_node;
3445
3446 return gcn_builtin_decls[code];
3447}
3448
3449/* Helper function for gcn_init_builtins. */
3450
3451static void
3452gcn_init_builtin_types (void)
3453{
3454 gcn_builtin_types[GCN_BTI_VOID] = void_type_node;
3455 gcn_builtin_types[GCN_BTI_BOOL] = boolean_type_node;
3456 gcn_builtin_types[GCN_BTI_INT] = intSI_type_node;
3457 gcn_builtin_types[GCN_BTI_UINT] = unsigned_type_for (intSI_type_node);
3458 gcn_builtin_types[GCN_BTI_SIZE_T] = size_type_node;
3459 gcn_builtin_types[GCN_BTI_LLINT] = intDI_type_node;
3460 gcn_builtin_types[GCN_BTI_LLUINT] = unsigned_type_for (intDI_type_node);
3461
3462 exec_type_node = unsigned_intDI_type_node;
3463 sf_type_node = float32_type_node;
3464 v64si_type_node = build_vector_type (intSI_type_node, 64);
3465 v64sf_type_node = build_vector_type (float_type_node, 64);
3466 v64ptr_type_node = build_vector_type (unsigned_intDI_type_node
3467 /*build_pointer_type
3468 (integer_type_node) */
3469 , 64);
3470 tree tmp = build_distinct_type_copy (intSI_type_node);
3471 TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_FLAT;
3472 siptr_type_node = build_pointer_type (tmp);
3473
3474 tmp = build_distinct_type_copy (float_type_node);
3475 TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_FLAT;
3476 sfptr_type_node = build_pointer_type (tmp);
3477
3478 tmp = build_distinct_type_copy (void_type_node);
3479 TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_FLAT;
3480 voidptr_type_node = build_pointer_type (tmp);
3481
3482 tmp = build_distinct_type_copy (void_type_node);
3483 TYPE_ADDR_SPACE (tmp) = ADDR_SPACE_LDS;
3484 gcn_builtin_types[GCN_BTI_LDS_VOIDPTR] = build_pointer_type (tmp);
3485}
3486
3487/* Implement TARGET_INIT_BUILTINS.
3488
3489 Set up all builtin functions for this target. */
3490
3491static void
3492gcn_init_builtins (void)
3493{
3494 gcn_init_builtin_types ();
3495
3496 struct gcn_builtin_description *d;
3497 unsigned int i;
3498 for (i = 0, d = gcn_builtins; i < GCN_BUILTIN_MAX; i++, d++)
3499 {
3500 tree p;
3501 char name[64]; /* build_function will make a copy. */
3502 int parm;
3503
3504 /* FIXME: Is this necessary/useful? */
3505 if (d->name == 0)
3506 continue;
3507
3508 /* Find last parm. */
3509 for (parm = 1; d->parm[parm] != GCN_BTI_END_OF_PARAMS; parm++)
3510 ;
3511
3512 p = void_list_node;
3513 while (parm > 1)
3514 p = tree_cons (NULL_TREE, gcn_builtin_types[d->parm[--parm]], p);
3515
3516 p = build_function_type (gcn_builtin_types[d->parm[0]], p);
3517
3518 sprintf (name, "__builtin_gcn_%s", d->name);
3519 gcn_builtin_decls[i]
3520 = add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
3521
3522 /* These builtins don't throw. */
3523 TREE_NOTHROW (gcn_builtin_decls[i]) = 1;
3524 }
3525
3526/* FIXME: remove the ifdef once OpenACC support is merged upstream. */
3527#ifdef BUILT_IN_GOACC_SINGLE_START
3528 /* These builtins need to take/return an LDS pointer: override the generic
3529 versions here. */
3530
3531 set_builtin_decl (BUILT_IN_GOACC_SINGLE_START,
3532 gcn_builtin_decls[GCN_BUILTIN_ACC_SINGLE_START], false);
3533
3534 set_builtin_decl (BUILT_IN_GOACC_SINGLE_COPY_START,
3535 gcn_builtin_decls[GCN_BUILTIN_ACC_SINGLE_COPY_START],
3536 false);
3537
3538 set_builtin_decl (BUILT_IN_GOACC_SINGLE_COPY_END,
3539 gcn_builtin_decls[GCN_BUILTIN_ACC_SINGLE_COPY_END],
3540 false);
3541
3542 set_builtin_decl (BUILT_IN_GOACC_BARRIER,
3543 gcn_builtin_decls[GCN_BUILTIN_ACC_BARRIER], false);
3544#endif
3545}
3546
3547/* Expand the CMP_SWAP GCN builtins. We have our own versions that do
3548 not require taking the address of any object, other than the memory
3549 cell being operated on.
3550
3551 Helper function for gcn_expand_builtin_1. */
3552
3553static rtx
3554gcn_expand_cmp_swap (tree exp, rtx target)
3555{
3556 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
3557 addr_space_t as
3558 = TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (CALL_EXPR_ARG (exp, 0))));
3559 machine_mode as_mode = gcn_addr_space_address_mode (as);
3560
3561 if (!target)
3562 target = gen_reg_rtx (mode);
3563
3564 rtx addr = expand_expr (CALL_EXPR_ARG (exp, 0),
3565 NULL_RTX, as_mode, EXPAND_NORMAL);
3566 rtx cmp = expand_expr (CALL_EXPR_ARG (exp, 1),
3567 NULL_RTX, mode, EXPAND_NORMAL);
3568 rtx src = expand_expr (CALL_EXPR_ARG (exp, 2),
3569 NULL_RTX, mode, EXPAND_NORMAL);
3570 rtx pat;
3571
3572 rtx mem = gen_rtx_MEM (mode, force_reg (as_mode, addr));
3573 set_mem_addr_space (mem, as);
3574
3575 if (!REG_P (cmp))
3576 cmp = copy_to_mode_reg (mode, cmp);
3577 if (!REG_P (src))
3578 src = copy_to_mode_reg (mode, src);
3579
3580 if (mode == SImode)
3581 pat = gen_sync_compare_and_swapsi (target, mem, cmp, src);
3582 else
3583 pat = gen_sync_compare_and_swapdi (target, mem, cmp, src);
3584
3585 emit_insn (pat);
3586
3587 return target;
3588}
3589
3590/* Expand many different builtins.
3591
3592 Intended for use in gcn-builtins.def. */
3593
3594static rtx
3595gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ ,
3596 machine_mode /*mode */ , int ignore,
3597 struct gcn_builtin_description *)
3598{
3599 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
4d732405 3600 switch (DECL_MD_FUNCTION_CODE (fndecl))
5326695a
AS
3601 {
3602 case GCN_BUILTIN_FLAT_LOAD_INT32:
3603 {
3604 if (ignore)
3605 return target;
3606 /*rtx exec = */
3607 force_reg (DImode,
3608 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, DImode,
3609 EXPAND_NORMAL));
3610 /*rtx ptr = */
3611 force_reg (V64DImode,
3612 expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, V64DImode,
3613 EXPAND_NORMAL));
3614 /*emit_insn (gen_vector_flat_loadv64si
3615 (target, gcn_gen_undef (V64SImode), ptr, exec)); */
3616 return target;
3617 }
3618 case GCN_BUILTIN_FLAT_LOAD_PTR_INT32:
3619 case GCN_BUILTIN_FLAT_LOAD_PTR_FLOAT:
3620 {
3621 if (ignore)
3622 return target;
3623 rtx exec = force_reg (DImode,
3624 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
3625 DImode,
3626 EXPAND_NORMAL));
3627 rtx ptr = force_reg (DImode,
3628 expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
3629 V64DImode,
3630 EXPAND_NORMAL));
3631 rtx offsets = force_reg (V64SImode,
3632 expand_expr (CALL_EXPR_ARG (exp, 2),
3633 NULL_RTX, V64DImode,
3634 EXPAND_NORMAL));
3635 rtx addrs = gen_reg_rtx (V64DImode);
3636 rtx tmp = gen_reg_rtx (V64SImode);
3637 emit_insn (gen_ashlv64si3_exec (tmp, offsets,
3638 GEN_INT (2),
3639 gcn_gen_undef (V64SImode), exec));
3640 emit_insn (gen_addv64di3_zext_dup2_exec (addrs, tmp, ptr,
3641 gcn_gen_undef (V64DImode),
3642 exec));
3643 rtx mem = gen_rtx_MEM (GET_MODE (target), addrs);
3644 /*set_mem_addr_space (mem, ADDR_SPACE_FLAT); */
3645 /* FIXME: set attributes. */
3646 emit_insn (gen_mov_with_exec (target, mem, exec));
3647 return target;
3648 }
3649 case GCN_BUILTIN_FLAT_STORE_PTR_INT32:
3650 case GCN_BUILTIN_FLAT_STORE_PTR_FLOAT:
3651 {
3652 rtx exec = force_reg (DImode,
3653 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
3654 DImode,
3655 EXPAND_NORMAL));
3656 rtx ptr = force_reg (DImode,
3657 expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
3658 V64DImode,
3659 EXPAND_NORMAL));
3660 rtx offsets = force_reg (V64SImode,
3661 expand_expr (CALL_EXPR_ARG (exp, 2),
3662 NULL_RTX, V64DImode,
3663 EXPAND_NORMAL));
3664 machine_mode vmode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (exp,
3665 3)));
3666 rtx val = force_reg (vmode,
3667 expand_expr (CALL_EXPR_ARG (exp, 3), NULL_RTX,
3668 vmode,
3669 EXPAND_NORMAL));
3670 rtx addrs = gen_reg_rtx (V64DImode);
3671 rtx tmp = gen_reg_rtx (V64SImode);
3672 emit_insn (gen_ashlv64si3_exec (tmp, offsets,
3673 GEN_INT (2),
3674 gcn_gen_undef (V64SImode), exec));
3675 emit_insn (gen_addv64di3_zext_dup2_exec (addrs, tmp, ptr,
3676 gcn_gen_undef (V64DImode),
3677 exec));
3678 rtx mem = gen_rtx_MEM (vmode, addrs);
3679 /*set_mem_addr_space (mem, ADDR_SPACE_FLAT); */
3680 /* FIXME: set attributes. */
3681 emit_insn (gen_mov_with_exec (mem, val, exec));
3682 return target;
3683 }
3684 case GCN_BUILTIN_SQRTVF:
3685 {
3686 if (ignore)
3687 return target;
3688 rtx exec = gcn_full_exec_reg ();
3689 rtx arg = force_reg (V64SFmode,
3690 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
3691 V64SFmode,
3692 EXPAND_NORMAL));
3693 emit_insn (gen_sqrtv64sf2_exec
3694 (target, arg, gcn_gen_undef (V64SFmode), exec));
3695 return target;
3696 }
3697 case GCN_BUILTIN_SQRTF:
3698 {
3699 if (ignore)
3700 return target;
3701 rtx arg = force_reg (SFmode,
3702 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
3703 SFmode,
3704 EXPAND_NORMAL));
3705 emit_insn (gen_sqrtsf2 (target, arg));
3706 return target;
3707 }
3708 case GCN_BUILTIN_OMP_DIM_SIZE:
3709 {
3710 if (ignore)
3711 return target;
3712 emit_insn (gen_oacc_dim_size (target,
3713 expand_expr (CALL_EXPR_ARG (exp, 0),
3714 NULL_RTX, SImode,
3715 EXPAND_NORMAL)));
3716 return target;
3717 }
3718 case GCN_BUILTIN_OMP_DIM_POS:
3719 {
3720 if (ignore)
3721 return target;
3722 emit_insn (gen_oacc_dim_pos (target,
3723 expand_expr (CALL_EXPR_ARG (exp, 0),
3724 NULL_RTX, SImode,
3725 EXPAND_NORMAL)));
3726 return target;
3727 }
3728 case GCN_BUILTIN_CMP_SWAP:
3729 case GCN_BUILTIN_CMP_SWAPLL:
3730 return gcn_expand_cmp_swap (exp, target);
3731
3732 case GCN_BUILTIN_ACC_SINGLE_START:
3733 {
3734 if (ignore)
3735 return target;
3736
3737 rtx wavefront = gcn_oacc_dim_pos (1);
3738 rtx cond = gen_rtx_EQ (VOIDmode, wavefront, const0_rtx);
3739 rtx cc = (target && REG_P (target)) ? target : gen_reg_rtx (BImode);
3740 emit_insn (gen_cstoresi4 (cc, cond, wavefront, const0_rtx));
3741 return cc;
3742 }
3743
3744 case GCN_BUILTIN_ACC_SINGLE_COPY_START:
3745 {
3746 rtx blk = force_reg (SImode,
3747 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
3748 SImode, EXPAND_NORMAL));
3749 rtx wavefront = gcn_oacc_dim_pos (1);
3750 rtx cond = gen_rtx_NE (VOIDmode, wavefront, const0_rtx);
3751 rtx not_zero = gen_label_rtx ();
3752 emit_insn (gen_cbranchsi4 (cond, wavefront, const0_rtx, not_zero));
3753 emit_move_insn (blk, const0_rtx);
3754 emit_label (not_zero);
3755 return blk;
3756 }
3757
3758 case GCN_BUILTIN_ACC_SINGLE_COPY_END:
3759 return target;
3760
3761 case GCN_BUILTIN_ACC_BARRIER:
3762 emit_insn (gen_gcn_wavefront_barrier ());
3763 return target;
3764
3765 default:
3766 gcc_unreachable ();
3767 }
3768}
3769
3770/* Expansion of simple arithmetic and bit binary operation builtins.
3771
3772 Intended for use with the gcn_builtins table. */
3773
3774static rtx
3775gcn_expand_builtin_binop (tree exp, rtx target, rtx /*subtarget */ ,
3776 machine_mode /*mode */ , int ignore,
3777 struct gcn_builtin_description *d)
3778{
3779 int icode = d->icode;
3780 if (ignore)
3781 return target;
3782
3783 rtx exec = force_reg (DImode,
3784 expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, DImode,
3785 EXPAND_NORMAL));
3786
3787 machine_mode m1 = insn_data[icode].operand[1].mode;
3788 rtx arg1 = expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, m1,
3789 EXPAND_NORMAL);
3790 if (!insn_data[icode].operand[1].predicate (arg1, m1))
3791 arg1 = force_reg (m1, arg1);
3792
3793 machine_mode m2 = insn_data[icode].operand[2].mode;
3794 rtx arg2 = expand_expr (CALL_EXPR_ARG (exp, 2), NULL_RTX, m2,
3795 EXPAND_NORMAL);
3796 if (!insn_data[icode].operand[2].predicate (arg2, m2))
3797 arg2 = force_reg (m2, arg2);
3798
3799 rtx arg_prev;
3800 if (call_expr_nargs (exp) == 4)
3801 {
3802 machine_mode m_prev = insn_data[icode].operand[4].mode;
3803 arg_prev = force_reg (m_prev,
3804 expand_expr (CALL_EXPR_ARG (exp, 3), NULL_RTX,
3805 m_prev, EXPAND_NORMAL));
3806 }
3807 else
3808 arg_prev = gcn_gen_undef (GET_MODE (target));
3809
3810 rtx pat = GEN_FCN (icode) (target, arg1, arg2, exec, arg_prev);
3811 emit_insn (pat);
3812 return target;
3813}
3814
3815/* Implement TARGET_EXPAND_BUILTIN.
3816
3817 Expand an expression EXP that calls a built-in function, with result going
3818 to TARGET if that's convenient (and in mode MODE if that's convenient).
3819 SUBTARGET may be used as the target for computing one of EXP's operands.
3820 IGNORE is nonzero if the value is to be ignored. */
3821
3822rtx
3823gcn_expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
3824 int ignore)
3825{
3826 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
3827 unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
3828 struct gcn_builtin_description *d;
3829
3830 gcc_assert (fcode < GCN_BUILTIN_MAX);
3831 d = &gcn_builtins[fcode];
3832
3833 if (d->type == B_UNIMPLEMENTED)
3834 sorry ("Builtin not implemented");
3835
3836 return d->expander (exp, target, subtarget, mode, ignore, d);
3837}
3838
3839/* }}} */
3840/* {{{ Vectorization. */
3841
3842/* Implement TARGET_VECTORIZE_GET_MASK_MODE.
3843
3844 A vector mask is a value that holds one boolean result for every element in
3845 a vector. */
3846
3847opt_machine_mode
3848 gcn_vectorize_get_mask_mode (machine_mode)
3849{
3850 /* GCN uses a DImode bit-mask. */
3851 return DImode;
3852}
3853
3854/* Return an RTX that references a vector with the i-th lane containing
3855 PERM[i]*4.
3856
3857 Helper function for gcn_vectorize_vec_perm_const. */
3858
3859static rtx
3860gcn_make_vec_perm_address (unsigned int *perm)
3861{
3862 rtx x = gen_reg_rtx (V64SImode);
3863 emit_move_insn (x, gcn_vec_constant (V64SImode, 0));
3864
3865 /* Permutation addresses use byte addressing. With each vector lane being
3866 4 bytes wide, and with 64 lanes in total, only bits 2..7 are significant,
3867 so only set those.
3868
3869 The permutation given to the vec_perm* patterns ranges from 0 to 2N-1 to
3870 select between lanes in two vectors, but as the DS_BPERMUTE* instructions
3871 only take one source vector, the most-significant bit can be ignored
3872 here. Instead, we can use EXEC masking to select the relevant part of
3873 each source vector after they are permuted separately. */
3874 uint64_t bit_mask = 1 << 2;
3875 for (int i = 2; i < 8; i++, bit_mask <<= 1)
3876 {
3877 uint64_t exec_mask = 0;
3878 uint64_t lane_mask = 1;
3879 for (int j = 0; j < 64; j++, lane_mask <<= 1)
3880 if ((perm[j] * 4) & bit_mask)
3881 exec_mask |= lane_mask;
3882
3883 if (exec_mask)
3884 emit_insn (gen_addv64si3_exec (x, x,
3885 gcn_vec_constant (V64SImode,
3886 bit_mask),
3887 x, get_exec (exec_mask)));
3888 }
3889
3890 return x;
3891}
3892
3893/* Implement TARGET_VECTORIZE_VEC_PERM_CONST.
3894
3895 Return true if permutation with SEL is possible.
3896
3897 If DST/SRC0/SRC1 are non-null, emit the instructions to perform the
3898 permutations. */
3899
3900static bool
3901gcn_vectorize_vec_perm_const (machine_mode vmode, rtx dst,
3902 rtx src0, rtx src1,
3903 const vec_perm_indices & sel)
3904{
3905 unsigned int nelt = GET_MODE_NUNITS (vmode);
3906
3907 gcc_assert (VECTOR_MODE_P (vmode));
3908 gcc_assert (nelt <= 64);
3909 gcc_assert (sel.length () == nelt);
3910
3911 if (!dst)
3912 {
3913 /* All vector permutations are possible on this architecture,
3914 with varying degrees of efficiency depending on the permutation. */
3915 return true;
3916 }
3917
3918 unsigned int perm[64];
3919 for (unsigned int i = 0; i < nelt; ++i)
3920 perm[i] = sel[i] & (2 * nelt - 1);
3921
3922 /* Make life a bit easier by swapping operands if necessary so that
3923 the first element always comes from src0. */
3924 if (perm[0] >= nelt)
3925 {
3926 rtx temp = src0;
3927 src0 = src1;
3928 src1 = temp;
3929
3930 for (unsigned int i = 0; i < nelt; ++i)
3931 if (perm[i] < nelt)
3932 perm[i] += nelt;
3933 else
3934 perm[i] -= nelt;
3935 }
3936
3937 /* TODO: There are more efficient ways to implement certain permutations
3938 using ds_swizzle_b32 and/or DPP. Test for and expand them here, before
3939 this more inefficient generic approach is used. */
3940
3941 int64_t src1_lanes = 0;
3942 int64_t lane_bit = 1;
3943
3944 for (unsigned int i = 0; i < nelt; ++i, lane_bit <<= 1)
3945 {
3946 /* Set the bits for lanes from src1. */
3947 if (perm[i] >= nelt)
3948 src1_lanes |= lane_bit;
3949 }
3950
3951 rtx addr = gcn_make_vec_perm_address (perm);
3952 rtx (*ds_bpermute) (rtx, rtx, rtx, rtx);
3953
3954 switch (vmode)
3955 {
3956 case E_V64QImode:
3957 ds_bpermute = gen_ds_bpermutev64qi;
3958 break;
3959 case E_V64HImode:
3960 ds_bpermute = gen_ds_bpermutev64hi;
3961 break;
3962 case E_V64SImode:
3963 ds_bpermute = gen_ds_bpermutev64si;
3964 break;
3965 case E_V64HFmode:
3966 ds_bpermute = gen_ds_bpermutev64hf;
3967 break;
3968 case E_V64SFmode:
3969 ds_bpermute = gen_ds_bpermutev64sf;
3970 break;
3971 case E_V64DImode:
3972 ds_bpermute = gen_ds_bpermutev64di;
3973 break;
3974 case E_V64DFmode:
3975 ds_bpermute = gen_ds_bpermutev64df;
3976 break;
3977 default:
3978 gcc_assert (false);
3979 }
3980
3981 /* Load elements from src0 to dst. */
3982 gcc_assert (~src1_lanes);
3983 emit_insn (ds_bpermute (dst, addr, src0, gcn_full_exec_reg ()));
3984
3985 /* Load elements from src1 to dst. */
3986 if (src1_lanes)
3987 {
3988 /* Masking a lane masks both the destination and source lanes for
3989 DS_BPERMUTE, so we need to have all lanes enabled for the permute,
3990 then add an extra masked move to merge the results of permuting
3991 the two source vectors together.
3992 */
3993 rtx tmp = gen_reg_rtx (vmode);
3994 emit_insn (ds_bpermute (tmp, addr, src1, gcn_full_exec_reg ()));
3995 emit_insn (gen_mov_with_exec (dst, tmp, get_exec (src1_lanes)));
3996 }
3997
3998 return true;
3999}
4000
4001/* Implements TARGET_VECTOR_MODE_SUPPORTED_P.
4002
4003 Return nonzero if vector MODE is supported with at least move
4004 instructions. */
4005
4006static bool
4007gcn_vector_mode_supported_p (machine_mode mode)
4008{
4009 return (mode == V64QImode || mode == V64HImode
4010 || mode == V64SImode || mode == V64DImode
4011 || mode == V64SFmode || mode == V64DFmode);
4012}
4013
4014/* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE.
4015
4016 Enables autovectorization for all supported modes. */
4017
4018static machine_mode
4019gcn_vectorize_preferred_simd_mode (scalar_mode mode)
4020{
4021 switch (mode)
4022 {
4023 case E_QImode:
4024 return V64QImode;
4025 case E_HImode:
4026 return V64HImode;
4027 case E_SImode:
4028 return V64SImode;
4029 case E_DImode:
4030 return V64DImode;
4031 case E_SFmode:
4032 return V64SFmode;
4033 case E_DFmode:
4034 return V64DFmode;
4035 default:
4036 return word_mode;
4037 }
4038}
4039
4040/* Implement TARGET_VECTORIZE_RELATED_MODE.
4041
4042 All GCN vectors are 64-lane, so this is simpler than other architectures.
4043 In particular, we do *not* want to match vector bit-size. */
4044
4045static opt_machine_mode
4046gcn_related_vector_mode (machine_mode vector_mode, scalar_mode element_mode,
4047 poly_uint64 nunits)
4048{
4049 if (known_ne (nunits, 0U) && known_ne (nunits, 64U))
4050 return VOIDmode;
4051
4052 machine_mode pref_mode = gcn_vectorize_preferred_simd_mode (element_mode);
4053 if (!VECTOR_MODE_P (pref_mode))
4054 return VOIDmode;
4055
4056 return pref_mode;
4057}
4058
4059/* Implement TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT.
4060
4061 Returns the preferred alignment in bits for accesses to vectors of type type
4062 in vectorized code. This might be less than or greater than the ABI-defined
4063 value returned by TARGET_VECTOR_ALIGNMENT. It can be equal to the alignment
4064 of a single element, in which case the vectorizer will not try to optimize
4065 for alignment. */
4066
4067static poly_uint64
4068gcn_preferred_vector_alignment (const_tree type)
4069{
4070 return TYPE_ALIGN (TREE_TYPE (type));
4071}
4072
4073/* Implement TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT.
4074
4075 Return true if the target supports misaligned vector store/load of a
4076 specific factor denoted in the misalignment parameter. */
4077
4078static bool
4079gcn_vectorize_support_vector_misalignment (machine_mode ARG_UNUSED (mode),
4080 const_tree type, int misalignment,
4081 bool is_packed)
4082{
4083 if (is_packed)
4084 return false;
4085
4086 /* If the misalignment is unknown, we should be able to handle the access
4087 so long as it is not to a member of a packed data structure. */
4088 if (misalignment == -1)
4089 return true;
4090
4091 /* Return true if the misalignment is a multiple of the natural alignment
4092 of the vector's element type. This is probably always going to be
4093 true in practice, since we've already established that this isn't a
4094 packed access. */
4095 return misalignment % TYPE_ALIGN_UNIT (type) == 0;
4096}
4097
4098/* Implement TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.
4099
4100 Return true if vector alignment is reachable (by peeling N iterations) for
4101 the given scalar type TYPE. */
4102
4103static bool
4104gcn_vector_alignment_reachable (const_tree ARG_UNUSED (type), bool is_packed)
4105{
4106 /* Vectors which aren't in packed structures will not be less aligned than
4107 the natural alignment of their element type, so this is safe. */
4108 return !is_packed;
4109}
4110
4111/* Generate DPP instructions used for vector reductions.
4112
4113 The opcode is given by INSN.
4114 The first operand of the operation is shifted right by SHIFT vector lanes.
4115 SHIFT must be a power of 2. If SHIFT is 16, the 15th lane of each row is
4116 broadcast to the next row (thereby acting like a shift of 16 for the end of
4117 each row). If SHIFT is 32, lane 31 is broadcast to all the
4118 following lanes (thereby acting like a shift of 32 for lane 63). */
4119
4120char *
4121gcn_expand_dpp_shr_insn (machine_mode mode, const char *insn,
4122 int unspec, int shift)
4123{
4124 static char buf[64];
4125 const char *dpp;
4126 const char *vcc_in = "";
4127 const char *vcc_out = "";
4128
4129 /* Add the vcc operand if needed. */
4130 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
4131 {
4132 if (unspec == UNSPEC_PLUS_CARRY_IN_DPP_SHR)
4133 vcc_in = ", vcc";
4134
4135 if (unspec == UNSPEC_PLUS_CARRY_DPP_SHR
4136 || unspec == UNSPEC_PLUS_CARRY_IN_DPP_SHR)
4137 vcc_out = ", vcc";
4138 }
4139
4140 /* Add the DPP modifiers. */
4141 switch (shift)
4142 {
4143 case 1:
4144 dpp = "row_shr:1 bound_ctrl:0";
4145 break;
4146 case 2:
4147 dpp = "row_shr:2 bound_ctrl:0";
4148 break;
4149 case 4:
4150 dpp = "row_shr:4 bank_mask:0xe";
4151 break;
4152 case 8:
4153 dpp = "row_shr:8 bank_mask:0xc";
4154 break;
4155 case 16:
4156 dpp = "row_bcast:15 row_mask:0xa";
4157 break;
4158 case 32:
4159 dpp = "row_bcast:31 row_mask:0xc";
4160 break;
4161 default:
4162 gcc_unreachable ();
4163 }
4164
4165 sprintf (buf, "%s\t%%0%s, %%1, %%2%s %s", insn, vcc_out, vcc_in, dpp);
4166
4167 return buf;
4168}
4169
4170/* Generate vector reductions in terms of DPP instructions.
4171
4172 The vector register SRC of mode MODE is reduced using the operation given
4173 by UNSPEC, and the scalar result is returned in lane 63 of a vector
4174 register. */
4175
4176rtx
4177gcn_expand_reduc_scalar (machine_mode mode, rtx src, int unspec)
4178{
4179 rtx tmp = gen_reg_rtx (mode);
4180 bool use_plus_carry = unspec == UNSPEC_PLUS_DPP_SHR
4181 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
4182 && (TARGET_GCN3 || mode == V64DImode);
4183
4184 if (use_plus_carry)
4185 unspec = UNSPEC_PLUS_CARRY_DPP_SHR;
4186
4187 /* Perform reduction by first performing the reduction operation on every
4188 pair of lanes, then on every pair of results from the previous
4189 iteration (thereby effectively reducing every 4 lanes) and so on until
4190 all lanes are reduced. */
4191 for (int i = 0, shift = 1; i < 6; i++, shift <<= 1)
4192 {
4193 rtx shift_val = gen_rtx_CONST_INT (VOIDmode, shift);
4194 rtx insn = gen_rtx_SET (tmp,
4195 gen_rtx_UNSPEC (mode,
4196 gen_rtvec (3,
4197 src, src, shift_val),
4198 unspec));
4199
4200 /* Add clobber for instructions that set the carry flags. */
4201 if (use_plus_carry)
4202 {
4203 rtx clobber = gen_rtx_CLOBBER (VOIDmode,
4204 gen_rtx_REG (DImode, VCC_REG));
4205 insn = gen_rtx_PARALLEL (VOIDmode,
4206 gen_rtvec (2, insn, clobber));
4207 }
4208
4209 emit_insn (insn);
4210
4211 /* The source operands for every iteration after the first
4212 should be TMP. */
4213 src = tmp;
4214 }
4215
4216 return tmp;
4217}
4218
4219/* Implement TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST. */
4220
4221int
4222gcn_vectorization_cost (enum vect_cost_for_stmt ARG_UNUSED (type_of_cost),
4223 tree ARG_UNUSED (vectype), int ARG_UNUSED (misalign))
4224{
4225 /* Always vectorize. */
4226 return 1;
4227}
4228
4229/* }}} */
4230/* {{{ md_reorg pass. */
4231
4232/* Identify VMEM instructions from their "type" attribute. */
4233
4234static bool
4235gcn_vmem_insn_p (attr_type type)
4236{
4237 switch (type)
4238 {
4239 case TYPE_MUBUF:
4240 case TYPE_MTBUF:
4241 case TYPE_FLAT:
4242 return true;
4243 case TYPE_UNKNOWN:
4244 case TYPE_SOP1:
4245 case TYPE_SOP2:
4246 case TYPE_SOPK:
4247 case TYPE_SOPC:
4248 case TYPE_SOPP:
4249 case TYPE_SMEM:
4250 case TYPE_DS:
4251 case TYPE_VOP2:
4252 case TYPE_VOP1:
4253 case TYPE_VOPC:
4254 case TYPE_VOP3A:
4255 case TYPE_VOP3B:
4256 case TYPE_VOP_SDWA:
4257 case TYPE_VOP_DPP:
4258 case TYPE_MULT:
4259 case TYPE_VMULT:
4260 return false;
4261 }
4262 gcc_unreachable ();
4263 return false;
4264}
4265
4266/* If INSN sets the EXEC register to a constant value, return the value,
4267 otherwise return zero. */
4268
4269static int64_t
4270gcn_insn_exec_value (rtx_insn *insn)
4271{
4272 if (!NONDEBUG_INSN_P (insn))
4273 return 0;
4274
4275 rtx pattern = PATTERN (insn);
4276
4277 if (GET_CODE (pattern) == SET)
4278 {
4279 rtx dest = XEXP (pattern, 0);
4280 rtx src = XEXP (pattern, 1);
4281
4282 if (GET_MODE (dest) == DImode
4283 && REG_P (dest) && REGNO (dest) == EXEC_REG
4284 && CONST_INT_P (src))
4285 return INTVAL (src);
4286 }
4287
4288 return 0;
4289}
4290
4291/* Sets the EXEC register before INSN to the value that it had after
4292 LAST_EXEC_DEF. The constant value of the EXEC register is returned if
4293 known, otherwise it returns zero. */
4294
4295static int64_t
4296gcn_restore_exec (rtx_insn *insn, rtx_insn *last_exec_def, int64_t curr_exec,
4297 bool curr_exec_known, bool &last_exec_def_saved)
4298{
4299 rtx exec_reg = gen_rtx_REG (DImode, EXEC_REG);
4300 rtx exec;
4301
4302 int64_t exec_value = gcn_insn_exec_value (last_exec_def);
4303
4304 if (exec_value)
4305 {
4306 /* If the EXEC value is a constant and it happens to be the same as the
4307 current EXEC value, the restore can be skipped. */
4308 if (curr_exec_known && exec_value == curr_exec)
4309 return exec_value;
4310
4311 exec = GEN_INT (exec_value);
4312 }
4313 else
4314 {
4315 /* If the EXEC value is not a constant, save it in a register after the
4316 point of definition. */
4317 rtx exec_save_reg = gen_rtx_REG (DImode, EXEC_SAVE_REG);
4318
4319 if (!last_exec_def_saved)
4320 {
4321 start_sequence ();
4322 emit_move_insn (exec_save_reg, exec_reg);
4323 rtx_insn *seq = get_insns ();
4324 end_sequence ();
4325
4326 emit_insn_after (seq, last_exec_def);
4327 if (dump_file && (dump_flags & TDF_DETAILS))
4328 fprintf (dump_file, "Saving EXEC after insn %d.\n",
4329 INSN_UID (last_exec_def));
4330
4331 last_exec_def_saved = true;
4332 }
4333
4334 exec = exec_save_reg;
4335 }
4336
4337 /* Restore EXEC register before the usage. */
4338 start_sequence ();
4339 emit_move_insn (exec_reg, exec);
4340 rtx_insn *seq = get_insns ();
4341 end_sequence ();
4342 emit_insn_before (seq, insn);
4343
4344 if (dump_file && (dump_flags & TDF_DETAILS))
4345 {
4346 if (exec_value)
4347 fprintf (dump_file, "Restoring EXEC to %ld before insn %d.\n",
4348 exec_value, INSN_UID (insn));
4349 else
4350 fprintf (dump_file,
4351 "Restoring EXEC from saved value before insn %d.\n",
4352 INSN_UID (insn));
4353 }
4354
4355 return exec_value;
4356}
4357
4358/* Implement TARGET_MACHINE_DEPENDENT_REORG.
4359
4360 Ensure that pipeline dependencies and lane masking are set correctly. */
4361
4362static void
4363gcn_md_reorg (void)
4364{
4365 basic_block bb;
4366 rtx exec_reg = gen_rtx_REG (DImode, EXEC_REG);
4367 regset_head live;
4368
4369 INIT_REG_SET (&live);
4370
4371 compute_bb_for_insn ();
4372
4373 if (!optimize)
4374 {
4375 split_all_insns ();
4376 if (dump_file && (dump_flags & TDF_DETAILS))
4377 {
4378 fprintf (dump_file, "After split:\n");
4379 print_rtl_with_bb (dump_file, get_insns (), dump_flags);
4380 }
4381
4382 /* Update data-flow information for split instructions. */
4383 df_insn_rescan_all ();
4384 }
4385
4386 df_analyze ();
4387
4388 /* This pass ensures that the EXEC register is set correctly, according
4389 to the "exec" attribute. However, care must be taken so that the
4390 value that reaches explicit uses of the EXEC register remains the
4391 same as before.
4392 */
4393
4394 FOR_EACH_BB_FN (bb, cfun)
4395 {
4396 if (dump_file && (dump_flags & TDF_DETAILS))
4397 fprintf (dump_file, "BB %d:\n", bb->index);
4398
4399 rtx_insn *insn, *curr;
4400 rtx_insn *last_exec_def = BB_HEAD (bb);
4401 bool last_exec_def_saved = false;
4402 bool curr_exec_explicit = true;
4403 bool curr_exec_known = true;
4404 int64_t curr_exec = 0; /* 0 here means 'the value is that of EXEC
4405 after last_exec_def is executed'. */
4406
4407 FOR_BB_INSNS_SAFE (bb, insn, curr)
4408 {
4409 if (!NONDEBUG_INSN_P (insn))
4410 continue;
4411
4412 if (GET_CODE (PATTERN (insn)) == USE
4413 || GET_CODE (PATTERN (insn)) == CLOBBER)
4414 continue;
4415
4416 HARD_REG_SET defs, uses;
4417 CLEAR_HARD_REG_SET (defs);
4418 CLEAR_HARD_REG_SET (uses);
4419 note_stores (insn, record_hard_reg_sets, &defs);
4420 note_uses (&PATTERN (insn), record_hard_reg_uses, &uses);
4421
4422 bool exec_lo_def_p = TEST_HARD_REG_BIT (defs, EXEC_LO_REG);
4423 bool exec_hi_def_p = TEST_HARD_REG_BIT (defs, EXEC_HI_REG);
4424 bool exec_used = (hard_reg_set_intersect_p
4425 (uses, reg_class_contents[(int) EXEC_MASK_REG])
4426 || TEST_HARD_REG_BIT (uses, EXECZ_REG));
4427
4428 /* Check the instruction for implicit setting of EXEC via an
4429 attribute. */
4430 attr_exec exec_attr = get_attr_exec (insn);
4431 int64_t new_exec;
4432
4433 switch (exec_attr)
4434 {
4435 case EXEC_NONE:
4436 new_exec = 0;
4437 break;
4438
4439 case EXEC_SINGLE:
4440 /* Memory instructions need EXEC to be exactly 1; other
4441 instructions only require bit 0 of EXEC to be set. */
4442 if (gcn_vmem_insn_p (get_attr_type (insn))
4443 || get_attr_type (insn) == TYPE_DS)
4444 new_exec = 1;
4445 else
4446 new_exec = curr_exec | 1;
4447 break;
4448
4449 case EXEC_FULL:
4450 new_exec = -1;
4451 break;
4452
4453 default: /* Auto-detect what setting is appropriate. */
4454 {
4455 new_exec = 0;
4456
4457 /* If EXEC is referenced explicitly then we don't need to do
4458 anything to set it, so we're done. */
4459 if (exec_used)
4460 break;
4461
4462 /* Scan the insn for VGPRs defs or uses. The mode determines
4463 what kind of exec is needed. */
4464 subrtx_iterator::array_type array;
4465 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
4466 {
4467 const_rtx x = *iter;
4468 if (REG_P (x) && VGPR_REGNO_P (REGNO (x)))
4469 {
4470 if (VECTOR_MODE_P (GET_MODE (x)))
4471 {
4472 new_exec = -1;
4473 break;
4474 }
4475 else
4476 new_exec = 1;
4477 }
4478 }
4479 }
4480 break;
4481 }
4482
4483 if (new_exec && (!curr_exec_known || new_exec != curr_exec))
4484 {
4485 start_sequence ();
4486 emit_move_insn (exec_reg, GEN_INT (new_exec));
4487 rtx_insn *seq = get_insns ();
4488 end_sequence ();
4489 emit_insn_before (seq, insn);
4490
4491 if (dump_file && (dump_flags & TDF_DETAILS))
4492 fprintf (dump_file, "Setting EXEC to %ld before insn %d.\n",
4493 new_exec, INSN_UID (insn));
4494
4495 curr_exec = new_exec;
4496 curr_exec_explicit = false;
4497 curr_exec_known = true;
4498 }
4499 else if (new_exec && dump_file && (dump_flags & TDF_DETAILS))
4500 {
4501 fprintf (dump_file, "Exec already is %ld before insn %d.\n",
4502 new_exec, INSN_UID (insn));
4503 }
4504
4505 /* The state of the EXEC register is unknown after a
4506 function call. */
4507 if (CALL_P (insn))
4508 curr_exec_known = false;
4509
4510 /* Handle explicit uses of EXEC. If the instruction is a partial
4511 explicit definition of EXEC, then treat it as an explicit use of
4512 EXEC as well. */
4513 if (exec_used || exec_lo_def_p != exec_hi_def_p)
4514 {
4515 /* An instruction that explicitly uses EXEC should not also
4516 implicitly define it. */
4517 gcc_assert (!exec_used || !new_exec);
4518
4519 if (!curr_exec_known || !curr_exec_explicit)
4520 {
4521 /* Restore the previous explicitly defined value. */
4522 curr_exec = gcn_restore_exec (insn, last_exec_def,
4523 curr_exec, curr_exec_known,
4524 last_exec_def_saved);
4525 curr_exec_explicit = true;
4526 curr_exec_known = true;
4527 }
4528 }
4529
4530 /* Handle explicit definitions of EXEC. */
4531 if (exec_lo_def_p || exec_hi_def_p)
4532 {
4533 last_exec_def = insn;
4534 last_exec_def_saved = false;
4535 curr_exec = gcn_insn_exec_value (insn);
4536 curr_exec_explicit = true;
4537 curr_exec_known = true;
4538
4539 if (dump_file && (dump_flags & TDF_DETAILS))
4540 fprintf (dump_file,
4541 "Found %s definition of EXEC at insn %d.\n",
4542 exec_lo_def_p == exec_hi_def_p ? "full" : "partial",
4543 INSN_UID (insn));
4544 }
4545 }
4546
4547 COPY_REG_SET (&live, DF_LR_OUT (bb));
4548 df_simulate_initialize_backwards (bb, &live);
4549
4550 /* If EXEC is live after the basic block, restore the value of EXEC
4551 at the end of the block. */
4552 if ((REGNO_REG_SET_P (&live, EXEC_LO_REG)
4553 || REGNO_REG_SET_P (&live, EXEC_HI_REG))
4554 && (!curr_exec_known || !curr_exec_explicit))
4555 {
4556 rtx_insn *end_insn = BB_END (bb);
4557
4558 /* If the instruction is not a jump instruction, do the restore
4559 after the last instruction in the basic block. */
4560 if (NONJUMP_INSN_P (end_insn))
4561 end_insn = NEXT_INSN (end_insn);
4562
4563 gcn_restore_exec (end_insn, last_exec_def, curr_exec,
4564 curr_exec_known, last_exec_def_saved);
4565 }
4566 }
4567
4568 CLEAR_REG_SET (&live);
4569
4570 /* "Manually Inserted Wait States (NOPs)."
4571
4572 GCN hardware detects most kinds of register dependencies, but there
4573 are some exceptions documented in the ISA manual. This pass
4574 detects the missed cases, and inserts the documented number of NOPs
4575 required for correct execution. */
4576
4577 const int max_waits = 5;
4578 struct ilist
4579 {
4580 rtx_insn *insn;
4581 attr_unit unit;
4582 attr_delayeduse delayeduse;
4583 HARD_REG_SET writes;
4584 HARD_REG_SET reads;
4585 int age;
4586 } back[max_waits];
4587 int oldest = 0;
4588 for (int i = 0; i < max_waits; i++)
4589 back[i].insn = NULL;
4590
4591 rtx_insn *insn, *last_insn = NULL;
4592 for (insn = get_insns (); insn != 0; insn = NEXT_INSN (insn))
4593 {
4594 if (!NONDEBUG_INSN_P (insn))
4595 continue;
4596
4597 if (GET_CODE (PATTERN (insn)) == USE
4598 || GET_CODE (PATTERN (insn)) == CLOBBER)
4599 continue;
4600
4601 attr_type itype = get_attr_type (insn);
4602 attr_unit iunit = get_attr_unit (insn);
4603 attr_delayeduse idelayeduse = get_attr_delayeduse (insn);
4604 HARD_REG_SET ireads, iwrites;
4605 CLEAR_HARD_REG_SET (ireads);
4606 CLEAR_HARD_REG_SET (iwrites);
4607 note_stores (insn, record_hard_reg_sets, &iwrites);
4608 note_uses (&PATTERN (insn), record_hard_reg_uses, &ireads);
4609
4610 /* Scan recent previous instructions for dependencies not handled in
4611 hardware. */
4612 int nops_rqd = 0;
4613 for (int i = oldest; i < oldest + max_waits; i++)
4614 {
4615 struct ilist *prev_insn = &back[i % max_waits];
4616
4617 if (!prev_insn->insn)
4618 continue;
4619
4620 /* VALU writes SGPR followed by VMEM reading the same SGPR
4621 requires 5 wait states. */
4622 if ((prev_insn->age + nops_rqd) < 5
4623 && prev_insn->unit == UNIT_VECTOR
4624 && gcn_vmem_insn_p (itype))
4625 {
4626 HARD_REG_SET regs = prev_insn->writes & ireads;
4627 if (hard_reg_set_intersect_p
4628 (regs, reg_class_contents[(int) SGPR_REGS]))
4629 nops_rqd = 5 - prev_insn->age;
4630 }
4631
4632 /* VALU sets VCC/EXEC followed by VALU uses VCCZ/EXECZ
4633 requires 5 wait states. */
4634 if ((prev_insn->age + nops_rqd) < 5
4635 && prev_insn->unit == UNIT_VECTOR
4636 && iunit == UNIT_VECTOR
4637 && ((hard_reg_set_intersect_p
4638 (prev_insn->writes,
4639 reg_class_contents[(int) EXEC_MASK_REG])
4640 && TEST_HARD_REG_BIT (ireads, EXECZ_REG))
4641 ||
4642 (hard_reg_set_intersect_p
4643 (prev_insn->writes,
4644 reg_class_contents[(int) VCC_CONDITIONAL_REG])
4645 && TEST_HARD_REG_BIT (ireads, VCCZ_REG))))
4646 nops_rqd = 5 - prev_insn->age;
4647
4648 /* VALU writes SGPR/VCC followed by v_{read,write}lane using
4649 SGPR/VCC as lane select requires 4 wait states. */
4650 if ((prev_insn->age + nops_rqd) < 4
4651 && prev_insn->unit == UNIT_VECTOR
4652 && get_attr_laneselect (insn) == LANESELECT_YES)
4653 {
4654 HARD_REG_SET regs = prev_insn->writes & ireads;
4655 if (hard_reg_set_intersect_p
4656 (regs, reg_class_contents[(int) SGPR_REGS])
4657 || hard_reg_set_intersect_p
4658 (regs, reg_class_contents[(int) VCC_CONDITIONAL_REG]))
4659 nops_rqd = 4 - prev_insn->age;
4660 }
4661
4662 /* VALU writes VGPR followed by VALU_DPP reading that VGPR
4663 requires 2 wait states. */
4664 if ((prev_insn->age + nops_rqd) < 2
4665 && prev_insn->unit == UNIT_VECTOR
4666 && itype == TYPE_VOP_DPP)
4667 {
4668 HARD_REG_SET regs = prev_insn->writes & ireads;
4669 if (hard_reg_set_intersect_p
4670 (regs, reg_class_contents[(int) VGPR_REGS]))
4671 nops_rqd = 2 - prev_insn->age;
4672 }
4673
4674 /* Stores whose input registers must not be overwritten by the
4675 following instruction require one wait state. */
4676 if ((prev_insn->age + nops_rqd) < 1
4677 && prev_insn->delayeduse == DELAYEDUSE_YES
4678 && ((hard_reg_set_intersect_p
4679 (prev_insn->reads, iwrites))))
4680 nops_rqd = 1 - prev_insn->age;
4681 }
4682
4683 /* Insert the required number of NOPs. */
4684 for (int i = nops_rqd; i > 0; i--)
4685 emit_insn_after (gen_nop (), last_insn);
4686
4687 /* Age the previous instructions. We can also ignore writes to
4688 registers subsequently overwritten. */
4689 HARD_REG_SET written;
4690 CLEAR_HARD_REG_SET (written);
4691 for (int i = oldest + max_waits - 1; i > oldest; i--)
4692 {
4693 struct ilist *prev_insn = &back[i % max_waits];
4694
4695 /* Assume all instructions are equivalent to one "wait", the same
4696 as s_nop. This is probably true for SALU, but not VALU (which
4697 may take longer), so this is not optimal. However, AMD do
4698 not publish the cycle times for instructions. */
4699 prev_insn->age += 1 + nops_rqd;
4700
4701 written |= iwrites;
4702 prev_insn->writes &= ~written;
4703 }
4704
4705 /* Track the current instruction as a previous instruction. */
4706 back[oldest].insn = insn;
4707 back[oldest].unit = iunit;
4708 back[oldest].delayeduse = idelayeduse;
4709 back[oldest].writes = iwrites;
4710 back[oldest].reads = ireads;
4711 back[oldest].age = 0;
4712 oldest = (oldest + 1) % max_waits;
4713
4714 last_insn = insn;
4715 }
4716}
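/* Illustrative case for the wait-state insertion above: if a VALU
   instruction writes an SGPR and the very next instruction is a VMEM
   access whose address uses that SGPR, five nops are emitted between
   them; if two unrelated instructions already separate the pair, the
   dependency has aged by two and only three nops are required.  */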
4717
4718/* }}} */
4719/* {{{ OpenACC / OpenMP. */
4720
4721#define GCN_DEFAULT_GANGS 0 /* Choose at runtime. */
4722#define GCN_DEFAULT_WORKERS 0 /* Choose at runtime. */
4723#define GCN_DEFAULT_VECTORS 1 /* Use autovectorization only, for now. */
4724
4725/* Implement TARGET_GOACC_VALIDATE_DIMS.
4726
4727 Check the launch dimensions provided for an OpenACC compute
4728 region, or routine. */
4729
4730static bool
4731gcn_goacc_validate_dims (tree decl, int dims[], int fn_level,
4732 unsigned /*used*/)
4733{
4734 bool changed = false;
4735
4736 /* FIXME: remove -facc-experimental-workers when they're ready. */
4737 int max_workers = flag_worker_partitioning ? 16 : 1;
4738
4739 gcc_assert (!flag_worker_partitioning);
4740
4741 /* The vector size must appear to be 64, to the user, unless this is a
4742 SEQ routine. The real, internal value is always 1, which means use
4743 autovectorization, but the user should not see that. */
4744 if (fn_level <= GOMP_DIM_VECTOR && fn_level >= -1
4745 && dims[GOMP_DIM_VECTOR] >= 0)
4746 {
4747 if (fn_level < 0 && dims[GOMP_DIM_VECTOR] >= 0
4748 && dims[GOMP_DIM_VECTOR] != 64)
4749 warning_at (decl ? DECL_SOURCE_LOCATION (decl) : UNKNOWN_LOCATION,
4750 OPT_Wopenacc_dims,
4751 (dims[GOMP_DIM_VECTOR]
4752 ? G_("using vector_length (64), ignoring %d")
4753 : G_("using vector_length (64), "
4754 "ignoring runtime setting")),
4755 dims[GOMP_DIM_VECTOR]);
4756 dims[GOMP_DIM_VECTOR] = 1;
4757 changed = true;
4758 }
4759
4760 /* Check the num workers is not too large. */
4761 if (dims[GOMP_DIM_WORKER] > max_workers)
4762 {
4763 warning_at (decl ? DECL_SOURCE_LOCATION (decl) : UNKNOWN_LOCATION,
4764 OPT_Wopenacc_dims,
4765 "using num_workers (%d), ignoring %d",
4766 max_workers, dims[GOMP_DIM_WORKER]);
4767 dims[GOMP_DIM_WORKER] = max_workers;
4768 changed = true;
4769 }
4770
4771 /* Set global defaults. */
4772 if (!decl)
4773 {
4774 dims[GOMP_DIM_VECTOR] = GCN_DEFAULT_VECTORS;
4775 if (dims[GOMP_DIM_WORKER] < 0)
4776 dims[GOMP_DIM_WORKER] = (flag_worker_partitioning
4777 ? GCN_DEFAULT_WORKERS : 1);
4778 if (dims[GOMP_DIM_GANG] < 0)
4779 dims[GOMP_DIM_GANG] = GCN_DEFAULT_GANGS;
4780 changed = true;
4781 }
4782
4783 return changed;
4784}
4785
4786/* Helper function for oacc_dim_size instruction.
4787 Also used for OpenMP, via builtin_gcn_dim_size, and the omp_gcn pass. */
4788
4789rtx
4790gcn_oacc_dim_size (int dim)
4791{
4792 if (dim < 0 || dim > 2)
4793 error ("offload dimension out of range (%d)", dim);
4794
4795 /* Vectors are a special case. */
4796 if (dim == 2)
4797 return const1_rtx; /* Think of this as 1 times 64. */
4798
4799 static int offset[] = {
4800 /* Offsets into dispatch packet. */
4801 12, /* X dim = Gang / Team / Work-group. */
4802 20, /* Z dim = Worker / Thread / Wavefront. */
4803 16 /* Y dim = Vector / SIMD / Work-item. */
4804 };
4805 rtx addr = gen_rtx_PLUS (DImode,
4806 gen_rtx_REG (DImode,
4807 cfun->machine->args.
4808 reg[DISPATCH_PTR_ARG]),
4809 GEN_INT (offset[dim]));
4810 return gen_rtx_MEM (SImode, addr);
4811}
4812
4813/* Helper function for oacc_dim_pos instruction.
4814 Also used for OpenMP, via builtin_gcn_dim_pos, and the omp_gcn pass. */
4815
4816rtx
4817gcn_oacc_dim_pos (int dim)
4818{
4819 if (dim < 0 || dim > 2)
4820 error ("offload dimension out of range (%d)", dim);
4821
4822 static const int reg[] = {
4823 WORKGROUP_ID_X_ARG, /* Gang / Team / Work-group. */
4824 WORK_ITEM_ID_Z_ARG, /* Worker / Thread / Wavefront. */
4825 WORK_ITEM_ID_Y_ARG /* Vector / SIMD / Work-item. */
4826 };
4827
4828 int reg_num = cfun->machine->args.reg[reg[dim]];
4829
4830 /* The information must have been requested by the kernel. */
4831 gcc_assert (reg_num >= 0);
4832
4833 return gen_rtx_REG (SImode, reg_num);
4834}
4835
4836/* Implement TARGET_GOACC_FORK_JOIN. */
4837
4838static bool
4839gcn_fork_join (gcall *ARG_UNUSED (call), const int *ARG_UNUSED (dims),
4840 bool ARG_UNUSED (is_fork))
4841{
4842 /* GCN does not use the fork/join concept invented for NVPTX.
4843 Instead we use standard autovectorization. */
4844 return false;
4845}
4846
4847/* Implement ???????
4848 FIXME make this a real hook.
4849
4850 Adjust FNDECL such that options inherited from the host compiler
4851 are made appropriate for the accelerator compiler. */
4852
4853void
4854gcn_fixup_accel_lto_options (tree fndecl)
4855{
4856 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4857 if (!func_optimize)
4858 return;
4859
4860 tree old_optimize = build_optimization_node (&global_options);
4861 tree new_optimize;
4862
4863 /* If the function changed the optimization levels as well as
4864 setting target options, start with the optimizations
4865 specified. */
4866 if (func_optimize != old_optimize)
4867 cl_optimization_restore (&global_options,
4868 TREE_OPTIMIZATION (func_optimize));
4869
4870 gcn_option_override ();
4871
4872 /* The target attributes may also change some optimization flags,
4873 so update the optimization options if necessary. */
4874 new_optimize = build_optimization_node (&global_options);
4875
4876 if (old_optimize != new_optimize)
4877 {
4878 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4879 cl_optimization_restore (&global_options,
4880 TREE_OPTIMIZATION (old_optimize));
4881 }
4882}
4883
4884/* }}} */
4885/* {{{ ASM Output. */
4886
4887/* Implement TARGET_ASM_FILE_START.
4888
4889 Print assembler file header text. */
4890
4891static void
4892output_file_start (void)
4893{
4894 fprintf (asm_out_file, "\t.text\n");
4895 fprintf (asm_out_file, "\t.hsa_code_object_version 2,0\n");
4896 fprintf (asm_out_file, "\t.hsa_code_object_isa\n"); /* Autodetect. */
4897 fprintf (asm_out_file, "\t.section\t.AMDGPU.config\n");
4898 fprintf (asm_out_file, "\t.text\n");
4899}
4900
4901/* Implement ASM_DECLARE_FUNCTION_NAME via gcn-hsa.h.
4902
4903 Print the initial definition of a function name.
4904
4905 For GCN kernel entry points this includes all the HSA meta-data, special
4906 alignment constraints that don't apply to regular functions, and magic
4907 comments that pass information to mkoffload. */
4908
4909void
4910gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
4911{
4912 int sgpr, vgpr;
4913 bool xnack_enabled = false;
4914 int extra_regs = 0;
4915
4916 if (cfun && cfun->machine && cfun->machine->normal_function)
4917 {
4918 fputs ("\t.type\t", file);
4919 assemble_name (file, name);
4920 fputs (",@function\n", file);
4921 assemble_name (file, name);
4922 fputs (":\n", file);
4923 return;
4924 }
4925
4926 /* Determine count of sgpr/vgpr registers by looking for last
4927 one used. */
4928 for (sgpr = 101; sgpr >= 0; sgpr--)
4929 if (df_regs_ever_live_p (FIRST_SGPR_REG + sgpr))
4930 break;
4931 sgpr++;
4932 for (vgpr = 255; vgpr >= 0; vgpr--)
4933 if (df_regs_ever_live_p (FIRST_VGPR_REG + vgpr))
4934 break;
4935 vgpr++;
4936
4937 if (xnack_enabled)
4938 extra_regs = 6;
4939 if (df_regs_ever_live_p (FLAT_SCRATCH_LO_REG)
4940 || df_regs_ever_live_p (FLAT_SCRATCH_HI_REG))
4941 extra_regs = 4;
4942 else if (df_regs_ever_live_p (VCC_LO_REG)
4943 || df_regs_ever_live_p (VCC_HI_REG))
4944 extra_regs = 2;
4945
4946 if (!leaf_function_p ())
4947 {
4948 /* We can't know how many registers function calls might use. */
4949 if (vgpr < MAX_NORMAL_VGPR_COUNT)
4950 vgpr = MAX_NORMAL_VGPR_COUNT;
4951 if (sgpr + extra_regs < MAX_NORMAL_SGPR_COUNT)
4952 sgpr = MAX_NORMAL_SGPR_COUNT - extra_regs;
4953 }
4954
4955 /* GFX8 allocates SGPRs in blocks of 8.
4956 GFX9 uses blocks of 16. */
4957 int granulated_sgprs;
4958 if (TARGET_GCN3)
4959 granulated_sgprs = (sgpr + extra_regs + 7) / 8 - 1;
4960 else if (TARGET_GCN5)
4961 granulated_sgprs = 2 * ((sgpr + extra_regs + 15) / 16 - 1);
4962 else
4963 gcc_unreachable ();
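/* For example (illustrative counts): with 30 SGPRs in use plus 2
   extra registers for VCC, GFX8 encodes (32 + 7) / 8 - 1 = 3 while
   GFX9 encodes 2 * ((32 + 15) / 16 - 1) = 2, reflecting the different
   allocation granularities noted above.  */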
4964
4965 fputs ("\t.align\t256\n", file);
4966 fputs ("\t.type\t", file);
4967 assemble_name (file, name);
4968 fputs (",@function\n\t.amdgpu_hsa_kernel\t", file);
4969 assemble_name (file, name);
4970 fputs ("\n", file);
4971 assemble_name (file, name);
4972 fputs (":\n", file);
4973 fprintf (file, "\t.amd_kernel_code_t\n"
4974 "\t\tkernel_code_version_major = 1\n"
4975 "\t\tkernel_code_version_minor = 0\n" "\t\tmachine_kind = 1\n"
4976 /* "\t\tmachine_version_major = 8\n"
4977 "\t\tmachine_version_minor = 0\n"
4978 "\t\tmachine_version_stepping = 1\n" */
4979 "\t\tkernel_code_entry_byte_offset = 256\n"
4980 "\t\tkernel_code_prefetch_byte_size = 0\n"
4981 "\t\tmax_scratch_backing_memory_byte_size = 0\n"
4982 "\t\tcompute_pgm_rsrc1_vgprs = %i\n"
4983 "\t\tcompute_pgm_rsrc1_sgprs = %i\n"
4984 "\t\tcompute_pgm_rsrc1_priority = 0\n"
4985 "\t\tcompute_pgm_rsrc1_float_mode = 192\n"
4986 "\t\tcompute_pgm_rsrc1_priv = 0\n"
4987 "\t\tcompute_pgm_rsrc1_dx10_clamp = 1\n"
4988 "\t\tcompute_pgm_rsrc1_debug_mode = 0\n"
4989 "\t\tcompute_pgm_rsrc1_ieee_mode = 1\n"
4990 /* We enable scratch memory. */
4991 "\t\tcompute_pgm_rsrc2_scratch_en = 1\n"
4992 "\t\tcompute_pgm_rsrc2_user_sgpr = %i\n"
4993 "\t\tcompute_pgm_rsrc2_tgid_x_en = 1\n"
4994 "\t\tcompute_pgm_rsrc2_tgid_y_en = 0\n"
4995 "\t\tcompute_pgm_rsrc2_tgid_z_en = 0\n"
4996 "\t\tcompute_pgm_rsrc2_tg_size_en = 0\n"
4997 "\t\tcompute_pgm_rsrc2_tidig_comp_cnt = 0\n"
4998 "\t\tcompute_pgm_rsrc2_excp_en_msb = 0\n"
4999 "\t\tcompute_pgm_rsrc2_lds_size = 0\n" /* Set at runtime. */
5000 "\t\tcompute_pgm_rsrc2_excp_en = 0\n",
5001 (vgpr - 1) / 4,
5002 /* Must match wavefront_sgpr_count */
5003 granulated_sgprs,
5004 /* The total number of SGPR user data registers requested. This
5005 number must match the number of user data registers enabled. */
5006 cfun->machine->args.nsgprs);
5007 int reg = FIRST_SGPR_REG;
5008 for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
5009 {
5010 int reg_first = -1;
5011 int reg_last;
5012 if ((cfun->machine->args.requested & (1 << a))
5013 && (gcn_kernel_arg_types[a].fixed_regno < 0))
5014 {
5015 reg_first = reg;
5016 reg_last = (reg_first
5017 + (GET_MODE_SIZE (gcn_kernel_arg_types[a].mode)
5018 / UNITS_PER_WORD) - 1);
5019 reg = reg_last + 1;
5020 }
5021
5022 if (gcn_kernel_arg_types[a].header_pseudo)
5023 {
5024 fprintf (file, "\t\t%s = %i",
5025 gcn_kernel_arg_types[a].header_pseudo,
5026 (cfun->machine->args.requested & (1 << a)) != 0);
5027 if (reg_first != -1)
5028 {
5029 fprintf (file, " ; (");
5030 for (int i = reg_first; i <= reg_last; ++i)
5031 {
5032 if (i != reg_first)
5033 fprintf (file, ", ");
5034 fprintf (file, "%s", reg_names[i]);
5035 }
5036 fprintf (file, ")");
5037 }
5038 fprintf (file, "\n");
5039 }
5040 else if (gcn_kernel_arg_types[a].fixed_regno >= 0
5041 && cfun->machine->args.requested & (1 << a))
5042 fprintf (file, "\t\t; %s = %i (%s)\n",
5043 gcn_kernel_arg_types[a].name,
5044 (cfun->machine->args.requested & (1 << a)) != 0,
5045 reg_names[gcn_kernel_arg_types[a].fixed_regno]);
5046 }
5047 fprintf (file, "\t\tenable_vgpr_workitem_id = %i\n",
5048 (cfun->machine->args.requested & (1 << WORK_ITEM_ID_Z_ARG))
5049 ? 2
5050 : cfun->machine->args.requested & (1 << WORK_ITEM_ID_Y_ARG)
5051 ? 1 : 0);
5052 fprintf (file, "\t\tenable_ordered_append_gds = 0\n"
5053 "\t\tprivate_element_size = 1\n"
5054 "\t\tis_ptr64 = 1\n"
5055 "\t\tis_dynamic_callstack = 0\n"
5056 "\t\tis_debug_enabled = 0\n"
5057 "\t\tis_xnack_enabled = %i\n"
5058 "\t\tworkitem_private_segment_byte_size = %i\n"
5059 "\t\tworkgroup_group_segment_byte_size = %u\n"
5060 "\t\tgds_segment_byte_size = 0\n"
5061 "\t\tkernarg_segment_byte_size = %i\n"
5062 "\t\tworkgroup_fbarrier_count = 0\n"
5063 "\t\twavefront_sgpr_count = %i\n"
5064 "\t\tworkitem_vgpr_count = %i\n"
5065 "\t\treserved_vgpr_first = 0\n"
5066 "\t\treserved_vgpr_count = 0\n"
5067 "\t\treserved_sgpr_first = 0\n"
5068 "\t\treserved_sgpr_count = 0\n"
5069 "\t\tdebug_wavefront_private_segment_offset_sgpr = 0\n"
5070 "\t\tdebug_private_segment_buffer_sgpr = 0\n"
5071 "\t\tkernarg_segment_alignment = %i\n"
5072 "\t\tgroup_segment_alignment = 4\n"
5073 "\t\tprivate_segment_alignment = %i\n"
5074 "\t\twavefront_size = 6\n"
5075 "\t\tcall_convention = 0\n"
5076 "\t\truntime_loader_kernel_symbol = 0\n"
5077 "\t.end_amd_kernel_code_t\n", xnack_enabled,
5078 /* workitem_private_segment_bytes_size needs to be
5079 one 64th the wave-front stack size. */
5080 stack_size_opt / 64,
5081 LDS_SIZE, cfun->machine->kernarg_segment_byte_size,
5082 /* Number of scalar registers used by a wavefront. This
5083 includes the special SGPRs for VCC, Flat Scratch (Base,
5084 Size) and XNACK (for GFX8 (VI)+). It does not include the
5085 16 SGPR added if a trap handler is enabled. Must match
5086 compute_pgm_rsrc1.sgprs. */
5087 sgpr + extra_regs, vgpr,
5088 cfun->machine->kernarg_segment_alignment,
5089 crtl->stack_alignment_needed / 8);
5090
5091 /* This comment is read by mkoffload. */
5092 if (flag_openacc)
5093 fprintf (file, "\t;; OPENACC-DIMS: %d, %d, %d : %s\n",
5094 oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_GANG),
5095 oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_WORKER),
5096 oacc_get_fn_dim_size (cfun->decl, GOMP_DIM_VECTOR), name);
5097}
5098
5099/* Implement TARGET_ASM_SELECT_SECTION.
5100
5101 Return the section into which EXP should be placed. */
5102
5103static section *
5104gcn_asm_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
5105{
5106 if (TREE_TYPE (exp) != error_mark_node
5107 && TYPE_ADDR_SPACE (TREE_TYPE (exp)) == ADDR_SPACE_LDS)
5108 {
5109 if (!DECL_P (exp))
5110 return get_section (".lds_bss",
5111 SECTION_WRITE | SECTION_BSS | SECTION_DEBUG,
5112 NULL);
5113
5114 return get_named_section (exp, ".lds_bss", reloc);
5115 }
5116
5117 return default_elf_select_section (exp, reloc, align);
5118}
5119
5120/* Implement TARGET_ASM_FUNCTION_PROLOGUE.
5121
5122 Emits custom text into the assembler file at the head of each function. */
5123
5124static void
5125gcn_target_asm_function_prologue (FILE *file)
5126{
5127 machine_function *offsets = gcn_compute_frame_offsets ();
5128
5129 asm_fprintf (file, "\t; using %s addressing in function\n",
5130 offsets->use_flat_addressing ? "flat" : "global");
5131
5132 if (offsets->normal_function)
5133 {
5134 asm_fprintf (file, "\t; frame pointer needed: %s\n",
5135 offsets->need_frame_pointer ? "true" : "false");
5136 asm_fprintf (file, "\t; lr needs saving: %s\n",
5137 offsets->lr_needs_saving ? "true" : "false");
5138 asm_fprintf (file, "\t; outgoing args size: %wd\n",
5139 offsets->outgoing_args_size);
5140 asm_fprintf (file, "\t; pretend size: %wd\n", offsets->pretend_size);
5141 asm_fprintf (file, "\t; local vars size: %wd\n", offsets->local_vars);
5142 asm_fprintf (file, "\t; callee save size: %wd\n",
5143 offsets->callee_saves);
5144 }
5145 else
5146 {
5147 asm_fprintf (file, "\t; HSA kernel entry point\n");
5148 asm_fprintf (file, "\t; local vars size: %wd\n", offsets->local_vars);
5149 asm_fprintf (file, "\t; outgoing args size: %wd\n",
5150 offsets->outgoing_args_size);
5151
5152 /* Enable denorms. */
5153 asm_fprintf (file, "\n\t; Set MODE[FP_DENORM]: allow single and double"
5154 " input and output denorms\n");
5155 asm_fprintf (file, "\ts_setreg_imm32_b32\thwreg(1, 4, 4), 0xf\n\n");
5156 }
5157}
5158
5159/* Helper function for print_operand and print_operand_address.
5160
5161 Print a register as the assembler requires, according to mode and name. */
5162
5163static void
5164print_reg (FILE *file, rtx x)
5165{
5166 machine_mode mode = GET_MODE (x);
5167 if (mode == BImode || mode == QImode || mode == HImode || mode == SImode
5168 || mode == HFmode || mode == SFmode
5169 || mode == V64SFmode || mode == V64SImode
5170 || mode == V64QImode || mode == V64HImode)
5171 fprintf (file, "%s", reg_names[REGNO (x)]);
5172 else if (mode == DImode || mode == V64DImode
5173 || mode == DFmode || mode == V64DFmode)
5174 {
5175 if (SGPR_REGNO_P (REGNO (x)))
5176 fprintf (file, "s[%i:%i]", REGNO (x) - FIRST_SGPR_REG,
5177 REGNO (x) - FIRST_SGPR_REG + 1);
5178 else if (VGPR_REGNO_P (REGNO (x)))
5179 fprintf (file, "v[%i:%i]", REGNO (x) - FIRST_VGPR_REG,
5180 REGNO (x) - FIRST_VGPR_REG + 1);
5181 else if (REGNO (x) == FLAT_SCRATCH_REG)
5182 fprintf (file, "flat_scratch");
5183 else if (REGNO (x) == EXEC_REG)
5184 fprintf (file, "exec");
5185 else if (REGNO (x) == VCC_LO_REG)
5186 fprintf (file, "vcc");
5187 else
5188 fprintf (file, "[%s:%s]",
5189 reg_names[REGNO (x)], reg_names[REGNO (x) + 1]);
5190 }
5191 else if (mode == TImode)
5192 {
5193 if (SGPR_REGNO_P (REGNO (x)))
5194 fprintf (file, "s[%i:%i]", REGNO (x) - FIRST_SGPR_REG,
5195 REGNO (x) - FIRST_SGPR_REG + 3);
5196 else if (VGPR_REGNO_P (REGNO (x)))
5197 fprintf (file, "v[%i:%i]", REGNO (x) - FIRST_VGPR_REG,
5198 REGNO (x) - FIRST_VGPR_REG + 3);
5199 else
5200 gcc_unreachable ();
5201 }
5202 else
5203 gcc_unreachable ();
5204}
5205
5206/* Implement TARGET_SECTION_TYPE_FLAGS.
5207
5208 Return a set of section attributes for use by TARGET_ASM_NAMED_SECTION. */
5209
5210static unsigned int
5211gcn_section_type_flags (tree decl, const char *name, int reloc)
5212{
5213 if (strcmp (name, ".lds_bss") == 0)
5214 return SECTION_WRITE | SECTION_BSS | SECTION_DEBUG;
5215
5216 return default_section_type_flags (decl, name, reloc);
5217}
5218
5219/* Helper function for gcn_asm_output_symbol_ref.
5220
5221 FIXME: If we want to have propagation blocks allocated separately and
5222 statically like this, it would be better done via symbol refs and the
5223 assembler/linker. This is a temporary hack. */
5224
5225static void
5226gcn_print_lds_decl (FILE *f, tree var)
5227{
5228 int *offset;
5229 machine_function *machfun = cfun->machine;
5230
5231 if ((offset = machfun->lds_allocs->get (var)))
5232 fprintf (f, "%u", (unsigned) *offset);
5233 else
5234 {
5235 unsigned HOST_WIDE_INT align = DECL_ALIGN_UNIT (var);
5236 tree type = TREE_TYPE (var);
5237 unsigned HOST_WIDE_INT size = tree_to_uhwi (TYPE_SIZE_UNIT (type));
5238 if (size > align && size > 4 && align < 8)
5239 align = 8;
5240
5241 machfun->lds_allocated = ((machfun->lds_allocated + align - 1)
5242 & ~(align - 1));
5243
5244 machfun->lds_allocs->put (var, machfun->lds_allocated);
5245 fprintf (f, "%u", machfun->lds_allocated);
5246 machfun->lds_allocated += size;
5247 if (machfun->lds_allocated > LDS_SIZE)
5248 error ("local data-share memory exhausted");
5249 }
5250}
5251
5252/* Implement ASM_OUTPUT_SYMBOL_REF via gcn-hsa.h. */
5253
5254void
5255gcn_asm_output_symbol_ref (FILE *file, rtx x)
5256{
5257 tree decl;
5258 if (cfun
5259 && (decl = SYMBOL_REF_DECL (x)) != 0
5260 && TREE_CODE (decl) == VAR_DECL
5261 && AS_LDS_P (TYPE_ADDR_SPACE (TREE_TYPE (decl))))
5262 {
5263 /* LDS symbols (emitted using this hook) are only used at present
5264 to propagate worker values from an active thread to neutered
5265 threads. Use the same offset for each such block, but don't
5266 use zero because null pointers are used to identify the active
5267 thread in GOACC_single_copy_start calls. */
5268 gcn_print_lds_decl (file, decl);
5269 }
5270 else
5271 {
5272 assemble_name (file, XSTR (x, 0));
5273 /* FIXME: See above -- this condition is unreachable. */
5274 if (cfun
5275 && (decl = SYMBOL_REF_DECL (x)) != 0
5276 && TREE_CODE (decl) == VAR_DECL
5277 && AS_LDS_P (TYPE_ADDR_SPACE (TREE_TYPE (decl))))
5278 fputs ("@abs32", file);
5279 }
5280}
5281
5282/* Implement TARGET_CONSTANT_ALIGNMENT.
5283
5284 Returns the alignment in bits of a constant that is being placed in memory.
5285 CONSTANT is the constant and BASIC_ALIGN is the alignment that the object
5286 would ordinarily have. */
5287
5288static HOST_WIDE_INT
5289gcn_constant_alignment (const_tree ARG_UNUSED (constant),
5290 HOST_WIDE_INT basic_align)
5291{
5292 return basic_align > 128 ? basic_align : 128;
5293}
5294
5295/* Implement PRINT_OPERAND_ADDRESS via gcn.h. */
5296
5297void
5298print_operand_address (FILE *file, rtx mem)
5299{
5300 gcc_assert (MEM_P (mem));
5301
5302 rtx reg;
5303 rtx offset;
5304 addr_space_t as = MEM_ADDR_SPACE (mem);
5305 rtx addr = XEXP (mem, 0);
5306 gcc_assert (REG_P (addr) || GET_CODE (addr) == PLUS);
5307
5308 if (AS_SCRATCH_P (as))
5309 switch (GET_CODE (addr))
5310 {
5311 case REG:
5312 print_reg (file, addr);
5313 break;
5314
5315 case PLUS:
5316 reg = XEXP (addr, 0);
5317 offset = XEXP (addr, 1);
5318 print_reg (file, reg);
5319 if (GET_CODE (offset) == CONST_INT)
5320 fprintf (file, " offset:" HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
5321 else
5322 abort ();
5323 break;
5324
5325 default:
5326 debug_rtx (addr);
5327 abort ();
5328 }
5329 else if (AS_ANY_FLAT_P (as))
5330 {
5331 if (GET_CODE (addr) == REG)
5332 print_reg (file, addr);
5333 else
5334 {
5335 gcc_assert (TARGET_GCN5_PLUS);
5336 print_reg (file, XEXP (addr, 0));
5337 }
5338 }
5339 else if (AS_GLOBAL_P (as))
5340 {
5341 gcc_assert (TARGET_GCN5_PLUS);
5342
5343 rtx base = addr;
5344 rtx vgpr_offset = NULL_RTX;
5345
5346 if (GET_CODE (addr) == PLUS)
5347 {
5348 base = XEXP (addr, 0);
5349
5350 if (GET_CODE (base) == PLUS)
5351 {
5352 /* (SGPR + VGPR) + CONST */
5353 vgpr_offset = XEXP (base, 1);
5354 base = XEXP (base, 0);
5355 }
5356 else
5357 {
5358 rtx offset = XEXP (addr, 1);
5359
5360 if (REG_P (offset))
5361 /* SGPR + VGPR */
5362 vgpr_offset = offset;
5363 else if (CONST_INT_P (offset))
5364 /* VGPR + CONST or SGPR + CONST */
5365 ;
5366 else
5367 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
5368 }
5369 }
5370
5371 if (REG_P (base))
5372 {
5373 if (VGPR_REGNO_P (REGNO (base)))
5374 print_reg (file, base);
5375 else if (SGPR_REGNO_P (REGNO (base)))
5376 {
5377 /* The assembler requires a 64-bit VGPR pair here, even though
5378 the offset should be only 32-bit. */
5379 if (vgpr_offset == NULL_RTX)
5380 /* In this case, the vector offset is zero, so we use the first
5381 lane of v1, which is initialized to zero. */
5382 fprintf (file, "v[1:2]");
5383 else if (REG_P (vgpr_offset)
5384 && VGPR_REGNO_P (REGNO (vgpr_offset)))
5385 {
5386 fprintf (file, "v[%d:%d]",
5387 REGNO (vgpr_offset) - FIRST_VGPR_REG,
5388 REGNO (vgpr_offset) - FIRST_VGPR_REG + 1);
5389 }
5390 else
5391 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
5392 }
5393 }
5394 else
5395 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
5396 }
5397 else if (AS_ANY_DS_P (as))
5398 switch (GET_CODE (addr))
5399 {
5400 case REG:
5401 print_reg (file, addr);
5402 break;
5403
5404 case PLUS:
5405 reg = XEXP (addr, 0);
5406 print_reg (file, reg);
5407 break;
5408
5409 default:
5410 debug_rtx (addr);
5411 abort ();
5412 }
5413 else
5414 switch (GET_CODE (addr))
5415 {
5416 case REG:
5417 print_reg (file, addr);
5418 fprintf (file, ", 0");
5419 break;
5420
5421 case PLUS:
5422 reg = XEXP (addr, 0);
5423 offset = XEXP (addr, 1);
5424 print_reg (file, reg);
5425 fprintf (file, ", ");
5426 if (GET_CODE (offset) == REG)
5427 print_reg (file, reg);
5428 else if (GET_CODE (offset) == CONST_INT)
5429 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
5430 else
5431 abort ();
5432 break;
5433
5434 default:
5435 debug_rtx (addr);
5436 abort ();
5437 }
5438}
5439
5440/* Implement PRINT_OPERAND via gcn.h.
5441
5442 b - print operand size as untyped operand (b8/b16/b32/b64)
5443 B - print operand size as SI/DI untyped operand (b32/b32/b32/b64)
5444 i - print operand size as untyped operand (i16/b32/i64)
5445 u - print operand size as untyped operand (u16/u32/u64)
5446 o - print operand size as memory access size for loads
5447 (ubyte/ushort/dword/dwordx2/wordx3/dwordx4)
5448 s - print operand size as memory access size for stores
5449 (byte/short/dword/dwordx2/wordx3/dwordx4)
5450 C - print conditional code for s_cbranch (_sccz/_sccnz/_vccz/_vccnz...)
5451 c - print inverse conditional code for s_cbranch
5452 D - print conditional code for s_cmp (eq_u64/lg_u64...)
5453 E - print conditional code for v_cmp (eq_u64/ne_u64...)
5454 A - print address in formatting suitable for given address space.
5455 O - print offset:n for data share operations.
5456 ^ - print "_co" suffix for GCN5 mnemonics
5457 g - print "glc", if appropriate for given MEM
5458 */
5459
5460void
5461print_operand (FILE *file, rtx x, int code)
5462{
5463 int xcode = x ? GET_CODE (x) : 0;
5464 bool invert = false;
5465 switch (code)
5466 {
5467 /* Instructions have the following suffixes.
5468 If there are two suffixes, the first is the destination type,
5469 and the second is the source type.
5470
5471 B32 Bitfield (untyped data) 32-bit
5472 B64 Bitfield (untyped data) 64-bit
5473 F16 floating-point 16-bit
5474 F32 floating-point 32-bit (IEEE 754 single-precision float)
5475 F64 floating-point 64-bit (IEEE 754 double-precision float)
5476 I16 signed 16-bit integer
5477 I32 signed 32-bit integer
5478 I64 signed 64-bit integer
5479 U16 unsigned 16-bit integer
5480 U32 unsigned 32-bit integer
5481 U64 unsigned 64-bit integer */
5482
5483 /* Print operand size as untyped suffix. */
5484 case 'b':
5485 {
5486 const char *s = "";
5487 machine_mode mode = GET_MODE (x);
5488 if (VECTOR_MODE_P (mode))
5489 mode = GET_MODE_INNER (mode);
5490 switch (GET_MODE_SIZE (mode))
5491 {
5492 case 1:
5493 s = "_b8";
5494 break;
5495 case 2:
5496 s = "_b16";
5497 break;
5498 case 4:
5499 s = "_b32";
5500 break;
5501 case 8:
5502 s = "_b64";
5503 break;
5504 default:
5505 output_operand_lossage ("invalid operand %%xn code");
5506 return;
5507 }
5508 fputs (s, file);
5509 }
5510 return;
5511 case 'B':
5512 {
5513 const char *s = "";
5514 machine_mode mode = GET_MODE (x);
5515 if (VECTOR_MODE_P (mode))
5516 mode = GET_MODE_INNER (mode);
5517 switch (GET_MODE_SIZE (mode))
5518 {
5519 case 1:
5520 case 2:
5521 case 4:
5522 s = "_b32";
5523 break;
5524 case 8:
5525 s = "_b64";
5526 break;
5527 default:
5528 output_operand_lossage ("invalid operand %%xn code");
5529 return;
5530 }
5531 fputs (s, file);
5532 }
5533 return;
5534 case 'e':
5535 fputs ("sext(", file);
5536 print_operand (file, x, 0);
5537 fputs (")", file);
5538 return;
5539 case 'i':
5540 case 'u':
5541 {
5542 bool signed_p = code == 'i';
5543 const char *s = "";
5544 machine_mode mode = GET_MODE (x);
5545 if (VECTOR_MODE_P (mode))
5546 mode = GET_MODE_INNER (mode);
5547 if (mode == VOIDmode)
5548 switch (GET_CODE (x))
5549 {
5550 case CONST_INT:
5551 s = signed_p ? "_i32" : "_u32";
5552 break;
5553 case CONST_DOUBLE:
5554 s = "_f64";
5555 break;
5556 default:
5557 output_operand_lossage ("invalid operand %%xn code");
5558 return;
5559 }
5560 else if (FLOAT_MODE_P (mode))
5561 switch (GET_MODE_SIZE (mode))
5562 {
5563 case 2:
5564 s = "_f16";
5565 break;
5566 case 4:
5567 s = "_f32";
5568 break;
5569 case 8:
5570 s = "_f64";
5571 break;
5572 default:
5573 output_operand_lossage ("invalid operand %%xn code");
5574 return;
5575 }
5576 else
5577 switch (GET_MODE_SIZE (mode))
5578 {
5579 case 1:
5580 s = signed_p ? "_i8" : "_u8";
5581 break;
5582 case 2:
5583 s = signed_p ? "_i16" : "_u16";
5584 break;
5585 case 4:
5586 s = signed_p ? "_i32" : "_u32";
5587 break;
5588 case 8:
5589 s = signed_p ? "_i64" : "_u64";
5590 break;
5591 default:
5592 output_operand_lossage ("invalid operand %%xn code");
5593 return;
5594 }
5595 fputs (s, file);
5596 }
5597 return;
5598 /* Print operand size as untyped suffix. */
5599 case 'o':
5600 {
5601 const char *s = 0;
5602 switch (GET_MODE_SIZE (GET_MODE (x)))
5603 {
5604 case 1:
5605 s = "_ubyte";
5606 break;
5607 case 2:
5608 s = "_ushort";
5609 break;
5610 /* The following are full-vector variants. */
5611 case 64:
5612 s = "_ubyte";
5613 break;
5614 case 128:
5615 s = "_ushort";
5616 break;
5617 }
5618
5619 if (s)
5620 {
5621 fputs (s, file);
5622 return;
5623 }
5624
5625 /* Fall-through - the other cases for 'o' are the same as for 's'. */
5626 gcc_fallthrough();
5627 }
5628 case 's':
5629 {
5630 const char *s = "";
5631 switch (GET_MODE_SIZE (GET_MODE (x)))
5632 {
5633 case 1:
5634 s = "_byte";
5635 break;
5636 case 2:
5637 s = "_short";
5638 break;
5639 case 4:
5640 s = "_dword";
5641 break;
5642 case 8:
5643 s = "_dwordx2";
5644 break;
5645 case 12:
5646 s = "_dwordx3";
5647 break;
5648 case 16:
5649 s = "_dwordx4";
5650 break;
5651 case 32:
5652 s = "_dwordx8";
5653 break;
5654 case 64:
5655 s = VECTOR_MODE_P (GET_MODE (x)) ? "_byte" : "_dwordx16";
5656 break;
5657 /* The following are full-vector variants. */
5658 case 128:
5659 s = "_short";
5660 break;
5661 case 256:
5662 s = "_dword";
5663 break;
5664 case 512:
5665 s = "_dwordx2";
5666 break;
5667 default:
5668 output_operand_lossage ("invalid operand %%xn code");
5669 return;
5670 }
5671 fputs (s, file);
5672 }
5673 return;
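 /* 'A' prints the register/address part of a MEM operand; 'O' prints the
    trailing modifiers: " gds" for GDS accesses, the SGPR base (or "off")
    plus an "offset:N" for global accesses, or just "offset:N" for other
    address spaces with a constant offset.  */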
5674 case 'A':
5675 if (xcode != MEM)
5676 {
5677 output_operand_lossage ("invalid %%xn code");
5678 return;
5679 }
5680 print_operand_address (file, x);
5681 return;
5682 case 'O':
5683 {
5684 if (xcode != MEM)
5685 {
5686 output_operand_lossage ("invalid %%xn code");
5687 return;
5688 }
5689 if (AS_GDS_P (MEM_ADDR_SPACE (x)))
5690 fprintf (file, " gds");
5691
5692 rtx x0 = XEXP (x, 0);
5693 if (AS_GLOBAL_P (MEM_ADDR_SPACE (x)))
5694 {
5695 gcc_assert (TARGET_GCN5_PLUS);
5696
5697 fprintf (file, ", ");
5698
5699 rtx base = x0;
5700 rtx const_offset = NULL_RTX;
5701
5702 if (GET_CODE (base) == PLUS)
5703 {
5704 rtx offset = XEXP (x0, 1);
5705 base = XEXP (x0, 0);
5706
5707 if (GET_CODE (base) == PLUS)
5708 /* (SGPR + VGPR) + CONST */
5709 /* Ignore the VGPR offset for this operand. */
5710 base = XEXP (base, 0);
5711
5712 if (CONST_INT_P (offset))
5713 const_offset = XEXP (x0, 1);
5714 else if (REG_P (offset))
5715 /* SGPR + VGPR */
5716 /* Ignore the VGPR offset for this operand. */
5717 ;
5718 else
5719 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
5720 }
5721
5722 if (REG_P (base))
5723 {
5724 if (VGPR_REGNO_P (REGNO (base)))
5725 /* The VGPR address is specified in the %A operand. */
5726 fprintf (file, "off");
5727 else if (SGPR_REGNO_P (REGNO (base)))
5728 print_reg (file, base);
5729 else
5730 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
5731 }
5732 else
5733 output_operand_lossage ("bad ADDR_SPACE_GLOBAL address");
5734
5735 if (const_offset != NULL_RTX)
5736 fprintf (file, " offset:" HOST_WIDE_INT_PRINT_DEC,
5737 INTVAL (const_offset));
5738
5739 return;
5740 }
5741
5742 if (GET_CODE (x0) == REG)
5743 return;
5744 if (GET_CODE (x0) != PLUS)
5745 {
5746 output_operand_lossage ("invalid %%xn code");
5747 return;
5748 }
5749 rtx val = XEXP (x0, 1);
5750 if (GET_CODE (val) == CONST_VECTOR)
5751 val = CONST_VECTOR_ELT (val, 0);
5752 if (GET_CODE (val) != CONST_INT)
5753 {
5754 output_operand_lossage ("invalid %%xn code");
5755 return;
5756 }
5757 fprintf (file, " offset:" HOST_WIDE_INT_PRINT_DEC, INTVAL (val));
5758
5759 }
5760 return;
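 /* 'C' prints the branch condition tested by a VCC/SCC/EXEC comparison,
    e.g. "_vccz"/"_vccnz" or "_scc0"/"_scc1", as used by the s_cbranch
    instructions; 'c' prints the inverted condition.  */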
5761 case 'c':
5762 invert = true;
5763 /* Fall through. */
5764 case 'C':
5765 {
5766 const char *s;
5767 bool num = false;
5768 if ((xcode != EQ && xcode != NE) || !REG_P (XEXP (x, 0)))
5769 {
5770 output_operand_lossage ("invalid %%xn code");
5771 return;
5772 }
5773 switch (REGNO (XEXP (x, 0)))
5774 {
5775 case VCC_REG:
5776 case VCCZ_REG:
5777 s = "_vcc";
5778 break;
5779 case SCC_REG:
5780 /* For some reason llvm-mc insists on scc0 instead of sccz. */
5781 num = true;
5782 s = "_scc";
5783 break;
5784 case EXECZ_REG:
5785 s = "_exec";
5786 break;
5787 default:
5788 output_operand_lossage ("invalid %%xn code");
5789 return;
5790 }
5791 fputs (s, file);
5792 if (xcode == (invert ? NE : EQ))
5793 fputc (num ? '0' : 'z', file);
5794 else
5795 fputs (num ? "1" : "nz", file);
5796 return;
5797 }
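 /* 'D' prints a scalar comparison suffix such as "_lt_i32" or "_ge_u64";
    s_cmp only does integer comparisons.  */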
5798 case 'D':
5799 {
5800 const char *s;
5801 bool cmp_signed = false;
5802 switch (xcode)
5803 {
5804 case EQ:
5805 s = "_eq_";
5806 break;
5807 case NE:
5808 s = "_lg_";
5809 break;
5810 case LT:
5811 s = "_lt_";
5812 cmp_signed = true;
5813 break;
5814 case LE:
5815 s = "_le_";
5816 cmp_signed = true;
5817 break;
5818 case GT:
5819 s = "_gt_";
5820 cmp_signed = true;
5821 break;
5822 case GE:
5823 s = "_ge_";
5824 cmp_signed = true;
5825 break;
5826 case LTU:
5827 s = "_lt_";
5828 break;
5829 case LEU:
5830 s = "_le_";
5831 break;
5832 case GTU:
5833 s = "_gt_";
5834 break;
5835 case GEU:
5836 s = "_ge_";
5837 break;
5838 default:
5839 output_operand_lossage ("invalid %%xn code");
5840 return;
5841 }
5842 fputs (s, file);
5843 fputc (cmp_signed ? 'i' : 'u', file);
5844
5845 machine_mode mode = GET_MODE (XEXP (x, 0));
5846
5847 if (mode == VOIDmode)
5848 mode = GET_MODE (XEXP (x, 1));
5849
5850 /* If both sides are constants, then assume the instruction is in
5851 SImode since s_cmp can only do integer compares. */
5852 if (mode == VOIDmode)
5853 mode = SImode;
5854
5855 switch (GET_MODE_SIZE (mode))
5856 {
5857 case 4:
5858 s = "32";
5859 break;
5860 case 8:
5861 s = "64";
5862 break;
5863 default:
5864 output_operand_lossage ("invalid operand %%xn code");
5865 return;
5866 }
5867 fputs (s, file);
5868 return;
5869 }
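 /* 'E' is like 'D' but also handles floating-point and vector comparisons,
    e.g. "_lt_f32" as in v_cmp_lt_f32.  */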
5870 case 'E':
5871 {
5872 const char *s;
5873 bool cmp_signed = false;
5874 machine_mode mode = GET_MODE (XEXP (x, 0));
5875
5876 if (mode == VOIDmode)
5877 mode = GET_MODE (XEXP (x, 1));
5878
5879 /* If both sides are constants, assume the instruction is in SFmode
5880 if either operand is floating point, otherwise assume SImode. */
5881 if (mode == VOIDmode)
5882 {
5883 if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
5884 || GET_CODE (XEXP (x, 1)) == CONST_DOUBLE)
5885 mode = SFmode;
5886 else
5887 mode = SImode;
5888 }
5889
5890 /* Use the same format code for vector comparisons. */
5891 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
5892 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
5893 mode = GET_MODE_INNER (mode);
5894
5895 bool float_p = GET_MODE_CLASS (mode) == MODE_FLOAT;
5896
5897 switch (xcode)
5898 {
5899 case EQ:
5900 s = "_eq_";
5901 break;
5902 case NE:
5903 s = float_p ? "_neq_" : "_ne_";
5904 break;
5905 case LT:
5906 s = "_lt_";
5907 cmp_signed = true;
5908 break;
5909 case LE:
5910 s = "_le_";
5911 cmp_signed = true;
5912 break;
5913 case GT:
5914 s = "_gt_";
5915 cmp_signed = true;
5916 break;
5917 case GE:
5918 s = "_ge_";
5919 cmp_signed = true;
5920 break;
5921 case LTU:
5922 s = "_lt_";
5923 break;
5924 case LEU:
5925 s = "_le_";
5926 break;
5927 case GTU:
5928 s = "_gt_";
5929 break;
5930 case GEU:
5931 s = "_ge_";
5932 break;
5933 case ORDERED:
5934 s = "_o_";
5935 break;
5936 case UNORDERED:
5937 s = "_u_";
5938 break;
5939 case LTGT:
5940 s = "_lg_";
5941 break;
5942 default:
5943 output_operand_lossage ("invalid %%xn code");
5944 return;
5945 }
5946 fputs (s, file);
5947 fputc (float_p ? 'f' : cmp_signed ? 'i' : 'u', file);
5948
5949 switch (GET_MODE_SIZE (mode))
5950 {
5951 case 1:
5952 output_operand_lossage ("operand %%xn code invalid for QImode");
5953 return;
 5954 case 2:
 5955 s = "16";
5956 break;
5957 case 4:
5958 s = "32";
5959 break;
5960 case 8:
5961 s = "64";
5962 break;
5963 default:
5964 output_operand_lossage ("invalid operand %%xn code");
5965 return;
5966 }
5967 fputs (s, file);
5968 return;
5969 }
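 /* Print the low ('L') or high ('H') part of a multi-register value.  */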
5970 case 'L':
5971 print_operand (file, gcn_operand_part (GET_MODE (x), x, 0), 0);
5972 return;
5973 case 'H':
5974 print_operand (file, gcn_operand_part (GET_MODE (x), x, 1), 0);
5975 return;
5976 case 'R':
5977 /* Print a scalar register number as an integer. Temporary hack. */
5978 gcc_assert (REG_P (x));
5979 fprintf (file, "%u", (int) REGNO (x));
5980 return;
5981 case 'V':
5982 /* Print a vector register number as an integer. Temporary hack. */
5983 gcc_assert (REG_P (x));
5984 fprintf (file, "%u", (int) REGNO (x) - FIRST_VGPR_REG);
5985 return;
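 /* No code: default output.  Registers print by name, integer constants in
    decimal, and inline floating-point constants by their literal spelling;
    other constants fall back to output_addr_const.  */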
5986 case 0:
5987 if (xcode == REG)
5988 print_reg (file, x);
5989 else if (xcode == MEM)
5990 output_address (GET_MODE (x), x);
5991 else if (xcode == CONST_INT)
5992 fprintf (file, "%i", (int) INTVAL (x));
5993 else if (xcode == CONST_VECTOR)
5994 print_operand (file, CONST_VECTOR_ELT (x, 0), code);
5995 else if (xcode == CONST_DOUBLE)
5996 {
5997 const char *str;
5998 switch (gcn_inline_fp_constant_p (x, false))
5999 {
6000 case 240:
6001 str = "0.5";
6002 break;
6003 case 241:
6004 str = "-0.5";
6005 break;
6006 case 242:
6007 str = "1.0";
6008 break;
6009 case 243:
6010 str = "-1.0";
6011 break;
6012 case 244:
6013 str = "2.0";
6014 break;
6015 case 245:
6016 str = "-2.0";
6017 break;
6018 case 246:
6019 str = "4.0";
6020 break;
6021 case 247:
6022 str = "-4.0";
6023 break;
6024 case 248:
6025 str = "1/pi";
6026 break;
6027 default:
6028 rtx ix = simplify_gen_subreg (GET_MODE (x) == DFmode
6029 ? DImode : SImode,
6030 x, GET_MODE (x), 0);
6031 if (ix)
6032 print_operand (file, ix, code);
6033 else
6034 output_operand_lossage ("invalid fp constant");
6035 return;
6036 break;
6037 }
6038 fputs (str, file);
6039 return;
6040 }
6041 else
6042 output_addr_const (file, x);
6043 return;
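 /* GCN5 renamed the carry-out forms of the VALU add/sub instructions, so
    append "_co" there (e.g. v_add_co_u32).  */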
6044 case '^':
6045 if (TARGET_GCN5_PLUS)
6046 fputs ("_co", file);
6047 return;
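 /* The "glc" (globally coherent) bit stops volatile accesses being
    satisfied from the L1 cache.  */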
6048 case 'g':
6049 gcc_assert (xcode == MEM);
6050 if (MEM_VOLATILE_P (x))
6051 fputs (" glc", file);
6052 return;
6053 default:
6054 output_operand_lossage ("invalid %%xn code");
6055 }
6056 gcc_unreachable ();
6057}
6058
6059/* }}} */
6060/* {{{ TARGET hook overrides. */
6061
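/* Each hook is #undef'd to drop the default from target-def.h and then
   redefined to the GCN implementation; TARGET_INITIALIZER below collects
   them all into the targetm vector.  */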
6062#undef TARGET_ADDR_SPACE_ADDRESS_MODE
6063#define TARGET_ADDR_SPACE_ADDRESS_MODE gcn_addr_space_address_mode
6064#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
6065#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
6066 gcn_addr_space_legitimate_address_p
6067#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
6068#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS gcn_addr_space_legitimize_address
6069#undef TARGET_ADDR_SPACE_POINTER_MODE
6070#define TARGET_ADDR_SPACE_POINTER_MODE gcn_addr_space_pointer_mode
6071#undef TARGET_ADDR_SPACE_SUBSET_P
6072#define TARGET_ADDR_SPACE_SUBSET_P gcn_addr_space_subset_p
6073#undef TARGET_ADDR_SPACE_CONVERT
6074#define TARGET_ADDR_SPACE_CONVERT gcn_addr_space_convert
6075#undef TARGET_ARG_PARTIAL_BYTES
6076#define TARGET_ARG_PARTIAL_BYTES gcn_arg_partial_bytes
6077#undef TARGET_ASM_ALIGNED_DI_OP
6078#define TARGET_ASM_ALIGNED_DI_OP "\t.8byte\t"
6079#undef TARGET_ASM_FILE_START
6080#define TARGET_ASM_FILE_START output_file_start
6081#undef TARGET_ASM_FUNCTION_PROLOGUE
6082#define TARGET_ASM_FUNCTION_PROLOGUE gcn_target_asm_function_prologue
6083#undef TARGET_ASM_SELECT_SECTION
6084#define TARGET_ASM_SELECT_SECTION gcn_asm_select_section
6085#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
6086#define TARGET_ASM_TRAMPOLINE_TEMPLATE gcn_asm_trampoline_template
6087#undef TARGET_ATTRIBUTE_TABLE
6088#define TARGET_ATTRIBUTE_TABLE gcn_attribute_table
6089#undef TARGET_BUILTIN_DECL
6090#define TARGET_BUILTIN_DECL gcn_builtin_decl
6091#undef TARGET_CAN_CHANGE_MODE_CLASS
6092#define TARGET_CAN_CHANGE_MODE_CLASS gcn_can_change_mode_class
6093#undef TARGET_CAN_ELIMINATE
6094#define TARGET_CAN_ELIMINATE gcn_can_eliminate_p
6095#undef TARGET_CANNOT_COPY_INSN_P
6096#define TARGET_CANNOT_COPY_INSN_P gcn_cannot_copy_insn_p
6097#undef TARGET_CLASS_LIKELY_SPILLED_P
6098#define TARGET_CLASS_LIKELY_SPILLED_P gcn_class_likely_spilled_p
6099#undef TARGET_CLASS_MAX_NREGS
6100#define TARGET_CLASS_MAX_NREGS gcn_class_max_nregs
6101#undef TARGET_CONDITIONAL_REGISTER_USAGE
6102#define TARGET_CONDITIONAL_REGISTER_USAGE gcn_conditional_register_usage
6103#undef TARGET_CONSTANT_ALIGNMENT
6104#define TARGET_CONSTANT_ALIGNMENT gcn_constant_alignment
6105#undef TARGET_DEBUG_UNWIND_INFO
6106#define TARGET_DEBUG_UNWIND_INFO gcn_debug_unwind_info
6107#undef TARGET_EMUTLS_VAR_INIT
6108#define TARGET_EMUTLS_VAR_INIT gcn_emutls_var_init
6109#undef TARGET_EXPAND_BUILTIN
6110#define TARGET_EXPAND_BUILTIN gcn_expand_builtin
6111#undef TARGET_FUNCTION_ARG
6112#undef TARGET_FUNCTION_ARG_ADVANCE
6113#define TARGET_FUNCTION_ARG_ADVANCE gcn_function_arg_advance
6114#define TARGET_FUNCTION_ARG gcn_function_arg
6115#undef TARGET_FUNCTION_VALUE
6116#define TARGET_FUNCTION_VALUE gcn_function_value
6117#undef TARGET_FUNCTION_VALUE_REGNO_P
6118#define TARGET_FUNCTION_VALUE_REGNO_P gcn_function_value_regno_p
6119#undef TARGET_GIMPLIFY_VA_ARG_EXPR
6120#define TARGET_GIMPLIFY_VA_ARG_EXPR gcn_gimplify_va_arg_expr
6121#undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
6122#define TARGET_OMP_DEVICE_KIND_ARCH_ISA gcn_omp_device_kind_arch_isa
6123#undef TARGET_GOACC_ADJUST_PROPAGATION_RECORD
6124#define TARGET_GOACC_ADJUST_PROPAGATION_RECORD \
6125 gcn_goacc_adjust_propagation_record
6126#undef TARGET_GOACC_ADJUST_GANGPRIVATE_DECL
6127#define TARGET_GOACC_ADJUST_GANGPRIVATE_DECL gcn_goacc_adjust_gangprivate_decl
6128#undef TARGET_GOACC_FORK_JOIN
6129#define TARGET_GOACC_FORK_JOIN gcn_fork_join
6130#undef TARGET_GOACC_REDUCTION
6131#define TARGET_GOACC_REDUCTION gcn_goacc_reduction
6132#undef TARGET_GOACC_VALIDATE_DIMS
6133#define TARGET_GOACC_VALIDATE_DIMS gcn_goacc_validate_dims
6134#undef TARGET_HARD_REGNO_MODE_OK
6135#define TARGET_HARD_REGNO_MODE_OK gcn_hard_regno_mode_ok
6136#undef TARGET_HARD_REGNO_NREGS
6137#define TARGET_HARD_REGNO_NREGS gcn_hard_regno_nregs
6138#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
6139#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
6140#undef TARGET_INIT_BUILTINS
6141#define TARGET_INIT_BUILTINS gcn_init_builtins
6142#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
6143#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
6144 gcn_ira_change_pseudo_allocno_class
6145#undef TARGET_LEGITIMATE_CONSTANT_P
6146#define TARGET_LEGITIMATE_CONSTANT_P gcn_legitimate_constant_p
6147#undef TARGET_LRA_P
6148#define TARGET_LRA_P hook_bool_void_true
6149#undef TARGET_MACHINE_DEPENDENT_REORG
6150#define TARGET_MACHINE_DEPENDENT_REORG gcn_md_reorg
6151#undef TARGET_MEMORY_MOVE_COST
6152#define TARGET_MEMORY_MOVE_COST gcn_memory_move_cost
6153#undef TARGET_MODES_TIEABLE_P
6154#define TARGET_MODES_TIEABLE_P gcn_modes_tieable_p
6155#undef TARGET_OPTION_OVERRIDE
6156#define TARGET_OPTION_OVERRIDE gcn_option_override
6157#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
6158#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED \
6159 gcn_pretend_outgoing_varargs_named
6160#undef TARGET_PROMOTE_FUNCTION_MODE
6161#define TARGET_PROMOTE_FUNCTION_MODE gcn_promote_function_mode
6162#undef TARGET_REGISTER_MOVE_COST
6163#define TARGET_REGISTER_MOVE_COST gcn_register_move_cost
6164#undef TARGET_RETURN_IN_MEMORY
6165#define TARGET_RETURN_IN_MEMORY gcn_return_in_memory
6166#undef TARGET_RTX_COSTS
6167#define TARGET_RTX_COSTS gcn_rtx_costs
6168#undef TARGET_SECONDARY_RELOAD
6169#define TARGET_SECONDARY_RELOAD gcn_secondary_reload
6170#undef TARGET_SECTION_TYPE_FLAGS
6171#define TARGET_SECTION_TYPE_FLAGS gcn_section_type_flags
6172#undef TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P
6173#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \
6174 gcn_small_register_classes_for_mode_p
6175#undef TARGET_SPILL_CLASS
6176#define TARGET_SPILL_CLASS gcn_spill_class
6177#undef TARGET_STRICT_ARGUMENT_NAMING
6178#define TARGET_STRICT_ARGUMENT_NAMING gcn_strict_argument_naming
6179#undef TARGET_TRAMPOLINE_INIT
6180#define TARGET_TRAMPOLINE_INIT gcn_trampoline_init
6181#undef TARGET_TRULY_NOOP_TRUNCATION
6182#define TARGET_TRULY_NOOP_TRUNCATION gcn_truly_noop_truncation
6183#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
6184#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST gcn_vectorization_cost
6185#undef TARGET_VECTORIZE_GET_MASK_MODE
6186#define TARGET_VECTORIZE_GET_MASK_MODE gcn_vectorize_get_mask_mode
6187#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
6188#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE gcn_vectorize_preferred_simd_mode
6189#undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
6190#define TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT \
6191 gcn_preferred_vector_alignment
6192#undef TARGET_VECTORIZE_RELATED_MODE
6193#define TARGET_VECTORIZE_RELATED_MODE gcn_related_vector_mode
6194#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
6195#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
6196 gcn_vectorize_support_vector_misalignment
6197#undef TARGET_VECTORIZE_VEC_PERM_CONST
6198#define TARGET_VECTORIZE_VEC_PERM_CONST gcn_vectorize_vec_perm_const
6199#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
6200#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
6201 gcn_vector_alignment_reachable
6202#undef TARGET_VECTOR_MODE_SUPPORTED_P
6203#define TARGET_VECTOR_MODE_SUPPORTED_P gcn_vector_mode_supported_p
6204
6205struct gcc_target targetm = TARGET_INITIALIZER;
6206
6207#include "gt-gcn.h"
6208/* }}} */